配置及注意事项
- 配置: 每个节点2*Intel(R) Xeon(R) Platinum 9242 CPU @ 2.30GHz,共96C/节点,总共192C
- 仅有一个节点有能ipv4访问互联网的ip,两个节点均支持ipv6,因此配置安装过程尽量使用ipv6.
- 使用有ipv4的节点做管理节点和计算节点,下面称为master节点,
另一个节点称为client01节点 - 环境变量仅添加到master的
/etc/profile
是不够的,还要添加到各个计算节点 - 仅在master节点安装编译需要的库(如libibmad.so),需要在计算节点也安装,才能正常提交作业
重装系统Centos7
安装界面勾选IB驱动和相关的库
配置源
#ipv6源
sudo su
echo '2402:f000:1:408:8100::1 mirrors6.tuna.tsinghua.edu.cn mirrors.tuna.tsinghua.edu.cn' >> /etc/hosts
cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.bak
cat << EOF > /etc/yum.repos.d/CentOS-Base.repo
# CentOS-Base.repo
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#
[base]
name=CentOS-\$releasever - Base
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/\$releasever/os/\$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=\$releasever&arch=\$basearch&repo=os
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
#released updates
[updates]
name=CentOS-\$releasever - Updates
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/\$releasever/updates/\$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=\$releasever&arch=\$basearch&repo=updates
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
#additional packages that may be useful
[extras]
name=CentOS-\$releasever - Extras
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/\$releasever/extras/\$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=\$releasever&arch=\$basearch&repo=extras
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-\$releasever - Plus
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/\$releasever/centosplus/\$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=\$releasever&arch=\$basearch&repo=centosplus
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
EOF
yum makecache
#清华源的GPG名称和centos的不同啊,这里这样解决
cp /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 /etc/pki/rpm-gpg/RPM-GPG-KEY-7
#epel 源
cd /etc/pki/rpm-gpg
wget https://mirrors.tuna.tsinghua.edu.cn/epel/RPM-GPG-KEY-EPEL-7
yum -y install epel-release
sed -e 's!^metalink=!#metalink=!g' \
-e 's!^#baseurl=!baseurl=!g' \
-e 's!//download\.fedoraproject\.org/pub!//mirrors.tuna.tsinghua.edu.cn!g' \
-e 's!http://mirrors\.tuna!https://mirrors.tuna!g' \
-i /etc/yum.repos.d/epel.repo /etc/yum.repos.d/epel-testing.repo
网络
IB驱动,后续关于IB的学习会更新在Centos 配置IB网络
## 如果ib网卡不识别,或着识别后配置后无法启用,安装ib驱动
yum install -y infiniband-diags
yum install -y opensm
systemctl start opensm
systemctl enable opensm
#后期编译mvapich时需要ib库
yum install -y libibverbs
yum install -y libibverbs-devel
yum install -y libibmad-devel
master
#node7
#主机名
hostnamectl --static set-hostname master
sed -i 's/BOOTPROTO=dhcp/BOOTPROTO=static/g' /etc/sysconfig/network-scripts/ifcfg-ib0
sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-ib0
echo IPADDR=172.16.100.7 >> /etc/sysconfig/network-scripts/ifcfg-ib0
echo PREFIX=24 >> /etc/sysconfig/network-scripts/ifcfg-ib0
client01
hostnamectl --static set-hostname client01
sed -i 's/BOOTPROTO=dhcp/BOOTPROTO=static/g' /etc/sysconfig/network-scripts/ifcfg-ib0
sed -i 's/ONBOOT=no/ONBOOT=yes/g' /etc/sysconfig/network-scripts/ifcfg-ib0
echo IPADDR=172.16.100.8 >> /etc/sysconfig/network-scripts/ifcfg-ib0
echo PREFIX=24 >> /etc/sysconfig/network-scripts/ifcfg-ib0
hosts
echo 172.16.100.7 master >> /etc/hosts
echo 172.16.100.8 client01 >> /etc/hosts
开机自启动脚本
vi /etc/systemd/system/rc-local.service
填入
[Unit]
Description=/etc/rc.local Compatibility
ConditionPathExists=/etc/rc.local
[Service]
Type=forking
ExecStart=/etc/rc.local start
TimeoutSec=0
RemainAfterExit=yes
[Install]
WantedBy=multi-user.target
Alias=rc-local.service
添加执行脚本到/etc/rc.local
systemctl daemon-reload
systemctl enable rc-local
chmod +x /etc/rc.local
NIS,NFS,pbs
按照从0搭建Centos7 计算集群配置防火墙,NIS,NFS,pbs系统,使用master作为这些系统的服务器
注意:关闭SELinux 程序编译安装在/opt
目录进行共享
安装编译环境
把下面的命令填入gnu-mvapich.sh
执行gnu-mvapich.sh
$ROOT
是安装目录,因为/opt
目录是共享的,所以安装在此目录
#ROOT=$HOME/opt/gnu-mvapich
ROOT=/opt/gnu-mvapich
mkdir -p $ROOT
mkdir $ROOT/source
#创建编译脚本
cd $ROOT/source
cat << EOF > ./make.sh
if [ -e ./netlib.py ]
then
./setup.py --prefix=$ROOT/math --downall
else
make -j10
make
make install
fi
EOF
#添加环境变量到/etc/profile或者个人bashrx或者作业脚本
MATHDIR=$ROOT/math/lib
export LD_LIBRARY_PATH=$ROOT/mvapich2-2.3.1/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$ROOT/libxc-4.3.4/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$ROOT/fftw-3.3.3/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$ROOT/math/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$ROOT/mvapich2-2.3.1/lib:$LIBRARY_PATH
export LIBRARY_PATH=$ROOT/libxc-4.3.4/lib:$LIBRARY_PATH
export LIBRARY_PATH=$ROOT/fftw-3.3.3/lib:$LIBRARY_PATH
export LIBRARY_PATH=$ROOT/math/lib:$LIBRARY_PATH
export C_INCLUDE_PATH=$ROOT/mvapich2-2.3.1/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$ROOT/libxc-4.3.4/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$ROOT/fftw-3.3.3/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$ROOT/math/include:$C_INCLUDE_PATH
export PATH=$ROOT/mvapich2-2.3.1/bin:$PATH
export PATH=$ROOT/fftw-3.3.3/bin:$PATH
#mvapich
cd $ROOT/source
wget https://cndaqiang.gitee.io/packages/mirrors/mvapich2/mvapich2-2.3.1.tar.gz
tar xzvf mvapich2-2.3.1.tar.gz
cd mvapich2-2.3.1/
./configure --prefix=$ROOT/mvapich2-2.3.1 CC=gcc FC=gfortran CXX=g++
make -j40
#安装
make install
#libxc-4.3.4 for octopus-10.1
cd $ROOT/source
wget https://cndaqiang.gitee.io/packages//mirrors/libxc/libxc-4.3.4.tar.gz
tar xzvf libxc-4.3.4.tar.gz
cd libxc-4.3.4
#--enable-shared 动态库
./configure --prefix=$ROOT/libxc-4.3.4 CC=gcc CXX=g++ FC=gfortran --enable-shared
bash ../make.sh
#fft
cd $ROOT/source
wget https://cndaqiang.gitee.io/packages//mirrors/fftw/fftw-3.3.3.tar.gz
tar xzvf fftw-3.3.3.tar.gz
cd fftw-3.3.3/
./configure --prefix=$ROOT/fftw-3.3.3 --enable-mpi --enable-shared
bash ../make.sh
#从0编译动态库scalapack
if [ ! -d $ROOT/math ]
then
mkdir $ROOT/math
mkdir $ROOT/math/lib
mkdir $ROOT/math/include
fi
cd $ROOT/source
wget https://cndaqiang.gitee.io/packages//mirrors/math/lapack-3.8.0.tar.gz
rm -rf lapack-3.8.0
tar xzvf lapack-3.8.0.tar.gz
cd lapack-3.8.0/
#修改编译参数,并默认输出动态库
#BLAS&LAPACK
echo -e "
all:libblas.so
libblas.so: \$(ALLOBJ)
\t \$(FORTRAN) -shared -Wl,-soname,\$@ -o \$@ \$(ALLOBJ)
" >> BLAS/SRC/Makefile
#TMG
echo -e "
all:libtmg.so
libtmg.so: \$(ALLOBJ)
\t \$(FORTRAN) -shared -Wl,-soname,\$@ -o \$@ \$(ALLOBJ)
" >> TESTING/MATGEN/Makefile
#LAPACK
echo -e "
all: liblapack.so
liblapack.so: \$(ALLOBJ)
\t \$(FORTRAN) -shared -Wl,-soname,\$@ -o \$@ \$(ALLOBJ)
" >> SRC/Makefile
cp make.inc.example make.inc
echo "OPTS += -fPIC" >> make.inc
echo "NOOPT += -fPIC" >> make.inc
echo -e "
MATHDIR=$ROOT/math/lib
install:all
\t cp $PWD/BLAS/SRC/libblas.so \$(MATHDIR)
\t cp $PWD/SRC/liblapack.so \$(MATHDIR)
\t cp $PWD/TESTING/MATGEN/libtmg.so \$(MATHDIR)
\t cp $PWD/*.a \$(MATHDIR)
" >> make.inc
#安装
make -j36 #必须用make,用其他参数/完成后再输make会编译测试程序,没必要
#不可以注释Makefile中
# all: lapack_install lib blas_testing lapack_testing
#为
# all: lapack_install lib
# #blas_testing lapack_testing
#可以跳过测试的过程,编译更快
# 因为blas_testing依赖blas的编译
#如果非要注释,要添加依赖blas
#Scalapack
cd $ROOT/source
wget https://cndaqiang.gitee.io/packages//mirrors/math/scalapack-2.0.2.tgz
rm -rf scalapack-2.0.2
tar xzvf scalapack-2.0.2.tgz
cd scalapack-2.0.2
#echo -e "
#ALLOBJ += \$(SLASRC) \$(DLASRC) \$(CLASRC) \$(ZLASRC) \
#\t \$(SCLAUX) \$(DZLAUX) \$(ALLAUX)
#all:libscalapack.so
#libscalapack.so: \$(ALLOBJ)
#\t \$(FC) -shared -Wl,-soname,\$@ -o \$@ \$(ALLOBJ)
#" >> SRC/Makefile
#使用fPIC参数,这样静态库可以转为动态库
cp SLmake.inc.example SLmake.inc
echo "FC += -fPIC" >> SLmake.inc
echo "CC += -fPIC" >> SLmake.inc
echo -e "
MATHDIR=$ROOT/math/lib
install: all
\t \$(FC) -shared -o $PWD/libscalapack.so -Wl,--whole-archive $PWD/libscalapack.a -Wl,--no-whole-archive
\t cp $PWD/libscalapack.so \$(MATHDIR)
" >> SLmake.inc
#不支持-j20,就得逐个编译,把每个库依次打包添加到libscalapack.a,没编译成功也有libscalapack.a
make install
安装计算程序
过程略,提交作业 qsub run-qe-iop.sh
#!/bin/bash
#PBS -N QE
#PBS -l nodes=2:ppn=96
#PBS -q regular
#PBS -j oe
#PBS -l walltime=08:00:00
ROOT=/opt/gnu-mvapich
MATHDIR=$ROOT/math/lib
export LD_LIBRARY_PATH=$ROOT/mvapich2-2.3.1/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$ROOT/libxc-4.3.4/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$ROOT/fftw-3.3.3/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$ROOT/math/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$ROOT/mvapich2-2.3.1/lib:$LIBRARY_PATH
export LIBRARY_PATH=$ROOT/libxc-4.3.4/lib:$LIBRARY_PATH
export LIBRARY_PATH=$ROOT/fftw-3.3.3/lib:$LIBRARY_PATH
export LIBRARY_PATH=$ROOT/math/lib:$LIBRARY_PATH
export C_INCLUDE_PATH=$ROOT/mvapich2-2.3.1/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$ROOT/libxc-4.3.4/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$ROOT/fftw-3.3.3/include:$C_INCLUDE_PATH
export C_INCLUDE_PATH=$ROOT/math/include:$C_INCLUDE_PATH
export PATH=$ROOT/mvapich2-2.3.1/bin:$PATH
export PATH=$ROOT/fftw-3.3.3/bin:$PATH
EXEC=~/code/tdpw2.0/q-e-qe-6.4.1-devel-1.0/bin/tdpw.x
#EXEC=pw.x
cd $PBS_O_WORKDIR
NP=`cat $PBS_NODEFILE | wc -l`
echo Job starts at `date`
#mpirun -np $NP -machinefile $PBS_NODEFILE
mpirun $EXE < input.in | tee result
echo Job ends at `date`
192核一起运行,隔音墙都挡不住的噪声
声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。
评论(0)