searchusermenu
  • 发布文章
  • 消息中心
点赞
收藏
评论
分享
原创

基于预编译的二进制包安装slurm

2023-10-13 07:12:11
44
0

环境基于 CentOS 7.6, slurm 基于 v20.11.9 版本, 前置条件是镜像已经完整集成 slurm运行时依赖

munge

准备munge用户

export MUNGE_USER_ID=2000

groupadd -g $MUNGE_USER_ID munge

useradd  -m -c "MUNGE User" -d /var/lib/munge -u $MUNGE_USER_ID -g munge  -s /sbin/nologin munge

安装 munge

yum -y install munge munge-devel

br-A. 生成munge.key

/usr/sbin/create-munge-key

生成/etc/munge/munge.key文件,并将此文件复制到其他节点

scp /etc/munge/munge.key compute-XXX:/etc/munge/munge.key

br-B. 接收 /etc/munge/munge.key 文件并设置文件归属

chown munge:munge /etc/munge/munge.key

开机自启/启动 munge 服务

systemctl enable munge

systemctl start munge

systemctl status munge

slurm

准备依赖

  • hwloc-devel cgroup Task Constraining
  • hdf5-devel HDF5 Job Profiling
  • man2html HTML Man Pages
  • libibumad and libibmad-devel InfiniBand Accounting
  • lua-devel Lua Support
  • mariadb-devel MySQL support for accounting
  • pam-devel PAM Support
  • numactl-devel NUMA Affinity
  • readline-devel Readline Support
  • rrdtool-devel RRD External Sensor Data Collection
  • gtk2 and gtk2-devel sview
  • http-parser-devel slurmrestd
  • json-c-devel slurmrestd
  • libyaml and libyaml-devel slurmrestd
  • libcurl and libcurl-devel slurmrestd
  • libjwt and libjwt-devel slurmrestd

运行及编译依赖:

yum -y install gcc

yum -y install tcl tk dwz libtirpc zip

rpm -ivh --force \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-tkinter-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-libs-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-pip-9.0.3-5.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-setuptools-39.2.0-10.el7.noarch.rpm  \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-debug-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-devel-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-idle-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-test-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/redhat-rpm-config-9.1.0-88.el7.centos.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-rpm-macros-3-32.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-rpm-generators-6-2.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python-rpm-macros-3-32.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/perl-srpm-macros-1-8.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python-srpm-macros-3-32.el7.noarch.rpm

yum -y install openssl openssl-devel \

hwloc hwloc-devel \

hdf5 hdf5-devel \

man2html \

libibumad \

libibmad libibmad-devel \

lua lua-devel \

mariadb mariadb-devel \

pam pam-devel \

numactl numactl-devel \

readline readline-devel \

rrdtool rrdtool-devel \

ncurses ncurses-devel \

gtk2 gtk2-devel \

http-parser http-parser-devel \

json-c json-c-devel \

libyaml libyaml-devel \

libcurl libcurl-devel \

libjwt libjwt-devel

准备slurm用户

export SLURM_USER_ID=2001

groupadd -g $SLURM_USER_ID slurm

useradd  -m -c "SLURM workload manager" -d /var/lib/slurm -u $SLURM_USER_ID -g slurm  -s /bin/bash slurm

安装 slurm

安装 slurm-v20.11.9-centos-7.6-amd64.tar.gz

tar xf slurm-v20.11.9-centos-7.6-amd64.tar.gz -C /opt

export PREFIX=/opt/slurm/v20.11.9

拷贝 service 文件

find $PREFIX/etc/systemd/system | grep service$ | xargs -i cp -v {} /etc/systemd/system/

systemctl daemon-reload

生成配置文件

$PREFIX/etc/slurmdbd.conf

cp  $PREFIX/etc/slurmdbd.conf.example $PREFIX/etc/slurmdbd.conf

chmod 600 $PREFIX/etc/slurmdbd.conf

chown -R slurm:slurm $PREFIX/etc/slurmdbd.conf

安装数据库

yum -y install mariadb-server

systemctl start mariadb.service

systemctl enable mariadb.service

# 设置 root 密码

# 移除 anonymous 用户

# 禁止 root 远程登录

# 移除 test 数据库

/usr/bin/mysql_secure_installation

创建 MySQL 管理账户

mysql -u root -p

create user 'slurm'@'%' identified by '123456';

grant all privileges on *.* to 'slurm'@'%' identified by '123456';

flush privileges;

配置 $PREFIX/etc/slurmdbd.conf(注意: Slurm 配置文件不会展开 shell 变量, 下文中的 $PREFIX 仅为占位符, 写入文件时需替换为实际路径 /opt/slurm/v20.11.9)

#

# Example slurmdbd.conf file.

#

# See the slurmdbd.conf man page for more information.

#

# Archive info

#ArchiveJobs=yes

#ArchiveDir="/tmp"

#ArchiveSteps=yes

#ArchiveScript=

#JobPurge=12

#StepPurge=1

#

# Authentication info

AuthType=auth/munge

#AuthInfo=/var/run/munge/munge.socket.2

#

# slurmDBD info

DbdAddr=localhost

DbdHost=localhost

#DbdPort=7031

SlurmUser=slurm

#MessageTimeout=300

DebugLevel=verbose

#DefaultQOS=normal,standby

LogFile=$PREFIX/var/log/slurmdbd.log

PidFile=$PREFIX/run/slurmdbd.pid

#PluginDir=/usr/lib/slurm

#PrivateData=accounts,users,usage,jobs

#TrackWCKey=yes

#

# Database info

StorageType=accounting_storage/mysql

#StorageHost=localhost

#StoragePort=1234

StoragePass=123456

StorageUser=slurm

#StorageLoc=slurm_acct_db

启动 slurmdbd

systemctl start slurmdbd

systemctl enable slurmdbd

systemctl status slurmdbd

准备 $PREFIX/etc/slurm.conf

cp  $PREFIX/etc/slurm.conf.example $PREFIX/etc/slurm.conf

$PREFIX/etc/slurm.conf 配置如下(注意: Slurm 配置文件不会展开 shell 变量, 下文中的 $PREFIX 仅为占位符, 写入文件时需替换为实际路径 /opt/slurm/v20.11.9)

#

# Example slurm.conf file. Please run configurator.html

# (in doc/html) to build a configuration file customized

# for your environment.

#

#

# slurm.conf file generated by configurator.html.

#

# See the slurm.conf man page for more information.

#

ClusterName=linux

ControlMachine=linux0

#ControlAddr=

#BackupController=

#BackupAddr=

#

SlurmUser=slurm

#SlurmdUser=root

SlurmctldPort=6817

SlurmdPort=6818

AuthType=auth/munge

#JobCredentialPrivateKey=

#JobCredentialPublicCertificate=

StateSaveLocation=$PREFIX/var/spool

SlurmdSpoolDir=$PREFIX/var/spool

SwitchType=switch/none

MpiDefault=none

SlurmctldPidFile=$PREFIX/run/slurmctld.pid

SlurmdPidFile=$PREFIX/run/slurmd.pid

ProctrackType=proctrack/pgid

#PluginDir=

#FirstJobId=

ReturnToService=0

#MaxJobCount=

#PlugStackConfig=

#PropagatePrioProcess=

#PropagateResourceLimits=

#PropagateResourceLimitsExcept=

#Prolog=

#Epilog=

#SrunProlog=

#SrunEpilog=

#TaskProlog=

#TaskEpilog=

#TaskPlugin=

#TrackWCKey=no

#TreeWidth=50

#TmpFS=

#UsePAM=

#

# TIMERS

SlurmctldTimeout=300

SlurmdTimeout=300

InactiveLimit=0

MinJobAge=300

KillWait=30

Waittime=0

#

# SCHEDULING

SchedulerType=sched/backfill

#SchedulerAuth=

SelectType=select/cons_tres

SelectTypeParameters=CR_Core

#PriorityType=priority/multifactor

#PriorityDecayHalfLife=14-0

#PriorityUsageResetPeriod=14-0

#PriorityWeightFairshare=100000

#PriorityWeightAge=1000

#PriorityWeightPartition=10000

#PriorityWeightJobSize=1000

#PriorityMaxAge=1-0

#

# LOGGING

SlurmctldDebug=info

SlurmctldLogFile=$PREFIX/var/log/slurmctld.log

SlurmdDebug=info

SlurmdLogFile=$PREFIX/var/log/slurmd.log

JobCompType=jobcomp/none

#JobCompLoc=

#

# ACCOUNTING

#JobAcctGatherType=jobacct_gather/linux

#JobAcctGatherFrequency=30

#

AccountingStorageType=accounting_storage/slurmdbd

#AccountingStorageHost=

#AccountingStorageLoc=

#AccountingStoragePass=

#AccountingStorageUser=

 

AuthAltTypes=auth/jwt

AuthAltParameters=jwt_key=$PREFIX/var/spool/jwt.key

 

# COMPUTE NODES

PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP

NodeName=linux0 CPUs=4 Boards=1 SocketsPerBoard=1 CoresPerSocket=4 ThreadsPerCore=1 RealMemory=1998

注意: 先删除配置文件中自带的 NodeName=xxxx 行, 再将 $PREFIX/sbin/slurmd -C 的输出追加到 $PREFIX/etc/slurm.conf, 然后删除追加内容中的 UpTime 行; 另外不要遗漏 jwt 认证配置(AuthAltTypes / AuthAltParameters)。对应命令如下:

sed -i '/^NodeName=/d' $PREFIX/etc/slurm.conf

$PREFIX/sbin/slurmd -C >> $PREFIX/etc/slurm.conf

sed -i '/^UpTime=/d' $PREFIX/etc/slurm.conf

 

 

dd if=/dev/random of=$PREFIX/var/spool/jwt.key bs=32 count=1

chown slurm:slurm $PREFIX/var/spool/jwt.key

chmod 0600 $PREFIX/var/spool/jwt.key

$PREFIX/etc/cgroup.conf

cp $PREFIX/etc/cgroup.conf.example $PREFIX/etc/cgroup.conf

$PREFIX/etc/cgroup.conf 文件内容如下

###

#

# Slurm cgroup support configuration file

#

# See man slurm.conf and man cgroup.conf for further

# information on cgroup configuration parameters

#--

CgroupAutomount=yes

 

ConstrainCores=no

ConstrainRAMSpace=no

启动 slurmctld/slurmd

systemctl enable slurmctld

systemctl start slurmctld

systemctl status slurmctld

 

systemctl enable slurmd

systemctl start slurmd

systemctl status slurmd

启动 slurmrestd

systemctl daemon-reload

systemctl start slurmrestd

systemctl enable slurmrestd

systemctl status slurmrestd

验证srun

$PREFIX/bin/srun -n4 hostname

验证sinfo

$PREFIX/bin/sinfo

验证 slurmrestd

export `$PREFIX/bin/scontrol token username=slurm`

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/openapi

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/slurm/v0.0.35/diag

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/slurm/v0.0.35/nodes

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/slurm/v0.0.35/jobs

 

0条评论
作者已关闭评论
小梅
19文章数
0粉丝数
小梅
19 文章 | 0 粉丝
原创

基于预编译的二进制包安装slurm

2023-10-13 07:12:11
44
0

环境基于 CentOS 7.6, slurm 基于 v20.11.9 版本, 前置条件是镜像已经完整集成 slurm运行时依赖

munge

准备munge用户

export MUNGE_USER_ID=2000

groupadd -g $MUNGE_USER_ID munge

useradd  -m -c "MUNGE User" -d /var/lib/munge -u $MUNGE_USER_ID -g munge  -s /sbin/nologin munge

安装 munge

yum -y install munge munge-devel

br-A. 生成munge.key

/usr/sbin/create-munge-key

生成/etc/munge/munge.key文件,并将此文件复制到其他节点

scp /etc/munge/munge.key compute-XXX:/etc/munge/munge.key

br-B. 接收 /etc/munge/munge.key 文件并设置文件归属

chown munge:munge /etc/munge/munge.key

开机自启/启动 munge 服务

systemctl enable munge

systemctl start munge

systemctl status munge

slurm

准备依赖

  • hwloc-devel cgroup Task Constraining
  • hdf5-devel HDF5 Job Profiling
  • man2html HTML Man Pages
  • libibumad and libibmad-devel InfiniBand Accounting
  • lua-devel Lua Support
  • mariadb-devel MySQL support for accounting
  • pam-devel PAM Support
  • numactl-devel NUMA Affinity
  • readline-devel Readline Support
  • rrdtool-devel RRD External Sensor Data Collection
  • gtk2 and gtk2-devel sview
  • http-parser-devel slurmrestd
  • json-c-devel slurmrestd
  • libyaml and libyaml-devel slurmrestd
  • libcurl and libcurl-devel slurmrestd
  • libjwt and libjwt-devel slurmrestd

运行及编译依赖:

yum -y install gcc

yum -y install tcl tk dwz libtirpc zip

rpm -ivh --force \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-tkinter-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-libs-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-pip-9.0.3-5.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-setuptools-39.2.0-10.el7.noarch.rpm  \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-debug-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-devel-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-idle-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-test-3.6.8-10.el7.x86_64.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/redhat-rpm-config-9.1.0-88.el7.centos.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-rpm-macros-3-32.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python3-rpm-generators-6-2.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python-rpm-macros-3-32.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/perl-srpm-macros-1-8.el7.noarch.rpm \

https://mirror.tuna.tsinghua.edu.cn/centos-vault/7.7.1908/os/x86_64/Packages/python-srpm-macros-3-32.el7.noarch.rpm

yum -y install openssl openssl-devel \

hwloc hwloc-devel \

hdf5 hdf5-devel \

man2html \

libibumad \

libibmad libibmad-devel \

lua lua-devel \

mariadb mariadb-devel \

pam pam-devel \

numactl numactl-devel \

readline readline-devel \

rrdtool rrdtool-devel \

ncurses ncurses-devel \

gtk2 gtk2-devel \

http-parser http-parser-devel \

json-c json-c-devel \

libyaml libyaml-devel \

libcurl libcurl-devel \

libjwt libjwt-devel

准备slurm用户

export SLURM_USER_ID=2001

groupadd -g $SLURM_USER_ID slurm

useradd  -m -c "SLURM workload manager" -d /var/lib/slurm -u $SLURM_USER_ID -g slurm  -s /bin/bash slurm

安装 slurm

安装 slurm-v20.11.9-centos-7.6-amd64.tar.gz

tar xf slurm-v20.11.9-centos-7.6-amd64.tar.gz -C /opt

export PREFIX=/opt/slurm/v20.11.9

拷贝 service 文件

find $PREFIX/etc/systemd/system | grep service$ | xargs -i cp -v {} /etc/systemd/system/

systemctl daemon-reload

生成配置文件

$PREFIX/etc/slurmdbd.conf

cp  $PREFIX/etc/slurmdbd.conf.example $PREFIX/etc/slurmdbd.conf

chmod 600 $PREFIX/etc/slurmdbd.conf

chown -R slurm:slurm $PREFIX/etc/slurmdbd.conf

安装数据库

yum -y install mariadb-server

systemctl start mariadb.service

systemctl enable mariadb.service

# 设置 root 密码

# 移除 anonymous 用户

# 禁止 root 远程登录

# 移除 test 数据库

/usr/bin/mysql_secure_installation

创建 MySQL 管理账户

mysql -u root -p

create user 'slurm'@'%' identified by '123456';

grant all privileges on *.* to 'slurm'@'%' identified by '123456';

flush privileges;

配置 $PREFIX/etc/slurmdbd.conf(注意: Slurm 配置文件不会展开 shell 变量, 下文中的 $PREFIX 仅为占位符, 写入文件时需替换为实际路径 /opt/slurm/v20.11.9)

#

# Example slurmdbd.conf file.

#

# See the slurmdbd.conf man page for more information.

#

# Archive info

#ArchiveJobs=yes

#ArchiveDir="/tmp"

#ArchiveSteps=yes

#ArchiveScript=

#JobPurge=12

#StepPurge=1

#

# Authentication info

AuthType=auth/munge

#AuthInfo=/var/run/munge/munge.socket.2

#

# slurmDBD info

DbdAddr=localhost

DbdHost=localhost

#DbdPort=7031

SlurmUser=slurm

#MessageTimeout=300

DebugLevel=verbose

#DefaultQOS=normal,standby

LogFile=$PREFIX/var/log/slurmdbd.log

PidFile=$PREFIX/run/slurmdbd.pid

#PluginDir=/usr/lib/slurm

#PrivateData=accounts,users,usage,jobs

#TrackWCKey=yes

#

# Database info

StorageType=accounting_storage/mysql

#StorageHost=localhost

#StoragePort=1234

StoragePass=123456

StorageUser=slurm

#StorageLoc=slurm_acct_db

启动 slurmdbd

systemctl start slurmdbd

systemctl enable slurmdbd

systemctl status slurmdbd

准备 $PREFIX/etc/slurm.conf

cp  $PREFIX/etc/slurm.conf.example $PREFIX/etc/slurm.conf

$PREFIX/etc/slurm.conf 配置如下(注意: Slurm 配置文件不会展开 shell 变量, 下文中的 $PREFIX 仅为占位符, 写入文件时需替换为实际路径 /opt/slurm/v20.11.9)

#

# Example slurm.conf file. Please run configurator.html

# (in doc/html) to build a configuration file customized

# for your environment.

#

#

# slurm.conf file generated by configurator.html.

#

# See the slurm.conf man page for more information.

#

ClusterName=linux

ControlMachine=linux0

#ControlAddr=

#BackupController=

#BackupAddr=

#

SlurmUser=slurm

#SlurmdUser=root

SlurmctldPort=6817

SlurmdPort=6818

AuthType=auth/munge

#JobCredentialPrivateKey=

#JobCredentialPublicCertificate=

StateSaveLocation=$PREFIX/var/spool

SlurmdSpoolDir=$PREFIX/var/spool

SwitchType=switch/none

MpiDefault=none

SlurmctldPidFile=$PREFIX/run/slurmctld.pid

SlurmdPidFile=$PREFIX/run/slurmd.pid

ProctrackType=proctrack/pgid

#PluginDir=

#FirstJobId=

ReturnToService=0

#MaxJobCount=

#PlugStackConfig=

#PropagatePrioProcess=

#PropagateResourceLimits=

#PropagateResourceLimitsExcept=

#Prolog=

#Epilog=

#SrunProlog=

#SrunEpilog=

#TaskProlog=

#TaskEpilog=

#TaskPlugin=

#TrackWCKey=no

#TreeWidth=50

#TmpFS=

#UsePAM=

#

# TIMERS

SlurmctldTimeout=300

SlurmdTimeout=300

InactiveLimit=0

MinJobAge=300

KillWait=30

Waittime=0

#

# SCHEDULING

SchedulerType=sched/backfill

#SchedulerAuth=

SelectType=select/cons_tres

SelectTypeParameters=CR_Core

#PriorityType=priority/multifactor

#PriorityDecayHalfLife=14-0

#PriorityUsageResetPeriod=14-0

#PriorityWeightFairshare=100000

#PriorityWeightAge=1000

#PriorityWeightPartition=10000

#PriorityWeightJobSize=1000

#PriorityMaxAge=1-0

#

# LOGGING

SlurmctldDebug=info

SlurmctldLogFile=$PREFIX/var/log/slurmctld.log

SlurmdDebug=info

SlurmdLogFile=$PREFIX/var/log/slurmd.log

JobCompType=jobcomp/none

#JobCompLoc=

#

# ACCOUNTING

#JobAcctGatherType=jobacct_gather/linux

#JobAcctGatherFrequency=30

#

AccountingStorageType=accounting_storage/slurmdbd

#AccountingStorageHost=

#AccountingStorageLoc=

#AccountingStoragePass=

#AccountingStorageUser=

 

AuthAltTypes=auth/jwt

AuthAltParameters=jwt_key=$PREFIX/var/spool/jwt.key

 

# COMPUTE NODES

PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP

NodeName=linux0 CPUs=4 Boards=1 SocketsPerBoard=1 CoresPerSocket=4 ThreadsPerCore=1 RealMemory=1998

注意: 先删除配置文件中自带的 NodeName=xxxx 行, 再将 $PREFIX/sbin/slurmd -C 的输出追加到 $PREFIX/etc/slurm.conf, 然后删除追加内容中的 UpTime 行; 另外不要遗漏 jwt 认证配置(AuthAltTypes / AuthAltParameters)。对应命令如下:

sed -i '/^NodeName=/d' $PREFIX/etc/slurm.conf

$PREFIX/sbin/slurmd -C >> $PREFIX/etc/slurm.conf

sed -i '/^UpTime=/d' $PREFIX/etc/slurm.conf

 

 

dd if=/dev/random of=$PREFIX/var/spool/jwt.key bs=32 count=1

chown slurm:slurm $PREFIX/var/spool/jwt.key

chmod 0600 $PREFIX/var/spool/jwt.key

$PREFIX/etc/cgroup.conf

cp $PREFIX/etc/cgroup.conf.example $PREFIX/etc/cgroup.conf

$PREFIX/etc/cgroup.conf 文件内容如下

###

#

# Slurm cgroup support configuration file

#

# See man slurm.conf and man cgroup.conf for further

# information on cgroup configuration parameters

#--

CgroupAutomount=yes

 

ConstrainCores=no

ConstrainRAMSpace=no

启动 slurmctld/slurmd

systemctl enable slurmctld

systemctl start slurmctld

systemctl status slurmctld

 

systemctl enable slurmd

systemctl start slurmd

systemctl status slurmd

启动 slurmrestd

systemctl daemon-reload

systemctl start slurmrestd

systemctl enable slurmrestd

systemctl status slurmrestd

验证srun

$PREFIX/bin/srun -n4 hostname

验证sinfo

$PREFIX/bin/sinfo

验证 slurmrestd

export `$PREFIX/bin/scontrol token username=slurm`

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/openapi

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/slurm/v0.0.35/diag

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/slurm/v0.0.35/nodes

curl -H "X-SLURM-USER-NAME:slurm" -H "X-SLURM-USER-TOKEN:${SLURM_JWT}" localhost:6820/slurm/v0.0.35/jobs

 

文章来自个人专栏
外设中断
9 文章 | 1 订阅
0条评论
作者已关闭评论
作者已关闭评论
1
0