| @@ -1,20 +0,0 @@ | |||||
| package main | |||||
| import "slurm/extra" | |||||
| import "fmt" | |||||
| import "os" | |||||
| import "strconv" | |||||
| func main(){ | |||||
| if len(os.Args)<2 { | |||||
| fmt.Printf("Please specify Job ID\n") | |||||
| return | |||||
| } | |||||
| id,_ := strconv.Atoi(os.Args[1]) | |||||
| fmt.Printf("try to cancel %d\n", id) | |||||
| err := extra.Cancel_job(uint32(id)) | |||||
| if err != nil { | |||||
| fmt.Println(err.Error()) | |||||
| } | |||||
| } | |||||
| @@ -1,25 +0,0 @@ | |||||
| package main | |||||
| import "slurm/extra" | |||||
| import "fmt" | |||||
| import "os" | |||||
| import "strconv" | |||||
| func main(){ | |||||
| if len(os.Args)<2 { | |||||
| fmt.Printf("Please specify Job ID\n") | |||||
| return | |||||
| } | |||||
| id,_ := strconv.Atoi(os.Args[1]) | |||||
| jobs, err := extra.Get_job_info_accounting(uint32(id)) | |||||
| if err != nil { | |||||
| fmt.Println(err.Error()) | |||||
| return | |||||
| } | |||||
| fmt.Printf("JobId\tuser\taccount\tstate\t\tJobName\n") | |||||
| for i := range(jobs) { | |||||
| fmt.Printf("%d\t%s\t%s\t%s\t%s\n", jobs[i].JobId, jobs[i].User, jobs[i].Account, jobs[i].State, jobs[i].JobName) | |||||
| } | |||||
| } | |||||
| @@ -1,19 +0,0 @@ | |||||
| package main | |||||
| import "slurm/jobinfo" | |||||
| import "fmt" | |||||
| func main(){ | |||||
| job_list := job_info.Get_all_jobs() | |||||
| fmt.Printf("Found %d jobs \n", job_list.Record_count) | |||||
| /* a little bit nicer */ | |||||
| fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n") | |||||
| fmt.Printf("________________________________________________________________________________________________\n") | |||||
| for i := range job_list.Job_list { | |||||
| job := job_list.Job_list[i] | |||||
| fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" , | |||||
| job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS , | |||||
| job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority) | |||||
| } | |||||
| } | |||||
| @@ -1,20 +0,0 @@ | |||||
| package main | |||||
| import "slurm/nodeinfo" | |||||
| import "fmt" | |||||
| func main(){ | |||||
| node_list := node_info.Get_all_nodes() | |||||
| fmt.Printf("Found %d nodes \n", node_list.Record_count) | |||||
| /* a little bit nicer*/ | |||||
| fmt.Printf("name\t State\t\t\t Reason\t\t Tres\n") | |||||
| fmt.Printf("________________________________________\n") | |||||
| for i := range node_list.Node_list { | |||||
| node := node_list.Node_list[i] | |||||
| fmt.Printf("%s\t %s\t %s\t %s\n", node.Node_hostname, node_info.State_to_string(node.Node_state), node.Reason, node.Tres_fmt_str) | |||||
| } | |||||
| } | |||||
| @@ -1,15 +0,0 @@ | |||||
| package main | |||||
| import "slurm" | |||||
| import "fmt" | |||||
| func main(){ | |||||
| var config slurm.Ctl_conf | |||||
| version := slurm.Version() | |||||
| fmt.Printf("Version is %s\n", slurm.VersionString(version)) | |||||
| config = slurm.GetConfig() | |||||
| slurm.Print_Ctl_conf(config) | |||||
| } | |||||
| @@ -1,41 +0,0 @@ | |||||
| package main | |||||
| import "slurm/jobinfo" | |||||
| import "slurm" | |||||
| import "fmt" | |||||
| import "os" | |||||
| import "strconv" | |||||
| func main(){ | |||||
| if len(os.Args)<2 { | |||||
| fmt.Printf("Please specify Job ID\n") | |||||
| return | |||||
| } | |||||
| id,_ := strconv.Atoi(os.Args[1]) | |||||
| job_list := job_info.Get_job(uint32(id)) | |||||
| if job_list.Error_code != 0 { | |||||
| msg := slurm.GetErrorString(job_list.Error_code) | |||||
| fmt.Printf("Error: %s\n" ,msg) | |||||
| return | |||||
| } | |||||
| for i := range job_list.Job_list { | |||||
| job_info.Print_Job_info(job_list.Job_list[i]) | |||||
| } | |||||
| fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n") | |||||
| fmt.Printf("________________________________________________________________________________________________\n") | |||||
| for i := range job_list.Job_list { | |||||
| job := job_list.Job_list[i] | |||||
| fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" , | |||||
| job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS, | |||||
| job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority) | |||||
| } | |||||
| end_time := job_info.Get_job_endtime(uint32(id)) | |||||
| fmt.Printf("End-Time: %s\n", end_time) | |||||
| } | |||||
| @@ -1,32 +0,0 @@ | |||||
| package main | |||||
| import "slurm/nodeinfo" | |||||
| import "slurm" | |||||
| import "fmt" | |||||
| import "os" | |||||
| func main(){ | |||||
| if len(os.Args)<2 { | |||||
| fmt.Printf("Please specify node name\n") | |||||
| return | |||||
| } | |||||
| name:= os.Args[1] | |||||
| node_list := node_info.Get_node_info(name) | |||||
| if node_list.Error_code != 0 { | |||||
| msg := slurm.GetErrorString(node_list.Error_code) | |||||
| fmt.Printf("Error: %s\n" ,msg) | |||||
| return | |||||
| } | |||||
| fmt.Printf("Found %d nodes \n", node_list.Record_count) | |||||
| /* a little bit nicer*/ | |||||
| fmt.Printf("name\t State\t\t\t Reason\t\t Tres\n") | |||||
| fmt.Printf("________________________________________\n") | |||||
| for i := range node_list.Node_list { | |||||
| node := node_list.Node_list[i] | |||||
| fmt.Printf("%s\t %s\t %s\t %s\n", node.Node_hostname, node_info.State_to_string(node.Node_state), node.Reason, node.Tres_fmt_str) | |||||
| } | |||||
| } | |||||
| @@ -1,21 +0,0 @@ | |||||
| package main | |||||
| import ( | |||||
| partition_info "code.gitlink.org.cn/JCCE/PCM.git/adaptor/pcm_slurm/cgo_tianhe/src/slurm/partitioninfo" | |||||
| "fmt" | |||||
| ) | |||||
| func main() { | |||||
| partition_list := partition_info.Get_partitions() | |||||
| fmt.Printf("Found %d partitions \n", partition_list.Record_count) | |||||
| /* a little bit nicer */ | |||||
| fmt.Printf("Name\t Nodes\t\t\t Max_time(min)\t\t Node_inx\n") | |||||
| fmt.Printf("________________________________________\n") | |||||
| for i := range partition_list.Partition_list { | |||||
| partition := partition_list.Partition_list[i] | |||||
| fmt.Printf("%s\t %s\t %d\t %d\n", partition.Name, partition.Nodes, partition.Max_time, partition.Node_inx) | |||||
| } | |||||
| } | |||||
| @@ -1,35 +0,0 @@ | |||||
| package main | |||||
| import "slurm/jobinfo" | |||||
| import "slurm" | |||||
| import "fmt" | |||||
| import "os" | |||||
| func main(){ | |||||
| if len(os.Args)<2 { | |||||
| fmt.Printf("Please specify username\n") | |||||
| return | |||||
| } | |||||
| name := os.Args[1] | |||||
| job_list := job_info.Get_user_jobs(name) | |||||
| if job_list.Error_code != 0 { | |||||
| msg := slurm.GetErrorString(job_list.Error_code) | |||||
| fmt.Printf("Error: %s\n" ,msg) | |||||
| return | |||||
| } | |||||
| fmt.Printf("Id\tName\t\tPartition\tUser\tRuntime\tStatus\t\t(Reason)\tNodes\tPriority\n") | |||||
| fmt.Printf("________________________________________________________________________________________________\n") | |||||
| for i := range job_list.Job_list { | |||||
| job := job_list.Job_list[i] | |||||
| fmt.Printf("%d\t%s\t%s\t%s %s\t%s\t%s\t%s\t%d\n" , | |||||
| job.Job_id, job.Name, job.Partition, job.User_name,job_info.Get_job_runtime(job).String(), job.Job_stateS , | |||||
| job_info.Reason_to_string(job.State_reason), job.Nodes,job.Priority) | |||||
| } | |||||
| } | |||||
| @@ -1,109 +0,0 @@ | |||||
| # Submission of jobs | |||||
| This folder shows, in a few more examples, how jobs can be submitted to Slurm. Some examples use containers. | |||||
| Attention: The parameters for job names and partitions probably have to be adjusted! | |||||
| # Simple Jobs | |||||
| ## submit_job.go | |||||
| In this example, a simple bash job is submitted. The partition used is *long* (adjust if necessary). | |||||
| ``` | |||||
| job_desc.Partition="long" | |||||
| ``` | |||||
| The job sets two environment variables and executes | |||||
| ``` | |||||
| hostname | |||||
| env | grep SLURM | |||||
| ``` | |||||
| on a single node of the cluster (a single-task job). | |||||
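| The two environment variables come from the job descriptor; for reference, this is the relevant line from `submit_job.go` (included later in this diff): | |||||
| ``` | |||||
| job_desc.Environment = []string{"SLURM_GO_JOB=TRUE", "SLURM_CONTAINER_JOB=FALSE"} | |||||
| ``` | |||||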
| The application does not wait until the job is completed, but returns directly. | |||||
| The standard output is written to | |||||
| out-jobid.txt, the standard error to err-jobid.txt: | |||||
| ``` | |||||
| job_desc.Std_out = ("./out-%j.txt") | |||||
| job_desc.Std_err = ("./err-%j.txt") | |||||
| ``` | |||||
| ## update_job.go | |||||
| This example allows you to update the QOS and the partition of a job. This can help to move the job to another queue with another partition. | |||||
| Note to users: In theory, the API also allows updating the number of nodes and the tasks per node. However, since this is only allowed for root or a Slurm admin, we do not include an example here. | |||||
| Syntax: | |||||
| ``` | |||||
| ./update_job JobId qos partition | |||||
| ``` | |||||
| (Note: This requires that the job with the given JobId has already been submitted and is in a pending state.) | |||||
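| In code, the update amounts to filling an `Update_job_options` struct and calling `submit_job.Update_job` with the job ID. A minimal sketch following `update_job.go` (included at the end of this diff), where `id` is the job ID parsed from the command line: | |||||
| ``` | |||||
| var ops submit_job.Update_job_options | |||||
| ops.Qos = os.Args[2] | |||||
| ops.Partition = os.Args[3] | |||||
| err := submit_job.Update_job(ops, uint32(id)) | |||||
| if err != uint32(0) { | |||||
| fmt.Printf("error %s \n", slurm.GetErrorString(err)) | |||||
| } | |||||
| ``` | |||||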
| # Container jobs | |||||
| The following examples all submit a job that starts Singularity containers. | |||||
| If the containers do not exist, they are created. However, problems can arise if the user does not have sudo permissions. | |||||
| ## The containers | |||||
| The first container is an MPI container. It is used by `submit_mpi_containier.go` and `submit_mpi_and_update.go`. The definition is stored in `mpi_container.def`. | |||||
| It can also be created with the command | |||||
| ``` | |||||
| sudo singularity build mpi_container.img mpi_container.def | |||||
| ``` | |||||
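| In the Go examples the image is built automatically when it is missing. A condensed sketch of that logic, based on the commented-out sources in this diff (the helpers `fileExists` and `build_container` are defined there; `build_container` shells out to `sudo singularity build`): | |||||
| ``` | |||||
| dir, _ := os.Getwd() | |||||
| container := filepath.Join(dir, "mpi_container.img") | |||||
| definition := filepath.Join(dir, "mpi_container.def") | |||||
| if !fileExists(container) { | |||||
| build_container(definition, container) | |||||
| } | |||||
| ``` | |||||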
| The program mpi_pingpong (source code enclosed: `mpi_pingpong.c`) is built into the container. It performs a ping-pong test between two processes. | |||||
| This container uses the hybrid model, which assumes that MPI is installed on the cluster (to start the job) and also installs it inside the container. It works with Open MPI. | |||||
| The second container is an OpenMP container, including a sample OpenMP program openmp_example (source code: `openmp_example.c`). | |||||
| It can also be created with the command: | |||||
| ``` | |||||
| sudo singularity build openmp_container.img openmp_container.def | |||||
| ``` | |||||
| This container is used by `submit_openmp_container.go`. | |||||
| ## submit_mpi_containier.go | |||||
| Submits an MPI container job to the cluster. It runs two processes on two nodes: | |||||
| ``` | |||||
| job_desc.Min_nodes =uint32(2) | |||||
| job_desc.Num_tasks = uint32(2) | |||||
| ``` | |||||
| The application blocks until the job is completed (see the polling sketch at the end of this section). The standard output is written to | |||||
| jobid-out.txt, the standard error to jobid-err.txt: | |||||
| ``` | |||||
| job_desc.Std_out = ("./%j-out.txt") | |||||
| job_desc.Std_err = ("./%j-err.txt") | |||||
| ``` | |||||
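| The blocking behaviour is implemented by polling: the program repeatedly queries the job and sleeps until the state is neither Pending nor Running. A sketch based on the commented-out source of this example, where `answer` is the submission result and `job`/`job_list` come from a first `job_info.Get_job` call (error handling omitted): | |||||
| ``` | |||||
| state := job.Job_stateS | |||||
| for state == "Pending" || state == "Running" { | |||||
| time.Sleep(2 * time.Second) | |||||
| job_list = job_info.Get_job(answer.Job_id) | |||||
| job = job_list.Job_list[0] | |||||
| state = job.Job_stateS | |||||
| } | |||||
| ``` | |||||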
| ## submit_omp_container.go | |||||
| Submits two OpenMP jobs to the cluster and waits until they are completed. | |||||
| Both jobs allocate *one process* for the job, but *two CPUs per task/process* (for multi-threading). | |||||
| ``` | |||||
| job_desc.Num_tasks = uint32(1) | |||||
| job_desc.Cpus_per_task = uint16(2) | |||||
| ``` | |||||
| The first job reads the environment variable `SLURM_JOB_CPUS_PER_NODE` and sets the number of OpenMP threads to exactly the number of CPUs that are available per task/process: | |||||
| ``` | |||||
| job_desc.Script+= "export OMP_NUM_THREADS=$SLURM_JOB_CPUS_PER_NODE\n" | |||||
| ``` | |||||
| The second job sets the number of threads to 4 (which is oversubscribing, since more threads are started than CPUs were allocated) and executes the same program: | |||||
| ``` | |||||
| job_desc.Script+= "export OMP_NUM_THREADS=4\n" | |||||
| ``` | |||||
| The program waits until both jobs are completed. The results are written to the two output files, similar to `submit_mpi_container.go`. | |||||
| ## submit_mpi_and_update.go | |||||
| This application does the same as `submit_mpi_container.go`, but if the job is still pending after submission, it moves the job to another QOS and partition: | |||||
| ``` | |||||
| ops.Qos = "shortjobs" | |||||
| ops.Partition = "short" | |||||
| ``` | |||||
| This situation can, for example, be created by first submitting other, longer jobs in the background (depending on the partition size) and then starting this application: | |||||
| ``` | |||||
| ./submit_mpi_containier & ./submit_mpi_containier & ./submit_mpi_and_update | |||||
| ``` | |||||
| @@ -1,37 +0,0 @@ | |||||
| Bootstrap: docker | |||||
| From: ubuntu:latest | |||||
| %files | |||||
| mpi_pingpong.c /opt | |||||
| %environment | |||||
| export OMPI_DIR=/home0/opt/openmpi | |||||
| export SINGULARITY_OMPI_DIR=$OMPI_DIR | |||||
| export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin | |||||
| export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib | |||||
| %post | |||||
| echo "Installing required packages..." | |||||
| apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file | |||||
| echo "Installing Open MPI" | |||||
| export OMPI_DIR=/home0/opt/openmpi | |||||
| export OMPI_VERSION=4.0.3 | |||||
| export OMPI_URL="https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-$OMPI_VERSION.tar.bz2" | |||||
| mkdir -p /tmp/ompi | |||||
| mkdir -p /opt | |||||
| chmod a+w /opt/ | |||||
| chmod a+r /opt/ | |||||
| ls -la /tmp/ompi | |||||
| # Download | |||||
| cd /tmp/ompi && wget -O openmpi-$OMPI_VERSION.tar.bz2 $OMPI_URL && tar -xjf openmpi-$OMPI_VERSION.tar.bz2 | |||||
| ls -la | |||||
| # Compile and install | |||||
| cd /tmp/ompi/openmpi-$OMPI_VERSION && ./configure --prefix=$OMPI_DIR && make install | |||||
| # Set env variables so we can compile our application | |||||
| export PATH=$OMPI_DIR/bin:$PATH | |||||
| export LD_LIBRARY_PATH=$OMPI_DIR/lib:$LD_LIBRARY_PATH | |||||
| export MANPATH=$OMPI_DIR/share/man:$MANPATH | |||||
| # rm -r tmp/mpi | |||||
| echo "Compiling the MPI application..." | |||||
| cd /opt && mpicc -o mpi_pingpong mpi_pingpong.c | |||||
| @@ -1,65 +0,0 @@ | |||||
| #include <mpi.h> | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #include <time.h> | |||||
| #define MAX_ITER 1000 | |||||
| int main (int argc, char **argv) { | |||||
| int rc; | |||||
| int size; | |||||
| int myrank; | |||||
| size_t max_send = 1<<22; | |||||
| char *send_buf = (char*)malloc(sizeof(char)*max_send); | |||||
| char *recv_buf = (char*)malloc(sizeof(char)*max_send); | |||||
| size_t send_size; | |||||
| clock_t start, end; | |||||
| rc = MPI_Init (&argc, &argv); | |||||
| if (rc != MPI_SUCCESS) { | |||||
| fprintf (stderr, "MPI_Init() failed"); | |||||
| return EXIT_FAILURE; | |||||
| } | |||||
| rc = MPI_Comm_size (MPI_COMM_WORLD, &size); | |||||
| if (rc != MPI_SUCCESS) { | |||||
| fprintf (stderr, "MPI_Comm_size() failed"); | |||||
| goto exit_with_error; | |||||
| } | |||||
| if (size != 2) { | |||||
| fprintf (stderr, "This program requires exactly two processes\n"); | |||||
| goto exit_with_error; | |||||
| } | |||||
| rc = MPI_Comm_rank (MPI_COMM_WORLD, &myrank); | |||||
| if (rc != MPI_SUCCESS) { | |||||
| fprintf (stderr, "MPI_Comm_rank() failed"); | |||||
| goto exit_with_error; | |||||
| } | |||||
| if(myrank==0) | |||||
| fprintf (stdout, "Size\t Time(ms)\n"); | |||||
| for(send_size=1 ; send_size<= max_send; send_size*=2){ | |||||
| for (int i = 0; i<MAX_ITER+2; i++) { | |||||
| if(i == 2) | |||||
| start = clock(); | |||||
| if(myrank == 0){ | |||||
| MPI_Send(send_buf, send_size, MPI_CHAR, 1, 0x4, MPI_COMM_WORLD); | |||||
| MPI_Recv(recv_buf, send_size, MPI_CHAR, 1, 0x5, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |||||
| } | |||||
| else { | |||||
| MPI_Recv(recv_buf, send_size, MPI_CHAR, 0, 0x4, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |||||
| MPI_Send(send_buf, send_size, MPI_CHAR, 0, 0x5, MPI_COMM_WORLD); | |||||
| } | |||||
| } | |||||
| end = clock(); | |||||
| /* average round-trip time per iteration in milliseconds, to match the Time(ms) header */ | |||||
| double time_taken = 1000.0 * (double)(end - start) / CLOCKS_PER_SEC / MAX_ITER; | |||||
| if (myrank == 0) | |||||
| fprintf(stdout, "%zu\t %f\n", send_size, time_taken); | |||||
| } | |||||
| MPI_Finalize(); | |||||
| return EXIT_SUCCESS; | |||||
| exit_with_error: | |||||
| MPI_Finalize(); | |||||
| return EXIT_FAILURE; | |||||
| } | |||||
| @@ -1,18 +0,0 @@ | |||||
| Bootstrap: docker | |||||
| From: ubuntu:latest | |||||
| %files | |||||
| openmp_example.c /opt | |||||
| %environment | |||||
| export OMPI_DIR=/home0/opt/openmpi | |||||
| export SINGULARITY_OMPI_DIR=$OMPI_DIR | |||||
| export SINGULARITYENV_APPEND_PATH=$OMPI_DIR/bin | |||||
| export SINGULARITYENV_APPEND_LD_LIBRARY_PATH=$OMPI_DIR/lib | |||||
| %post | |||||
| echo "Installing required packages..." | |||||
| apt-get update && apt-get install -y wget git bash gcc gfortran g++ make file | |||||
| echo "Compiling the OpenMP application..." | |||||
| cd /opt && gcc -o openmp_example -fopenmp openmp_example.c | |||||
| @@ -1,14 +0,0 @@ | |||||
| #include <stdio.h> | |||||
| #include <omp.h> | |||||
| int main() { | |||||
| #pragma omp parallel | |||||
| { | |||||
| int id = omp_get_thread_num(); | |||||
| int data = id; | |||||
| int total = omp_get_num_threads(); | |||||
| printf("Greetings from thread %d out of %d with Data %d\n", id, total, data); | |||||
| } | |||||
| printf("parallel region ends.\n"); | |||||
| return 0; | |||||
| } | |||||
| @@ -1,38 +0,0 @@ | |||||
| package main | |||||
| import ( | |||||
| "code.gitlink.org.cn/JCCE/PCM.git/adaptor/pcm_slurm/cgo_tianhe/src/slurm" | |||||
| "code.gitlink.org.cn/JCCE/PCM.git/adaptor/pcm_slurm/cgo_tianhe/src/slurm/submitjob" | |||||
| ) | |||||
| import "os/user" | |||||
| import "os" | |||||
| import "strconv" | |||||
| import "fmt" | |||||
| func main() { | |||||
| job_desc := submit_job.Job_descriptor{} | |||||
| job_desc.Script = "#! /bin/bash\n hostname \n env | grep SLURM " | |||||
| dir, _ := os.Getwd() | |||||
| user, _ := user.Current() | |||||
| userid, _ := strconv.Atoi(user.Uid) | |||||
| job_desc.User_id = uint32(userid) | |||||
| groupid, _ := strconv.Atoi(user.Gid) | |||||
| job_desc.Group_id = uint32(groupid) | |||||
| job_desc.Name = "test_job" | |||||
| job_desc.Partition = "long" | |||||
| job_desc.Time_limit = uint32(2) | |||||
| job_desc.Min_nodes = uint32(1) | |||||
| job_desc.Std_out = ("./out-%j.txt") | |||||
| job_desc.Std_err = ("./err-%j.txt") | |||||
| job_desc.Work_dir = dir | |||||
| job_desc.Environment = []string{"SLURM_GO_JOB=TRUE", "SLURM_CONTAINER_JOB=FALSE"} | |||||
| answer := submit_job.Submit_job(&job_desc) | |||||
| if answer.Error_code != 0 { | |||||
| msg := slurm.GetErrorString(answer.Error_code) | |||||
| fmt.Printf("Error: %s\n", msg) | |||||
| return | |||||
| } | |||||
| fmt.Printf("Submitted Job %d\n", answer.Job_id) | |||||
| } | |||||
| @@ -1,128 +0,0 @@ | |||||
| package main | |||||
| // | |||||
| // | |||||
| //import "slurm/submitjob" | |||||
| //import "slurm" | |||||
| //import "os" | |||||
| //import "strconv" | |||||
| //import "fmt" | |||||
| //import "os/exec" | |||||
| //import "path/filepath" | |||||
| //import "slurm/jobinfo" | |||||
| //import "time" | |||||
| //import "os/user" | |||||
| // | |||||
| // | |||||
| // | |||||
| //func fileExists(filename string) bool { | |||||
| // info, err := os.Stat(filename) | |||||
| // if os.IsNotExist(err) { | |||||
| // return false | |||||
| // } | |||||
| // return !info.IsDir() | |||||
| //} | |||||
| //func build_container(file_name,container_name string){ | |||||
| // | |||||
| // cmd := exec.Command("sudo", "/usr/local/bin/singularity", "build",container_name, file_name) | |||||
| // fmt.Print("Now build new container") | |||||
| // fmt.Printf("%s\n", cmd.String()) | |||||
| // stdoutStderr, err := cmd.CombinedOutput() | |||||
| // if err != nil { | |||||
| // fmt.Printf("error in creating container %s \n", err); | |||||
| //// return | |||||
| // } | |||||
| // fmt.Printf("%s\n", stdoutStderr) | |||||
| //} | |||||
| // | |||||
| //func main(){ | |||||
| // job_desc := submit_job.Job_descriptor{} | |||||
| // | |||||
| // dir, _ := os.Getwd() | |||||
| // container := filepath.Join(dir, "mpi_container.img") | |||||
| // definition := filepath.Join(dir, "mpi_container.def") | |||||
| // if !fileExists(container){ | |||||
| // build_container(definition,container) | |||||
| // } | |||||
| // | |||||
| // if !fileExists(container){ | |||||
| // return | |||||
| // } | |||||
| // /* use Cmd to create our script */ | |||||
| // | |||||
| // job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n srun hostname \n" | |||||
| // cmd := exec.Command( "/home0/opt/openmpi/bin/mpirun", "-mca btl_tcp_if_include eth1", "/usr/local/bin/singularity", "exec",container, "/opt/mpi_pingpong" ) | |||||
| // job_desc.Script+= cmd.String() | |||||
| // fmt.Printf("cmd %s\n", job_desc.Script) | |||||
| // user, _:= user.Current() | |||||
| // userid , _ := strconv.Atoi(user.Uid) | |||||
| // job_desc.User_id= uint32(userid) | |||||
| // groupid , _ := strconv.Atoi(user.Gid) | |||||
| // | |||||
| // job_desc.Group_id= uint32(groupid) | |||||
| // job_desc.Name = "flex_mpi_job" | |||||
| // job_desc.Partition="long" | |||||
| // job_desc.Time_limit = uint32(60) | |||||
| // job_desc.Ntasks_per_node = uint16(1) | |||||
| // job_desc.Num_tasks = uint32(2) | |||||
| // job_desc.Std_out = ("./%j-out.txt") | |||||
| // job_desc.Std_err = ("./%j-err.txt") | |||||
| // job_desc.Work_dir = dir | |||||
| // | |||||
| // time.Sleep(3 * time.Second) | |||||
| // answer := submit_job.Submit_job(&job_desc) | |||||
| // if(answer.Error_code != 0) { | |||||
| // msg := slurm.GetErrorString(answer.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // } | |||||
| // fmt.Printf("Submitted Job %d\n", answer.Job_id) | |||||
| // | |||||
| // time.Sleep(5 * time.Second) | |||||
| // | |||||
| // job_list := job_info.Get_job(answer.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job := job_list.Job_list[0] | |||||
| // | |||||
| // fmt.Printf("job %d is %s\n", answer.Job_id, job.Job_stateS) | |||||
| // state := job.Job_stateS | |||||
| // if state == "Pending" { | |||||
| // fmt.Printf("Move job %d to another partition \n", answer.Job_id) | |||||
| // var ops submit_job.Update_job_options | |||||
| // | |||||
| // ops.Qos = "shortjobs" | |||||
| // ops.Partition = "short" | |||||
| // err2 := submit_job.Update_job(ops, uint32(answer.Job_id)) | |||||
| // if err2!= uint32(0) { | |||||
| // fmt.Printf("error %s \n", slurm.GetErrorString(err2)) | |||||
| // } | |||||
| // } | |||||
| // | |||||
| // for state == "Pending" || state == "Running" { | |||||
| // time.Sleep(2 * time.Second) | |||||
| // job_list = job_info.Get_job(answer.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job = job_list.Job_list[0] | |||||
| // | |||||
| // state = job.Job_stateS | |||||
| // | |||||
| // fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS) | |||||
| // | |||||
| // | |||||
| // } | |||||
| // | |||||
| // fmt.Printf("Total runtime Job %d %s\n",job.Job_id, job_info.Get_job_runtime(job).String() ) | |||||
| //} | |||||
| // | |||||
| // | |||||
| // | |||||
| @@ -1,112 +0,0 @@ | |||||
| package main | |||||
| // | |||||
| //import "slurm/submitjob" | |||||
| //import "slurm" | |||||
| //import "os/user" | |||||
| //import "os" | |||||
| //import "strconv" | |||||
| //import "fmt" | |||||
| //import "os/exec" | |||||
| //import "path/filepath" | |||||
| //import "slurm/jobinfo" | |||||
| //import "time" | |||||
| // | |||||
| // | |||||
| //func fileExists(filename string) bool { | |||||
| // info, err := os.Stat(filename) | |||||
| // if os.IsNotExist(err) { | |||||
| // return false | |||||
| // } | |||||
| // return !info.IsDir() | |||||
| //} | |||||
| //func build_container(file_name,container_name string){ | |||||
| // | |||||
| // cmd := exec.Command("sudo","/usr/local/bin/singularity", "build",container_name, file_name) | |||||
| // fmt.Print("Now build new container") | |||||
| // fmt.Printf("%s\n", cmd.String()) | |||||
| // stdoutStderr, err := cmd.CombinedOutput() | |||||
| // if err != nil { | |||||
| // fmt.Printf("error in creating container %s \n", err) | |||||
| // | |||||
| // fmt.Printf("%s\n", stdoutStderr) | |||||
| //// return | |||||
| // } | |||||
| // fmt.Printf("%s\n", stdoutStderr) | |||||
| //} | |||||
| // | |||||
| //func main(){ | |||||
| // job_desc := submit_job.Job_descriptor{} | |||||
| // | |||||
| // dir, _ := os.Getwd() | |||||
| // container := filepath.Join(dir, "mpi_container.img") | |||||
| // definition := filepath.Join(dir, "mpi_container.def") | |||||
| // if !fileExists(container){ | |||||
| // build_container(definition,container) | |||||
| // } | |||||
| // | |||||
| // if !fileExists(container){ | |||||
| // return | |||||
| // } | |||||
| // /* use Cmd to create our script */ | |||||
| // | |||||
| // job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n" | |||||
| // cmd := exec.Command( "/home0/opt/openmpi/bin/mpirun", "-mca btl_tcp_if_include eth1", "/usr/local/bin/singularity", "exec",container, "/opt/mpi_pingpong" ) | |||||
| // job_desc.Script+= cmd.String() | |||||
| // fmt.Printf("cmd %s\n", job_desc.Script) | |||||
| // user, _:= user.Current() | |||||
| // userid , _ := strconv.Atoi(user.Uid) | |||||
| // job_desc.User_id= uint32(userid) | |||||
| // groupid , _ := strconv.Atoi(user.Gid) | |||||
| // | |||||
| // job_desc.Group_id= uint32(groupid) | |||||
| // job_desc.Name = "mpi_job" | |||||
| // job_desc.Partition="long" | |||||
| // job_desc.Time_limit = uint32(60) | |||||
| // job_desc.Min_nodes =uint32(2) | |||||
| // job_desc.Num_tasks = uint32(2) | |||||
| // job_desc.Std_out = ("./%j-out.txt") | |||||
| // job_desc.Std_err = ("./%j-err.txt") | |||||
| // job_desc.Work_dir = dir | |||||
| // | |||||
| // answer := submit_job.Submit_job(&job_desc) | |||||
| // if(answer.Error_code != 0) { | |||||
| // msg := slurm.GetErrorString(answer.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // } | |||||
| // fmt.Printf("Submitted Job %d\n", answer.Job_id) | |||||
| // | |||||
| // | |||||
| // job_list := job_info.Get_job(answer.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job := job_list.Job_list[0] | |||||
| // | |||||
| // fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS) | |||||
| // state := job.Job_stateS | |||||
| // for state == "Pending" || state == "Running" { | |||||
| // time.Sleep(2 * time.Second) | |||||
| // job_list = job_info.Get_job(answer.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job = job_list.Job_list[0] | |||||
| // | |||||
| // state = job.Job_stateS | |||||
| // | |||||
| // fmt.Printf("job %d is %s\n",answer.Job_id, job.Job_stateS) | |||||
| // | |||||
| // | |||||
| // } | |||||
| // | |||||
| // fmt.Printf("Total runtime Job %d: %s\n",job.Job_id, job_info.Get_job_runtime(job).String() ) | |||||
| // | |||||
| //} | |||||
| @@ -1,162 +0,0 @@ | |||||
| package main | |||||
| //import "slurm/submitjob" | |||||
| //import "slurm" | |||||
| //import "os/user" | |||||
| //import "os" | |||||
| //import "strconv" | |||||
| //import "fmt" | |||||
| //import "os/exec" | |||||
| //import "path/filepath" | |||||
| //import "slurm/jobinfo" | |||||
| //import "time" | |||||
| // | |||||
| // | |||||
| //func fileExists(filename string) bool { | |||||
| // info, err := os.Stat(filename) | |||||
| // if os.IsNotExist(err) { | |||||
| // return false | |||||
| // } | |||||
| // return !info.IsDir() | |||||
| //} | |||||
| //func build_container(file_name,container_name string){ | |||||
| // | |||||
| // cmd := exec.Command("sudo", "/usr/local/bin/singularity", "build",container_name, file_name) | |||||
| // fmt.Print("Now build new container") | |||||
| // fmt.Printf("%s\n", cmd.String()) | |||||
| // stdoutStderr, err := cmd.CombinedOutput() | |||||
| // if err != nil { | |||||
| // fmt.Printf("error in creating container %s \n", err) | |||||
| // | |||||
| // fmt.Printf("%s\n", stdoutStderr) | |||||
| //// return | |||||
| // } | |||||
| // fmt.Printf("%s\n", stdoutStderr) | |||||
| //} | |||||
| // | |||||
| //func main(){ | |||||
| // job_desc := submit_job.Job_descriptor{} | |||||
| // | |||||
| // dir, _ := os.Getwd() | |||||
| // container := filepath.Join(dir, "openmp_container.img") | |||||
| // definition := filepath.Join(dir, "openmp_container.def") | |||||
| // if !fileExists(container){ | |||||
| // build_container(definition,container) | |||||
| // } | |||||
| // | |||||
| // if !fileExists(container){ | |||||
| // return | |||||
| // } | |||||
| // /* use Cmd to create our script */ | |||||
| // | |||||
| // job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n" | |||||
| // job_desc.Script+= "export OMP_NUM_THREADS=$SLURM_JOB_CPUS_PER_NODE\n" | |||||
| // cmd := exec.Command( "/usr/local/bin/singularity", "exec",container, "/opt/openmp_example" ) | |||||
| // | |||||
| // job_desc.Script+= cmd.String() | |||||
| // fmt.Printf("cmd %s\n", job_desc.Script) | |||||
| // user, _:= user.Current() | |||||
| // userid , _ := strconv.Atoi(user.Uid) | |||||
| // job_desc.User_id= uint32(userid) | |||||
| // groupid , _ := strconv.Atoi(user.Gid) | |||||
| // | |||||
| // job_desc.Group_id= uint32(groupid) | |||||
| // job_desc.Name = "test_job" | |||||
| // job_desc.Partition="long" | |||||
| // job_desc.Time_limit = uint32(60) | |||||
| // job_desc.Min_nodes =uint32(1) | |||||
| // job_desc.Num_tasks = uint32(1) | |||||
| // | |||||
| // job_desc.Cpus_per_task = uint16(2) | |||||
| // job_desc.Std_out = ("./%j-out.txt") | |||||
| // job_desc.Std_err = ("./%j-err.txt") | |||||
| // job_desc.Work_dir = dir | |||||
| // | |||||
| // answer := submit_job.Submit_job(&job_desc) | |||||
| // if(answer.Error_code != 0) { | |||||
| // msg := slurm.GetErrorString(answer.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // } | |||||
| // fmt.Printf("Submitted Job %d\n", answer.Job_id) | |||||
| // | |||||
| // /* Now, we submit the same job again, but with some oversubscription */ | |||||
| // job_desc.Script = "#!/bin/bash\n export PATH=$PATH:/usr/local/bin\n hostname \n" | |||||
| // job_desc.Script+= "export OMP_NUM_THREADS=4\n" | |||||
| // | |||||
| // job_desc.Script+= cmd.String() | |||||
| // fmt.Printf("cmd %s\n", job_desc.Script) | |||||
| // answer2 := submit_job.Submit_job(&job_desc) | |||||
| // if(answer2.Error_code != 0) { | |||||
| // msg := slurm.GetErrorString(answer2.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // } | |||||
| // fmt.Printf("Submitted Job %d\n", answer2.Job_id) | |||||
| // | |||||
| // | |||||
| // | |||||
| // job_list := job_info.Get_job(answer.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job := job_list.Job_list[0] | |||||
| // | |||||
| // fmt.Printf("job is %s\n",job.Job_stateS) | |||||
| // state := job.Job_stateS | |||||
| // for state == "Pending" || state == "Running" { | |||||
| // time.Sleep(2 * time.Second) | |||||
| // job_list = job_info.Get_job(answer.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job = job_list.Job_list[0] | |||||
| // | |||||
| // state = job.Job_stateS | |||||
| // | |||||
| // fmt.Printf("job is %s\n",job.Job_stateS) | |||||
| // | |||||
| // | |||||
| // } | |||||
| // | |||||
| // fmt.Printf("Total runtime first job %s\n",job_info.Get_job_runtime(job).String() ) | |||||
| // /*wait for second job */ | |||||
| // job_list = job_info.Get_job(answer2.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job = job_list.Job_list[0] | |||||
| // | |||||
| // fmt.Printf("job is %s\n",job.Job_stateS) | |||||
| // state = job.Job_stateS | |||||
| // for state == "Pending" || state == "Running" { | |||||
| // time.Sleep(2 * time.Second) | |||||
| // job_list = job_info.Get_job(answer2.Job_id) | |||||
| // if job_list.Error_code != 0 { | |||||
| // msg := slurm.GetErrorString(job_list.Error_code) | |||||
| // fmt.Printf("Error: %s\n" ,msg) | |||||
| // return | |||||
| // | |||||
| // } | |||||
| // job = job_list.Job_list[0] | |||||
| // | |||||
| // state = job.Job_stateS | |||||
| // | |||||
| // fmt.Printf("job is %s\n",job.Job_stateS) | |||||
| // | |||||
| // | |||||
| // } | |||||
| // | |||||
| // | |||||
| // fmt.Printf("Total runtime second job %s\n",job_info.Get_job_runtime(job).String() ) | |||||
| // | |||||
| //} | |||||
| @@ -1,30 +0,0 @@ | |||||
| package main | |||||
| import ( | |||||
| "code.gitlink.org.cn/JCCE/PCM.git/adaptor/pcm_slurm/cgo_tianhe/src/slurm" | |||||
| submit_job "code.gitlink.org.cn/JCCE/PCM.git/adaptor/pcm_slurm/cgo_tianhe/src/slurm/submitjob" | |||||
| ) | |||||
| import "os" | |||||
| import "strconv" | |||||
| import "fmt" | |||||
| func main() { | |||||
| if len(os.Args) < 4 { | |||||
| fmt.Printf("Syntax: specify JobID, qos and partition\n") | |||||
| return | |||||
| } | |||||
| var ops submit_job.Update_job_options | |||||
| id, err := strconv.Atoi(os.Args[1]) | |||||
| if err != nil { | |||||
| fmt.Printf("Invalid job id (no int) %s\n", os.Args[1]) | |||||
| return | |||||
| } | |||||
| ops.Qos = os.Args[2] | |||||
| ops.Partition = os.Args[3] | |||||
| err2 := submit_job.Update_job(ops, uint32(id)) | |||||
| if err2 != uint32(0) { | |||||
| fmt.Printf("error %s \n", slurm.GetErrorString(err2)) | |||||
| } | |||||
| } | |||||