|
|
|
@@ -6,6 +6,7 @@ import ( |
|
|
|
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" |
|
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" |
|
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" |
|
|
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" |
|
|
|
"strings" |
|
|
|
) |
|
|
|
|
|
|
|
@@ -16,9 +17,95 @@ type ShuguangHpc struct { |
|
|
|
} |
|
|
|
|
|
|
|
// Fixed values and defaults used when building Shuguang HPC job submissions.
const (
	// Resource, task and application identification.
	// NOTE(review): the custom resource ID literal contains the text "// 10G"
	// inside the string itself; confirm that this is the intended value.
	SHUGUANGHPC_CUSTOM_RESOURCE_ID = "10240 // 10G"
	TASK_SHUGUANG_PREFIX           = "ShuguangHPC"
	Apptype                        = "BASIC"
	StrJobManagerID                = 1637920656

	// Text tokens. JOBNAME is the placeholder that appears in the path
	// templates GAP_WORK_DIR, GAP_STD_OUT_FILE and GAP_STD_ERR_FILE.
	JOBNAME = "JOBNAME"
	NEWLINE = "\n"
	EXPORT  = "export"

	// Job defaults: application name, queue, submit type, wall time and the
	// working directory / output file templates.
	GAP_APPNAME       = "BASE"
	GAP_QUEUE         = "wzhdtest"
	GAP_CMD_FILE      = "cmd"
	GAP_WALL_TIME_24H = "24:00:00"
	GAP_WORK_DIR      = "/work/home/acgnnmfbwo/BASE/JOBNAME"
	GAP_STD_OUT_FILE  = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.out.%j"
	GAP_STD_ERR_FILE  = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.err.%j"

	// Default resource request values. GAP_NNODE is the number of nodes; the
	// empty-string entries leave the corresponding option unset.
	GAP_NNODE       = "1"
	GAP_NPROC       = "1"
	GAP_NDCU        = "1"
	GAP_NODE_STRING = ""
	GAP_EXCLUSIVE   = ""
	GAP_PPN         = ""
	GAP_NGPU        = ""
	GAP_MULTI_SUB   = ""
)
|
|
|
|
|
|
|
var RESOURCEMAP = map[string]ResourceSpec{ |
|
|
|
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": { |
|
|
|
GAP_NNODE: "1", |
|
|
|
GAP_NPROC: "2", |
|
|
|
GAP_NDCU: "1", |
|
|
|
}, |
|
|
|
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": { |
|
|
|
GAP_NNODE: "1", |
|
|
|
GAP_NPROC: "4", |
|
|
|
GAP_NDCU: "2", |
|
|
|
}, |
|
|
|
"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": { |
|
|
|
GAP_NNODE: "1", |
|
|
|
GAP_NPROC: "8", |
|
|
|
GAP_NDCU: "4", |
|
|
|
}, |
|
|
|
"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": { |
|
|
|
GAP_NNODE: "1", |
|
|
|
GAP_NPROC: "16", |
|
|
|
GAP_NDCU: "4", |
|
|
|
}, |
|
|
|
"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": { |
|
|
|
GAP_NNODE: "1", |
|
|
|
GAP_NPROC: "32", |
|
|
|
GAP_NDCU: "4", |
|
|
|
}, |
|
|
|
"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": { |
|
|
|
GAP_NNODE: "2", |
|
|
|
GAP_NPROC: "4", |
|
|
|
GAP_NDCU: "2", |
|
|
|
}, |
|
|
|
"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": { |
|
|
|
GAP_NNODE: "2", |
|
|
|
GAP_NPROC: "8", |
|
|
|
GAP_NDCU: "4", |
|
|
|
}, |
|
|
|
"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": { |
|
|
|
GAP_NNODE: "2", |
|
|
|
GAP_NPROC: "16", |
|
|
|
GAP_NDCU: "4", |
|
|
|
}, |
|
|
|
"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": { |
|
|
|
GAP_NNODE: "2", |
|
|
|
GAP_NPROC: "32", |
|
|
|
GAP_NDCU: "8", |
|
|
|
}, |
|
|
|
} |
|
|
|
|
|
|
|
// RESOURCESPECS maps each supported resource ID to a human-readable
// description of the specification in the form
// "<nodes>*NODE, CPU:<cpus>, <dcus>*DCU".
var RESOURCESPECS = map[string]string{
	// Single-node specifications.
	"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
	"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
	"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": "1*NODE, CPU:8, 4*DCU",
	"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": "1*NODE, CPU:16, 4*DCU",
	"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": "1*NODE, CPU:32, 4*DCU",

	// Two-node specifications.
	"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": "2*NODE, CPU:4, 2*DCU",
	"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": "2*NODE, CPU:8, 4*DCU",
	"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": "2*NODE, CPU:16, 4*DCU",
	"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": "2*NODE, CPU:32, 8*DCU",
}
|
|
|
|
|
|
|
// ResourceSpec holds the resource counts of one specification as strings:
// GAP_NNODE is the node count, GAP_NPROC the process (CPU) count and GAP_NDCU
// the DCU count.
type ResourceSpec struct {
	GAP_NNODE, GAP_NPROC, GAP_NDCU string
}
|
|
|
|
|
|
|
func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc { |
|
|
|
return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant} |
|
|
|
} |
|
|
|
@@ -39,7 +126,8 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param |
|
|
|
// shuguangHpc提交任务 |
|
|
|
|
|
|
|
//判断是否resourceId匹配自定义资源Id |
|
|
|
if resourceId != SHUGUANGAI_CUSTOM_RESOURCE_ID { |
|
|
|
_, isMapContainsKey := RESOURCESPECS[resourceId] |
|
|
|
if !isMapContainsKey { |
|
|
|
return nil, errors.New("shuguangHpc资源Id不存在") |
|
|
|
} |
|
|
|
|
|
|
|
@@ -47,37 +135,43 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param |
|
|
|
var env string |
|
|
|
for _, e := range envs { |
|
|
|
s := strings.Split(e, COMMA) |
|
|
|
env += s[0] + "=" + s[1] + SPACE |
|
|
|
env += EXPORT + SPACE + s[0] + EQUAL + s[1] + NEWLINE |
|
|
|
} |
|
|
|
|
|
|
|
//请求 |
|
|
|
taskName := TASK_SHUGUANG_PREFIX + UNDERSCORE + UNDERSCORE + utils.RandomString(10) |
|
|
|
GAP_WORK_DIR := strings.Replace(GAP_WORK_DIR, JOBNAME, taskName, -1) |
|
|
|
GAP_STD_OUT_FILE := strings.Replace(GAP_STD_OUT_FILE, JOBNAME, taskName, -1) |
|
|
|
GAP_STD_ERR_FILE := strings.Replace(GAP_STD_ERR_FILE, JOBNAME, taskName, -1) |
|
|
|
|
|
|
|
req := &hpcAC.SubmitJobReq{ |
|
|
|
Apptype: "", |
|
|
|
Appname: "", |
|
|
|
StrJobManagerID: 0, |
|
|
|
Apptype: Apptype, |
|
|
|
Appname: GAP_APPNAME, |
|
|
|
StrJobManagerID: StrJobManagerID, |
|
|
|
MapAppJobInfo: &hpcAC.MapAppJobInfo{ |
|
|
|
GAP_CMD_FILE: "echo $TESTDIR; echo $TESTENV; sleep 30", |
|
|
|
GAP_NNODE: "1", |
|
|
|
GAP_NODE_STRING: "", |
|
|
|
GAP_SUBMIT_TYPE: "cmd", |
|
|
|
GAP_JOB_NAME: "testSlurmjob1", |
|
|
|
GAP_WORK_DIR: "/work/home/acgnnmfbwo/BASE/testSlurmjob1", |
|
|
|
GAP_QUEUE: "wzhdtest", |
|
|
|
GAP_NPROC: "1", |
|
|
|
GAP_PPN: "", |
|
|
|
GAP_NGPU: "", |
|
|
|
GAP_NDCU: "1", |
|
|
|
GAP_WALL_TIME: "01:00:00", |
|
|
|
GAP_EXCLUSIVE: "", |
|
|
|
GAP_APPNAME: "BASE", |
|
|
|
GAP_MULTI_SUB: "", |
|
|
|
GAP_STD_OUT_FILE: "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.out.%j", |
|
|
|
GAP_STD_ERR_FILE: "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.err.%j", |
|
|
|
GAP_SCHEDULER_OPT_WEB: "export TESTDIR=/bin/emacs\nexport TESTENV=12345", |
|
|
|
GAP_CMD_FILE: cmd, |
|
|
|
GAP_NNODE: GAP_NNODE, |
|
|
|
GAP_NODE_STRING: GAP_NODE_STRING, |
|
|
|
GAP_SUBMIT_TYPE: GAP_CMD_FILE, |
|
|
|
GAP_JOB_NAME: taskName, |
|
|
|
GAP_WORK_DIR: GAP_WORK_DIR, |
|
|
|
GAP_QUEUE: GAP_QUEUE, |
|
|
|
GAP_NPROC: GAP_NPROC, |
|
|
|
GAP_PPN: GAP_PPN, |
|
|
|
GAP_NGPU: GAP_NGPU, |
|
|
|
GAP_NDCU: GAP_NDCU, |
|
|
|
GAP_WALL_TIME: GAP_WALL_TIME_24H, |
|
|
|
GAP_EXCLUSIVE: GAP_EXCLUSIVE, |
|
|
|
GAP_APPNAME: GAP_APPNAME, |
|
|
|
GAP_MULTI_SUB: GAP_MULTI_SUB, |
|
|
|
GAP_STD_OUT_FILE: GAP_STD_OUT_FILE, |
|
|
|
GAP_STD_ERR_FILE: GAP_STD_ERR_FILE, |
|
|
|
GAP_SCHEDULER_OPT_WEB: env, |
|
|
|
}, |
|
|
|
} |
|
|
|
|
|
|
|
updateRequestByResouceId(resourceId, req) |
|
|
|
|
|
|
|
resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
@@ -107,3 +201,10 @@ func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) { |
|
|
|
//TODO implement me |
|
|
|
panic("implement me") |
|
|
|
} |
|
|
|
|
|
|
|
func updateRequestByResouceId(resourceId string, req *hpcAC.SubmitJobReq) { |
|
|
|
spec := RESOURCEMAP[resourceId] |
|
|
|
req.MapAppJobInfo.GAP_NNODE = spec.GAP_NNODE |
|
|
|
req.MapAppJobInfo.GAP_NPROC = spec.GAP_NPROC |
|
|
|
req.MapAppJobInfo.GAP_NDCU = spec.GAP_NDCU |
|
|
|
} |