|
|
@@ -28,8 +28,7 @@ import ( |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
const ( |
|
|
const ( |
|
|
RAM_SIZE_1G = 1024 // 1G |
|
|
|
|
|
WORKER_RAM_SIZE = 10240 // 10G |
|
|
|
|
|
|
|
|
RAM_SIZE_1G = 1024 // 1G |
|
|
WORKER_NUMBER = 1 |
|
|
WORKER_NUMBER = 1 |
|
|
WORKER_CPU_NUMBER = 5 |
|
|
WORKER_CPU_NUMBER = 5 |
|
|
WORKER_GPU_NUMBER = 1 |
|
|
WORKER_GPU_NUMBER = 1 |
|
|
@@ -46,7 +45,7 @@ const ( |
|
|
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset" |
|
|
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset" |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
var RESOURCESGMAP = map[string]ResourceSpecSG{ |
|
|
|
|
|
|
|
|
var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{ |
|
|
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": { |
|
|
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": { |
|
|
CPU: 1, |
|
|
CPU: 1, |
|
|
GPU: 1, |
|
|
GPU: 1, |
|
|
@@ -82,7 +81,7 @@ var RESOURCESPECSAI = map[string]string{ |
|
|
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:2, RAM:10G", |
|
|
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:2, RAM:10G", |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
type ResourceSpecSG struct { |
|
|
|
|
|
|
|
|
type ResourceSpecSGAI struct { |
|
|
CPU int64 |
|
|
CPU int64 |
|
|
GPU int64 |
|
|
GPU int64 |
|
|
RAM int64 |
|
|
RAM int64 |
|
|
@@ -123,7 +122,8 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) { |
|
|
|
|
|
|
|
|
func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { |
|
|
func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { |
|
|
//判断是否resourceId匹配自定义资源Id |
|
|
//判断是否resourceId匹配自定义资源Id |
|
|
if resourceId != SHUGUANGAI_CUSTOM_RESOURCE_ID { |
|
|
|
|
|
|
|
|
_, isMapContainsKey := RESOURCESPECSAI[resourceId] |
|
|
|
|
|
if !isMapContainsKey { |
|
|
return nil, errors.New("shuguangAi资源Id不存在") |
|
|
return nil, errors.New("shuguangAi资源Id不存在") |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@@ -159,15 +159,15 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string |
|
|
Version: imageResp.Image.Version, |
|
|
Version: imageResp.Image.Version, |
|
|
ImagePath: imageResp.Image.Path, |
|
|
ImagePath: imageResp.Image.Path, |
|
|
WorkerNumber: WORKER_NUMBER, |
|
|
WorkerNumber: WORKER_NUMBER, |
|
|
WorkerCpuNumber: WORKER_CPU_NUMBER, |
|
|
|
|
|
WorkerGpuNumber: WORKER_GPU_NUMBER, |
|
|
|
|
|
WorkerRamSize: WORKER_RAM_SIZE, |
|
|
|
|
|
ResourceGroup: RESOURCE_GROUP, |
|
|
ResourceGroup: RESOURCE_GROUP, |
|
|
TimeoutLimit: TimeoutLimit, |
|
|
TimeoutLimit: TimeoutLimit, |
|
|
PythonCodePath: PythonCodePath, |
|
|
PythonCodePath: PythonCodePath, |
|
|
PythonArg: pythonArg, |
|
|
PythonArg: pythonArg, |
|
|
}, |
|
|
}, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
updateSGAIRequestByResourceId(resourceId, req) |
|
|
|
|
|
|
|
|
resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req) |
|
|
resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
return nil, err |
|
|
return nil, err |
|
|
@@ -176,6 +176,13 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string |
|
|
return resp, nil |
|
|
return resp, nil |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func updateSGAIRequestByResourceId(resourceId string, req *hpcAC.SubmitPytorchTaskReq) { |
|
|
|
|
|
spec := RESOURCESGAIMAP[resourceId] |
|
|
|
|
|
req.Params.WorkerCpuNumber = spec.CPU |
|
|
|
|
|
req.Params.WorkerGpuNumber = spec.GPU |
|
|
|
|
|
req.Params.WorkerRamSize = spec.RAM |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { |
|
|
func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { |
|
|
//req := &hpcAC.SubmitTensorflowTaskReq{ |
|
|
//req := &hpcAC.SubmitTensorflowTaskReq{ |
|
|
// Params: &hpcAC.SubmitTensorflowTaskParams{ |
|
|
// Params: &hpcAC.SubmitTensorflowTaskParams{ |
|
|
|