|
|
|
@@ -25,33 +25,34 @@ import ( |
|
|
|
) |
|
|
|
|
|
|
|
// String constants shared by the functions in this file. Each name is declared
// exactly once; names, values and declaration order are part of the package's
// visible surface and must not change.
const (
	// Resource pool name sent as ResourcePool when a train job is created.
	RESOURCE_POOL = "grampus-pool"

	// Parameter keys.
	Param_Token = "token"
	Param_Addr  = "addr"

	// Separators and fragments used to assemble names and commands.
	// TASK_NAME_PREFIX and UNDERSCORE are joined with a random suffix to name
	// train jobs. Python keeps its trailing space because it is concatenated
	// directly with the value that follows it.
	Forward_Slash    = "/"
	COMMA            = ","
	UNDERSCORE       = "_"
	TASK_NAME_PREFIX = "trainJob"
	Python           = "python "
	SemiColon        = ";"

	// Resource, unit and pricing labels. Their consumers are not visible in
	// this part of the file; note that STORAGE is upper-case unlike its
	// neighbours.
	BALANCE  = "balance"
	RATE     = "rate"
	PERHOUR  = "per-hour"
	NUMBER   = "number"
	KILOBYTE = "kb"
	GIGABYTE = "gb"
	CPUCORE  = "core"
	STORAGE  = "STORAGE"
	DISK     = "disk"
	MEMORY   = "memory"
	RAM      = "ram"
	VRAM     = "vram"
	RMB      = "rmb"
	POINT    = "point"

	// Status strings.
	RUNNINGTASK = "RUNNING_TASK"
	RUNNING     = "RUNNING"

	// Resource name and quantity suffix.
	CPU = "cpu"
	Gi  = "Gi"

	// Version string sent as AlgorithmVersion when a train job is created.
	AlgorithmRecordOnlyVersion = "V1"
)
|
|
|
|
|
|
|
const ( |
|
|
|
@@ -60,6 +61,7 @@ const ( |
|
|
|
|
|
|
|
const ( |
|
|
|
MyAlgorithmListUrl = "api/v1/algorithm/myAlgorithmList" |
|
|
|
CreateAlgorithm = "api/v1/algorithm/create" |
|
|
|
ResourcespecsUrl = "api/v1/resource/specs" |
|
|
|
CreateTrainJobUrl = "api/v1/job/create" |
|
|
|
TrainJobDetail = "api/v1/job/detail" |
|
|
|
@@ -120,7 +122,53 @@ func (o *OctopusHttp) Execute(ctx context.Context, option *option.AiOption, mode |
|
|
|
option.Cmd = Python + option.AlgorithmId |
|
|
|
} |
|
|
|
|
|
|
|
// algorithm |
|
|
|
param := &omodel.CreateMyAlgorithmParam{ |
|
|
|
AlgorithmName: option.AlgorithmId, |
|
|
|
ModelName: option.AlgorithmId, |
|
|
|
} |
|
|
|
algorithm, err := o.createAlgorithm(ctx, param) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
if algorithm.Code != http.StatusOK { |
|
|
|
if algorithm.Data != nil { |
|
|
|
marshal, err := json.Marshal(algorithm.Data) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
|
|
|
|
errormdl := &omodel.Error{} |
|
|
|
err = json.Unmarshal(marshal, errormdl) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
return nil, errors.New(errormdl.Message) |
|
|
|
} |
|
|
|
} else { |
|
|
|
if algorithm.Data != nil { |
|
|
|
result := &entity.OctCreateAlgorithm{} |
|
|
|
marshal, err := json.Marshal(algorithm.Data) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
err = json.Unmarshal(marshal, result) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
if result.AlgorithmId == "" { |
|
|
|
return nil, errors.New("createAlgorithm failed") |
|
|
|
} |
|
|
|
option.AlgorithmId = result.AlgorithmId |
|
|
|
} else { |
|
|
|
return nil, errors.New("createAlgorithm failed") |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// resource |
|
|
|
option.ResourceId = "964fdee2db544928bfea74dac12a924f" |
|
|
|
|
|
|
|
// submit |
|
|
|
task, err := o.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
@@ -169,12 +217,12 @@ func (o *OctopusHttp) SubmitTask(ctx context.Context, imageId string, cmd string |
|
|
|
param := &omodel.CreateTrainJobParam{ |
|
|
|
//DataSetId: datasetsId, |
|
|
|
//DataSetVersion: VERSION, |
|
|
|
//AlgorithmId: algorithmId, |
|
|
|
//AlgorithmVersion: VERSION, |
|
|
|
Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(10), |
|
|
|
ImageId: imageId, |
|
|
|
IsDistributed: false, |
|
|
|
ResourcePool: RESOURCE_POOL, |
|
|
|
AlgorithmId: algorithmId, |
|
|
|
AlgorithmVersion: AlgorithmRecordOnlyVersion, |
|
|
|
Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(10), |
|
|
|
ImageId: imageId, |
|
|
|
IsDistributed: false, |
|
|
|
ResourcePool: RESOURCE_POOL, |
|
|
|
Config: []*omodel.CreateTrainJobConf{ |
|
|
|
{ |
|
|
|
Command: cmd, |
|
|
|
@@ -182,8 +230,8 @@ func (o *OctopusHttp) SubmitTask(ctx context.Context, imageId string, cmd string |
|
|
|
MinFailedTaskCount: 1, |
|
|
|
MinSucceededTaskCount: 1, |
|
|
|
TaskNumber: 1, |
|
|
|
//Parameters: prms, |
|
|
|
Envs: envMap, |
|
|
|
Parameters: prms, |
|
|
|
Envs: envMap, |
|
|
|
}, |
|
|
|
}, |
|
|
|
} |
|
|
|
@@ -206,6 +254,28 @@ func (o *OctopusHttp) SubmitTask(ctx context.Context, imageId string, cmd string |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
func (o *OctopusHttp) createAlgorithm(ctx context.Context, param *omodel.CreateMyAlgorithmParam) (*entity.OctResp, error) { |
|
|
|
createAlgorithmUrl := o.server + CreateAlgorithm |
|
|
|
token, err := o.token.Get() |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
resp := &entity.OctResp{} |
|
|
|
|
|
|
|
req := common.GetRestyRequest(common.TIMEOUT) |
|
|
|
_, err = req. |
|
|
|
SetHeader("Authorization", "Bearer "+token). |
|
|
|
SetBody(param). |
|
|
|
SetResult(resp). |
|
|
|
Post(createAlgorithmUrl) |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
|
|
|
|
return resp, nil |
|
|
|
} |
|
|
|
|
|
|
|
// collector |
|
|
|
func (o *OctopusHttp) resourceSpecs(ctx context.Context) (*entity.OctResp, error) { |
|
|
|
resourcespecsUrl := o.server + ResourcespecsUrl |
|
|
|
@@ -447,7 +517,7 @@ func (o *OctopusHttp) UploadAlgorithmCode(ctx context.Context, resourceType stri |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func (o OctopusHttp) GetComputeCards(ctx context.Context) ([]string, error) { |
|
|
|
func (o *OctopusHttp) GetComputeCards(ctx context.Context) ([]string, error) { |
|
|
|
return nil, errors.New(NotImplementError) |
|
|
|
} |
|
|
|
|
|
|
|
|