|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207 |
- package octopusHttp
-
- import (
- "bytes"
- "context"
- "encoding/json"
- "errors"
- "fmt"
- common2 "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/entity"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
- "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
- "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
- omodel "gitlink.org.cn/JointCloud/pcm-octopus/http/model"
- "gitlink.org.cn/JointCloud/pcm-openi/common"
- "mime/multipart"
- "net/http"
- "strconv"
- "strings"
- "time"
- )
-
// Shared string constants for the Octopus HTTP adapter. Not every constant is
// referenced in this file; the unit and label values are kept together here so
// that related code can share one spelling.
const (
	// Query-parameter keys attached to Octopus requests.
	Param_Token = "token"
	Param_Addr  = "addr"

	// Separators and fragments used when building names and commands.
	Forward_Slash    = "/"
	COMMA            = ","
	UNDERSCORE       = "_"
	SemiColon        = ";"
	Python           = "python "
	TASK_NAME_PREFIX = "trainJob"

	// Billing-related labels.
	BALANCE = "balance"
	RATE    = "rate"
	PERHOUR = "per-hour"
	RMB     = "rmb"
	POINT   = "point"

	// Unit labels.
	NUMBER   = "number"
	KILOBYTE = "kb"
	GIGABYTE = "gb"
	CPUCORE  = "core"
	Gi       = "Gi"

	// Resource category labels.
	STORAGE = "STORAGE"
	DISK    = "disk"
	MEMORY  = "memory"
	RAM     = "ram"
	VRAM    = "vram"
	CPU     = "cpu"

	// Task state labels.
	RUNNINGTASK = "RUNNING_TASK"
	RUNNING     = "RUNNING"

	// AlgorithmRecordOnlyVersion is the version label for algorithm records.
	AlgorithmRecordOnlyVersion = "V1"
)
-
// NotImplementError is the message carried by the errors that unimplemented
// adapter methods in this file return.
const NotImplementError = "not implemented"
-
// Octopus REST endpoint paths. Callers in this file concatenate them directly
// onto the configured server string (server + path) without inserting a
// separator, so the server value presumably ends with "/" — confirm against
// the configuration.
const (
	// Algorithm endpoints.
	MyAlgorithmListUrl = "api/v1/algorithm/myAlgorithmList"
	CreateAlgorithm    = "api/v1/algorithm/create"

	// Resource endpoints.
	ResourcespecsUrl = "api/v1/resource/specs"

	// Training-job endpoints.
	CreateTrainJobUrl = "api/v1/job/create"
	TrainJobDetail    = "api/v1/job/detail"
	TrainJobLog       = "api/v1/job/log"
)
-
// compute source

// ComputeSourceToCardType maps a compute-source identifier (the key under
// which a resource spec reports its accelerator quantity) to the card-type
// label used when describing cluster resources. Keys are case-sensitive.
var ComputeSourceToCardType = map[string]string{
	// NVIDIA
	"nvidia-a100":     "GPU",
	"nvidia-a100-80g": "GPU",

	// Iluvatar
	"mr-v100":        "ILUVATAR-GPGPU",
	"bi-v100":        "ILUVATAR-GPGPU",
	"MR-V50":         "ILUVATAR-GPGPU",
	"MR-V100":        "ILUVATAR-GPGPU",
	"BI-V100":        "ILUVATAR-GPGPU",
	"BI-V150":        "ILUVATAR-GPGPU",
	"ILUVATAR-GPGPU": "ILUVATAR-GPGPU",

	// Other vendors
	"cambricon.com/mlu":    "MLU",
	"hygon.com/dcu":        "DCU",
	"huawei.com/Ascend910": "NPU",
	"enflame.com/gcu":      "GCU",
	"MXN260":               "METAX-GPGPU",
}
-
- type OctopusHttp struct {
- server string
- host string
- platform string
- participantId int64
- token *Token
- resourcePool string
- }
-
- func NewOctopusHttp(id int64, resourcePool, name, server, host, user, pwd string) *OctopusHttp {
- token := &Token{
- user: user,
- pwd: pwd,
- server: server,
- host: host,
- }
- return &OctopusHttp{resourcePool: resourcePool, platform: name, participantId: id, server: server, host: host, token: token}
- }
-
- // executor
- func (o *OctopusHttp) Execute(ctx context.Context, option *option.AiOption, mode int) (interface{}, error) {
- switch mode {
- case executor.SUBMIT_MODE_JOINT_CLOUD:
-
- case executor.SUBMIT_MODE_STORAGE_SCHEDULE:
- // cmd
- if option.AlgorithmId == "" {
- return nil, errors.New("algorithmId is empty")
- }
- if option.Cmd != "" {
- option.Cmd = option.Cmd + SemiColon + Python + option.AlgorithmId
- } else {
- option.Cmd = Python + option.AlgorithmId
- }
-
- // algorithm
- //param := &omodel.CreateMyAlgorithmParam{
- // AlgorithmName: option.AlgorithmId,
- // ModelName: option.AlgorithmId,
- //}
- //algorithm, err := o.createAlgorithm(ctx, param)
- //if err != nil {
- // return nil, err
- //}
- //if algorithm.Code != http.StatusOK {
- // if algorithm.Data != nil {
- // marshal, err := json.Marshal(algorithm.Data)
- // if err != nil {
- // return nil, err
- // }
- //
- // errormdl := &omodel.Error{}
- // err = json.Unmarshal(marshal, errormdl)
- // if err != nil {
- // return nil, err
- // }
- // return nil, errors.New(errormdl.Message)
- // } else {
- // return nil, errors.New(algorithm.Msg)
- // }
- //} else {
- // if algorithm.Data != nil {
- // result := &entity.OctCreateAlgorithm{}
- // marshal, err := json.Marshal(algorithm.Data)
- // if err != nil {
- // return nil, err
- // }
- // err = json.Unmarshal(marshal, result)
- // if err != nil {
- // return nil, err
- // }
- // if result.AlgorithmId == "" {
- // return nil, errors.New("createAlgorithm failed")
- // }
- // option.AlgorithmId = result.AlgorithmId
- // } else {
- // return nil, errors.New("createAlgorithm failed")
- // }
- //}
-
- // resource
- resp, err := o.resourceSpecs(ctx)
- if err != nil {
- return nil, err
- }
-
- id, err := matchResource(resp, option.ResourcesRequired)
- if err != nil {
- return nil, err
- }
-
- if id == nil {
- return nil, errors.New("resource id is nil")
- }
-
- option.ResourceId = *id
-
- // submit
- task, err := o.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
- if err != nil {
- return nil, err
- }
- return task, nil
- }
- return nil, nil
- }
-
- func (o *OctopusHttp) Stop(ctx context.Context, id string) error {
- return nil
- }
-
- func (o *OctopusHttp) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
- // octopus提交任务
- reqUrl := o.server + CreateTrainJobUrl
-
- token, err := o.token.Get()
- if err != nil {
- return nil, err
- }
-
- // python参数
- var prms []struct {
- Key string `json:"key"`
- Value string `json:"value"`
- }
- for _, param := range params {
- var p struct {
- Key string `json:"key"`
- Value string `json:"value"`
- }
- s := strings.Split(param, COMMA)
- p.Key = s[0]
- p.Value = s[1]
- prms = append(prms, p)
- }
-
- //环境变量
- envMap := make(map[string]string)
- for _, env := range envs {
- s := strings.Split(env, COMMA)
- envMap[s[0]] = s[1]
- }
-
- param := &omodel.CreateTrainJobParam{
- //DataSetId: datasetsId,
- //DataSetVersion: VERSION,
- //AlgorithmId: algorithmId,
- //AlgorithmVersion: AlgorithmRecordOnlyVersion,
- Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(10),
- ImageId: imageId,
- IsDistributed: false,
- ResourcePool: o.resourcePool,
- Config: []*omodel.CreateTrainJobConf{
- {
- Command: cmd,
- ResourceSpecId: resourceId,
- MinFailedTaskCount: 1,
- MinSucceededTaskCount: 1,
- TaskNumber: 1,
- //Parameters: prms,
- Envs: envMap,
- },
- },
- }
-
- resp := &entity.OctResp{}
-
- req := common.GetRestyRequest(common.TIMEOUT)
- _, err = req.
- SetHeader("Authorization", "Bearer "+token).
- SetQueryString("token=" + token).
- SetQueryString("addr=" + o.host).
- SetBody(param).
- SetResult(resp).
- Post(reqUrl)
-
- if err != nil {
- return nil, err
- }
- return resp, nil
-
- }
-
- func (o *OctopusHttp) createAlgorithm(ctx context.Context, param *omodel.CreateMyAlgorithmParam) (*entity.OctResp, error) {
- createAlgorithmUrl := o.server + CreateAlgorithm
- token, err := o.token.Get()
- if err != nil {
- return nil, err
- }
- resp := &entity.OctResp{}
-
- req := common.GetRestyRequest(common.TIMEOUT)
- _, err = req.
- SetHeader("Authorization", "Bearer "+token).
- SetQueryString("token=" + token).
- SetQueryString("addr=" + o.host).
- SetBody(param).
- SetResult(resp).
- Post(createAlgorithmUrl)
-
- if err != nil {
- return nil, err
- }
-
- return resp, nil
- }
-
- // collector
- func (o *OctopusHttp) resourceSpecs(ctx context.Context) (*entity.OctResp, error) {
- resourcespecsUrl := o.server + ResourcespecsUrl
- token, err := o.token.Get()
- if err != nil {
- return nil, err
- }
-
- param := omodel.ResourceSpecParam{
- ResourcePool: o.resourcePool,
- }
-
- b, _ := json.Marshal(param)
- byt := bytes.NewBuffer(b)
-
- resp := &entity.OctResp{}
-
- req := common.GetRestyRequest(common.TIMEOUT)
- r, _ := http.NewRequest("GET", resourcespecsUrl, byt)
- req.RawRequest = r
- req.URL = resourcespecsUrl
-
- _, err = req.
- SetHeader("Content-Type", "application/json").
- SetQueryParam(Param_Token, token).
- SetQueryParam(Param_Addr, o.host).
- SetBody(byt).
- SetResult(resp).
- Send()
-
- if err != nil {
- return nil, err
- }
-
- return resp, nil
- }
-
- func (o *OctopusHttp) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
- resp, err := o.resourceSpecs(ctx)
- if err != nil {
- return nil, err
- }
- if resp.Code != http.StatusOK {
- if resp.Data != nil {
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
-
- errormdl := &omodel.Error{}
- err = json.Unmarshal(marshal, errormdl)
- if err != nil {
- return nil, err
- }
- return nil, errors.New(errormdl.Message)
- }
- } else {
- if resp.Data != nil {
- spec := &entity.OctResourceSpecs{}
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
- err = json.Unmarshal(marshal, spec)
- if err != nil {
- return nil, err
- }
- }
- }
-
- return nil, nil
- }
-
- func (o *OctopusHttp) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) {
- return nil, nil
- }
-
- func (o *OctopusHttp) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) {
- return nil, errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
- taskDetailsUrl := o.server + TrainJobLog
- token, err := o.token.Get()
- if err != nil {
- return "", err
- }
-
- param := omodel.TrainJobLog{
- JobId: taskId,
- }
-
- b, _ := json.Marshal(param)
- byt := bytes.NewBuffer(b)
-
- resp := &entity.OctResp{}
-
- req := common.GetRestyRequest(common.TIMEOUT)
- r, _ := http.NewRequest("GET", taskDetailsUrl, byt)
- req.RawRequest = r
- req.URL = taskDetailsUrl
-
- _, err = req.
- SetHeader("Content-Type", "application/json").
- SetQueryParam(Param_Token, token).
- SetQueryParam(Param_Addr, o.host).
- SetBody(byt).
- SetResult(resp).
- Send()
-
- if err != nil {
- return "", errors.New("failed to invoke taskDetails")
- }
-
- if resp.Code != http.StatusOK {
- return "", errors.New("failed to invoke taskDetails")
- }
-
- var log string
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return "", err
- }
- log = string(marshal)
-
- if strings.Contains(log, "404 Not Found") || log == "" {
- log = "waiting for logs..."
- }
-
- return log, nil
- }
-
- func (o *OctopusHttp) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
- if taskId == "" {
- return nil, errors.New("empty taskId")
- }
-
- resp, err := o.getTrainingTask(ctx, taskId)
- if err != nil {
- return nil, err
- }
-
- if resp.Code != http.StatusOK {
- if resp.Data != nil {
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
-
- errormdl := &omodel.Error{}
- err = json.Unmarshal(marshal, errormdl)
- if err != nil {
- return nil, err
- }
- return nil, errors.New(errormdl.Message)
- }
- } else {
- if resp.Data != nil {
- job := &entity.OctTrainJob{}
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
- err = json.Unmarshal(marshal, job)
- if err != nil {
- return nil, err
- }
-
- var task collector.Task
- task.Id = job.TrainJob.Id
- if job.TrainJob.StartedAt != 0 {
- task.Start = time.Unix(int64(job.TrainJob.StartedAt), 0).Format(constants.Layout)
- }
- if job.TrainJob.CompletedAt != 0 {
- task.End = time.Unix(int64(job.TrainJob.CompletedAt), 0).Format(constants.Layout)
- }
- switch job.TrainJob.Status {
- case "succeeded":
- task.Status = constants.Completed
- case "failed":
- task.Status = constants.Failed
- case "running":
- task.Status = constants.Running
- case "stopped":
- task.Status = constants.Stopped
- case "pending":
- task.Status = constants.Pending
- default:
- task.Status = "undefined"
- }
-
- return &task, nil
- }
- }
- return nil, errors.New("failed to get trainjob")
- }
-
- func (o *OctopusHttp) getTrainingTask(ctx context.Context, taskId string) (*entity.OctResp, error) {
- taskDetailsUrl := o.server + TrainJobDetail
- token, err := o.token.Get()
- if err != nil {
- return nil, err
- }
-
- param := omodel.TrainJobDetailParam{
- JobId: taskId,
- }
-
- b, _ := json.Marshal(param)
- byt := bytes.NewBuffer(b)
-
- resp := &entity.OctResp{}
-
- req := common.GetRestyRequest(common.TIMEOUT)
- r, _ := http.NewRequest("GET", taskDetailsUrl, byt)
- req.RawRequest = r
- req.URL = taskDetailsUrl
-
- _, err = req.
- SetHeader("Content-Type", "application/json").
- SetQueryParam(Param_Token, token).
- SetQueryParam(Param_Addr, o.host).
- SetBody(byt).
- SetResult(resp).
- Send()
-
- if err != nil {
- return nil, errors.New("failed to invoke taskDetails")
- }
-
- return resp, nil
- }
-
- func (o *OctopusHttp) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
- return "", errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
- return nil
- }
-
- func (o *OctopusHttp) GetComputeCards(ctx context.Context) ([]string, error) {
- return nil, errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) GetUserBalance(ctx context.Context) (float64, error) {
- return 0, errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) GetResourceSpecs(ctx context.Context, resrcType string) (*collector.ResourceSpec, error) {
- resp, err := o.resourceSpecs(ctx)
- if err != nil {
- return nil, err
- }
-
- res := &collector.ResourceSpec{
- ClusterId: strconv.FormatInt(o.participantId, 10),
- Tag: resrcType,
- }
-
- if resp.Code != http.StatusOK {
- if resp.Data != nil {
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
-
- errormdl := &omodel.Error{}
- err = json.Unmarshal(marshal, errormdl)
- if err != nil {
- return nil, err
- }
- return nil, errors.New(errormdl.Message)
- }
- } else {
- if resp.Data != nil {
- specs := &entity.OctResourceSpecs{}
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
- err = json.Unmarshal(marshal, specs)
- if err != nil {
- return nil, err
- }
- clusterResources, err := genSpecs(specs, resrcType, nil)
- if err != nil {
- return nil, err
- }
- res.Resources = clusterResources
- }
- }
-
- return res, nil
- }
-
- func findSpecId(cType string, cNum string, s *omodel.Spec, resourcesRequired []map[string]interface{}) (*string, error) {
- var id string
- for _, res := range resourcesRequired {
- //typeName, ok := res["type"]
- //if !ok {
- // continue
- //}
- name, ok := res["name"]
- if !ok {
- continue
- }
- if str, ok := name.(string); ok {
- name = strings.ToLower(str)
- } else {
- continue
- }
-
- num, ok := res["number"]
- if !ok {
- continue
- }
- if str, ok := num.(string); ok {
- num = strings.ToLower(str)
- } else {
- continue
- }
-
- if cType == name && cNum == num {
- id = s.Id
- return &id, nil
- }
- }
-
- return nil, nil
- }
-
- func matchResource(resp *entity.OctResp, resourcesRequired []map[string]interface{}) (*string, error) {
- if resp.Code != http.StatusOK {
- if resp.Data != nil {
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
-
- errormdl := &omodel.Error{}
- err = json.Unmarshal(marshal, errormdl)
- if err != nil {
- return nil, err
- }
- return nil, errors.New(errormdl.Message)
- }
- } else {
- if resp.Data != nil {
- spec := &entity.OctResourceSpecs{}
- marshal, err := json.Marshal(resp.Data)
- if err != nil {
- return nil, err
- }
- err = json.Unmarshal(marshal, spec)
- if err != nil {
- return nil, err
- }
-
- res, err := genSpecs(spec, "Train", resourcesRequired)
- if err != nil {
- return nil, err
- }
-
- if len(res) != 1 {
- return nil, errors.New("resource not found")
- }
-
- if str, ok := res[0].(*string); ok {
- return str, nil
- }
- }
- }
-
- return nil, errors.New("matchResource failed")
- }
-
- func genSpecs(specs *entity.OctResourceSpecs, resrcType string, resourcesRequired []map[string]interface{}) ([]interface{}, error) {
- res := make([]interface{}, 0)
- if resrcType == "Inference" {
- return res, nil
- } else if resrcType == "Train" {
- if specs.MapResourceSpecIdList.Train.ResourceSpecs == nil {
- return res, nil
- } else {
- for _, s := range specs.MapResourceSpecIdList.Train.ResourceSpecs {
- spec := &omodel.Spec{}
- marshal, err := json.Marshal(s)
- if err != nil {
- return nil, err
- }
- err = json.Unmarshal(marshal, spec)
- if err != nil {
- return nil, err
- }
-
- resType, err := chooseResourceType(spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if resType == nil {
- continue
- }
- res = append(res, resType)
- }
- }
- }
-
- return res, nil
- }
-
- func chooseResourceType(spec *omodel.Spec, resourcesRequired []map[string]interface{}) (interface{}, error) {
- if spec.ResourceQuantity.NvidiaA100 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaA100")
- if err != nil {
- return nil, err
- }
-
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.NvidiaA100, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA100, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
-
- return cres, nil
- } else if spec.ResourceQuantity.NvidiaA10080G != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaA10080G")
- if err != nil {
- return nil, err
- }
-
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.NvidiaA10080G, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA10080G, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA10080G, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.MrV100 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MrV100")
- if err != nil {
- return nil, err
- }
-
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.MrV100, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.MrV100, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.MrV100, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.BiV100 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "BiV100")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.BiV100, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.BiV100, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.BiV100, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.MRV50 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MRV50")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.MRV50, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.MRV50, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.MRV50, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.BIV100 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaA100")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.NvidiaA100, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA100, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaA100, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.BIV150 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "BIV150")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.BIV150, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.BIV150, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.BIV150, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.MRV100 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MRV100")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.MRV100, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.MRV100, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.MRV100, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.CambriconComMlu != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "CambriconComMlu")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.CambriconComMlu, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.CambriconComMlu, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.CambriconComMlu, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.HygonComDcu != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "HygonComDcu")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.HygonComDcu, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.HygonComDcu, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.HygonComDcu, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.HuaweiComAscend910 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "HuaweiComAscend910")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.HuaweiComAscend910, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.HuaweiComAscend910, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.HuaweiComAscend910, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.EnflameComGcu != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "EnflameComGcu")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.EnflameComGcu, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.EnflameComGcu, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.EnflameComGcu, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.MXN260 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MXN260")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.MXN260, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.MXN260, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.MXN260, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.NvidiaV100 != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "NvidiaV100")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.NvidiaV100, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaV100, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.NvidiaV100, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- } else if spec.ResourceQuantity.MetaxTechComGpu != "" {
- tag, err := common2.GetJSONTag(spec.ResourceQuantity, "MetaxTechComGpu")
- if err != nil {
- return nil, err
- }
- var cres interface{}
- if resourcesRequired != nil {
- id, err := findSpecId(tag, spec.ResourceQuantity.MetaxTechComGpu, spec, resourcesRequired)
- if err != nil {
- return nil, err
- }
- if id != nil {
- cres = id
- }
- } else {
- res, err := genClusterResources(tag, spec.ResourceQuantity.MetaxTechComGpu, spec)
- if err != nil {
- return nil, err
- }
- cres = res
- }
- //cres, err := genClusterResources(tag, spec.ResourceQuantity.MetaxTechComGpu, spec)
- //if err != nil {
- // return nil, err
- //}
- return cres, nil
- }
-
- return nil, nil
- }
-
- func genClusterResources(cType string, cNum string, s *omodel.Spec) (*collector.ClusterResource, error) {
- cres := &collector.ClusterResource{}
- bres := make([]*collector.Usage, 0)
-
- var cardNum int64
- var cpuCore int64
- var memGi int64
-
- // card
- if cNum == "" {
- return nil, nil
- }
- cardNum, err := strconv.ParseInt(cNum, 10, 64)
- if err != nil {
- return nil, nil
- }
-
- // cpu
- if s.ResourceQuantity.Cpu == "" {
- return nil, nil
- }
- cpuCore, err = strconv.ParseInt(s.ResourceQuantity.Cpu, 10, 64)
- if err != nil {
- return nil, nil
- }
-
- //memory
- if s.ResourceQuantity.Memory != "" {
- gi := strings.Split(s.ResourceQuantity.Memory, Gi)
- if len(gi) != 2 {
- return nil, fmt.Errorf("s.ResourceQuantity.Memory convert error: %s", s.ResourceQuantity.Memory)
- }
-
- mGi, err := strconv.ParseInt(gi[0], 10, 64)
- if err != nil {
- memGi = 0
- } else {
- memGi = mGi
- }
- } else {
- return nil, nil
- }
-
- card := &collector.Usage{
- Type: ComputeSourceToCardType[cType],
- Name: cNum + "*" + strings.ToUpper(cType),
- Total: &collector.UnitValue{Unit: NUMBER, Value: cardNum},
- Available: &collector.UnitValue{Unit: NUMBER, Value: cardNum},
- }
- cpu := &collector.Usage{
- Type: strings.ToUpper(CPU),
- Name: strings.ToUpper(CPU),
- Total: &collector.UnitValue{Unit: CPUCORE, Value: cpuCore},
- Available: &collector.UnitValue{Unit: CPUCORE, Value: cpuCore},
- }
- mem := &collector.Usage{
- Type: strings.ToUpper(MEMORY),
- Name: strings.ToUpper(RAM),
- Total: &collector.UnitValue{Unit: GIGABYTE, Value: memGi},
- Available: &collector.UnitValue{Unit: GIGABYTE, Value: memGi},
- }
-
- bres = append(bres, cpu)
- bres = append(bres, mem)
-
- cres.Resource = card
- cres.BaseResources = bres
-
- return cres, nil
- }
-
- // inference
- func (o *OctopusHttp) GetClusterInferUrl(ctx context.Context, option *option.InferOption) (*inference.ClusterInferUrl, error) {
- return nil, errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
- return nil, errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) StartInferDeployInstance(ctx context.Context, id string) bool {
- return false
- }
-
- func (o *OctopusHttp) StopInferDeployInstance(ctx context.Context, id string) bool {
- return false
- }
-
- func (o *OctopusHttp) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
- return nil, errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
- return "", errors.New(NotImplementError)
- }
-
- func (o *OctopusHttp) CheckModelExistence(ctx context.Context, modelName string, modelType string) bool {
- return false
- }
-
- func (o *OctopusHttp) GetImageInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
- return "", errors.New(NotImplementError)
- }
|