| @@ -204,6 +204,7 @@ type ( | |||||
// CodeDistribute carries a data name, a package ID, an output string and a
// list of ClusterScheduled entries.
// NOTE(review): unlike DataName, Output has no `optional` tag option, so
// presumably every caller must now send it; confirm existing clients do.
| CodeDistribute { | CodeDistribute { | ||||
| DataName string `json:"dataName,optional"` | DataName string `json:"dataName,optional"` | ||||
| PackageID int64 `json:"packageID"` | PackageID int64 `json:"packageID"` | ||||
| Output string `json:"output"` | |||||
| Clusters []*ClusterScheduled `json:"clusters"` | Clusters []*ClusterScheduled `json:"clusters"` | ||||
| } | } | ||||
| @@ -81,4 +81,7 @@ BlockChain: | |||||
| ContractAddress: 0x22ac23bf2d2cf1b4d8fec9cb4d279c7da6718e35 | ContractAddress: 0x22ac23bf2d2cf1b4d8fec9cb4d279c7da6718e35 | ||||
| FunctionName: "storeEvidence" | FunctionName: "storeEvidence" | ||||
| MemberName: "pcm" | MemberName: "pcm" | ||||
| Type: "2" | |||||
| Type: "2" | |||||
| JcsMiddleware: | |||||
| Url: 101.201.215.196:7891 | |||||
| @@ -528,14 +528,6 @@ gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20250107025835-8fc888b1d170 h1:/n3pl6WuH | |||||
| gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20250107025835-8fc888b1d170/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY= | gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20250107025835-8fc888b1d170/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY= | ||||
| gitlink.org.cn/JointCloud/pcm-hpc v0.0.0-20241125115811-72f3568255a4 h1:WIs/189lRLNMXF2ui/Wm1+Y55eJ53BVGx+4+gdn9cls= | gitlink.org.cn/JointCloud/pcm-hpc v0.0.0-20241125115811-72f3568255a4 h1:WIs/189lRLNMXF2ui/Wm1+Y55eJ53BVGx+4+gdn9cls= | ||||
| gitlink.org.cn/JointCloud/pcm-hpc v0.0.0-20241125115811-72f3568255a4/go.mod h1:YbuoRgF9sEVvNJPQtGRjdocX7Du6NBOTLn+GVwqRVjo= | gitlink.org.cn/JointCloud/pcm-hpc v0.0.0-20241125115811-72f3568255a4/go.mod h1:YbuoRgF9sEVvNJPQtGRjdocX7Du6NBOTLn+GVwqRVjo= | ||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250108072048-9adf0597b07c h1:9LphS29VNfoWT73eqhgwKV1nG8PcoDUNu7dRev845wA= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250108072048-9adf0597b07c/go.mod h1:V19vFg8dWRAbaskASoSj70dgpacswOqZu/SaI02dxac= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304023304-d556ce8161c7 h1:pv1WX3+ttqsHs7nr7+lfYNkvzUp1KIJQ0XzWbVetj6w= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304023304-d556ce8161c7/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304035519-da6ab53b969d h1:EfAxN4oaCVIRsnM3pnC7NskifFRjM/THBUiMGtQQzfg= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304035519-da6ab53b969d/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306022112-4ed1f08d3170 h1:NsHFtWPpcL8nF0s4v0DHuHuPaPFgMO9xITQCMM7Du1E= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306022112-4ed1f08d3170/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306073530-56ecf1273207 h1:korhOkFl0x1wuQBKoKTsQHeFboDwLFRWwR2G9IPPfNg= | gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306073530-56ecf1273207 h1:korhOkFl0x1wuQBKoKTsQHeFboDwLFRWwR2G9IPPfNg= | ||||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306073530-56ecf1273207/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306073530-56ecf1273207/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | ||||
| gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo= | gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo= | ||||
| @@ -51,6 +51,8 @@ type Config struct { | |||||
| SnowflakeConf SnowflakeConf | SnowflakeConf SnowflakeConf | ||||
| Monitoring Monitoring | Monitoring Monitoring | ||||
| JcsMiddleware JcsMiddleware | |||||
| } | } | ||||
| type Monitoring struct { | type Monitoring struct { | ||||
| PromUrl string | PromUrl string | ||||
| @@ -61,3 +63,7 @@ type Monitoring struct { | |||||
// SnowflakeConf is the configuration section holding a single machine ID.
// NOTE(review): presumably the node ID used for snowflake-style ID
// generation; confirm against the code that reads it.
| type SnowflakeConf struct { | type SnowflakeConf struct { | ||||
// MachineId is read from the "machineId" key.
| MachineId int64 `json:"machineId"` | MachineId int64 `json:"machineId"` | ||||
| } | } | ||||
// JcsMiddleware is the configuration section for the JCS middleware that
// receives job status reports.
| type JcsMiddleware struct { | |||||
// Url is handed unchanged to jcs.StatusReport, which uses it as the POST
// target.
// NOTE(review): the sample config value is a bare host:port with no scheme
// or path; presumably the HTTP client needs a full URL such as
// "http://host:port/path". Confirm before relying on it.
| Url string | |||||
| } | |||||
| @@ -29,7 +29,6 @@ import ( | |||||
| "gorm.io/gorm" | "gorm.io/gorm" | ||||
| "sigs.k8s.io/yaml" | "sigs.k8s.io/yaml" | ||||
| "strings" | "strings" | ||||
| "sync" | |||||
| ) | ) | ||||
| type Scheduler struct { | type Scheduler struct { | ||||
| @@ -40,7 +39,6 @@ type Scheduler struct { | |||||
| result []string //pID:子任务yamlstring 键值对 | result []string //pID:子任务yamlstring 键值对 | ||||
| AiStorages *database.AiStorage | AiStorages *database.AiStorage | ||||
| AiService *service.AiService | AiService *service.AiService | ||||
| mu sync.RWMutex | |||||
| } | } | ||||
| type SubSchedule interface { | type SubSchedule interface { | ||||
| @@ -26,6 +26,7 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/jcs" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy/param" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy/param" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | ||||
| @@ -256,6 +257,13 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||||
| return err | return err | ||||
| } | } | ||||
| //report msg | |||||
| report := &jcs.JobStatusReportReq{ | |||||
| TaskName: "", | |||||
| TaskID: strconv.FormatInt(taskId, 10), | |||||
| Messages: make([]*jcs.ReportMessage, 0), | |||||
| } | |||||
| var errmsg string | var errmsg string | ||||
| for _, err := range errs { | for _, err := range errs { | ||||
| e := (err).(struct { | e := (err).(struct { | ||||
| @@ -271,6 +279,15 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||||
| if err != nil { | if err != nil { | ||||
| return errors.New("database add failed: " + err.Error()) | return errors.New("database add failed: " + err.Error()) | ||||
| } | } | ||||
| //add report msg | |||||
| jobMsg := &jcs.ReportMessage{ | |||||
| Status: false, | |||||
| Message: msg, | |||||
| ClusterID: e.clusterId, | |||||
| Output: "", | |||||
| } | |||||
| report.Messages = append(report.Messages, jobMsg) | |||||
| } | } | ||||
| for _, s := range results { | for _, s := range results { | ||||
| as.option.ComputeCard = s.Card //execute card | as.option.ComputeCard = s.Card //execute card | ||||
| @@ -291,7 +308,21 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||||
| return errors.New("database add failed: " + err.Error()) | return errors.New("database add failed: " + err.Error()) | ||||
| } | } | ||||
| } | } | ||||
| //add report msg | |||||
| jobMsg := &jcs.ReportMessage{ | |||||
| Status: false, | |||||
| Message: s.Msg, | |||||
| ClusterID: s.ClusterId, | |||||
| Output: "", | |||||
| } | |||||
| report.Messages = append(report.Messages, jobMsg) | |||||
| } | |||||
| //report status | |||||
| if mode == executor.SUBMIT_MODE_STORAGE_SCHEDULE { | |||||
| _ = jcs.StatusReport(as.AiService.Conf.JcsMiddleware.Url, report) | |||||
| } | } | ||||
| logx.Errorf(errors.New(errmsg).Error()) | logx.Errorf(errors.New(errmsg).Error()) | ||||
| return errors.New(errmsg) | return errors.New(errmsg) | ||||
| } | } | ||||
| @@ -29,6 +29,7 @@ type AiService struct { | |||||
| InferenceAdapterMap map[string]map[string]inference.ICluster | InferenceAdapterMap map[string]map[string]inference.ICluster | ||||
| Storage *database.AiStorage | Storage *database.AiStorage | ||||
| LocalCache map[string]interface{} | LocalCache map[string]interface{} | ||||
| Conf *config.Config | |||||
| } | } | ||||
| func NewAiService(conf *config.Config, storages *database.AiStorage, localCache map[string]interface{}) (*AiService, error) { | func NewAiService(conf *config.Config, storages *database.AiStorage, localCache map[string]interface{}) (*AiService, error) { | ||||
| @@ -43,6 +44,7 @@ func NewAiService(conf *config.Config, storages *database.AiStorage, localCache | |||||
| InferenceAdapterMap: make(map[string]map[string]inference.ICluster), | InferenceAdapterMap: make(map[string]map[string]inference.ICluster), | ||||
| Storage: storages, | Storage: storages, | ||||
| LocalCache: localCache, | LocalCache: localCache, | ||||
| Conf: conf, | |||||
| } | } | ||||
| for _, id := range adapterIds { | for _, id := range adapterIds { | ||||
| clusters, err := storages.GetClustersByAdapterId(id) | clusters, err := storages.GetClustersByAdapterId(id) | ||||
| @@ -0,0 +1,38 @@ | |||||
| package jcs | |||||
| import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-openi/common" | |||||
| ) | |||||
// Payload types for the job status report sent to the JCS middleware.
type (
	// JobStatusReportReq is the JSON body posted by StatusReport. It
	// identifies one task and carries one ReportMessage per reported cluster.
	JobStatusReportReq struct {
		TaskName string           `json:"taskName"`
		TaskID   string           `json:"taskID"`
		Messages []*ReportMessage `json:"messages"`
	}

	// ReportMessage describes the outcome reported for a single cluster.
	// Status is the success flag, Message is free-form text, ClusterID names
	// the cluster and Output is an output string supplied by the reporter
	// (may be empty).
	ReportMessage struct {
		Status    bool   `json:"status"`
		Message   string `json:"message"`
		ClusterID string `json:"clusterID"`
		Output    string `json:"output"`
	}
)
| func StatusReport(url string, report *JobStatusReportReq) error { | |||||
| resp := struct { | |||||
| Code int `json:"code"` | |||||
| Msg string `json:"msg"` | |||||
| Data interface{} `json:"data"` | |||||
| }{} | |||||
| req := common.GetRestyRequest(common.TIMEOUT) | |||||
| _, err := req. | |||||
| SetHeader("Content-Type", "application/json"). | |||||
| SetBody(&report). | |||||
| SetResult(&resp). | |||||
| Post(url) | |||||
| if err != nil { | |||||
| return err | |||||
| } | |||||
| return nil | |||||
| } | |||||
| @@ -4,6 +4,7 @@ import ( | |||||
| "errors" | "errors" | ||||
| "fmt" | "fmt" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/jcs" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | ||||
| @@ -61,6 +62,7 @@ func UpdateTaskStatus(svc *svc.ServiceContext, tasklist []*types.TaskModel) { | |||||
| if len(aiTask) == 1 { | if len(aiTask) == 1 { | ||||
| if aiTask[0].Status == constants.Completed { | if aiTask[0].Status == constants.Completed { | ||||
| task.Status = constants.Succeeded | task.Status = constants.Succeeded | ||||
| _ = reportStatusMessages(svc, task, aiTask[0]) | |||||
| } else { | } else { | ||||
| task.Status = aiTask[0].Status | task.Status = aiTask[0].Status | ||||
| } | } | ||||
| @@ -142,6 +144,26 @@ func UpdateTaskStatus(svc *svc.ServiceContext, tasklist []*types.TaskModel) { | |||||
| } | } | ||||
| } | } | ||||
| func reportStatusMessages(svc *svc.ServiceContext, task *types.TaskModel, aiTask *models.TaskAi) error { | |||||
| report := &jcs.JobStatusReportReq{ | |||||
| TaskName: task.Name, | |||||
| TaskID: strconv.FormatInt(task.Id, 10), | |||||
| Messages: make([]*jcs.ReportMessage, 0), | |||||
| } | |||||
| //add report msg | |||||
| jobMsg := &jcs.ReportMessage{ | |||||
| Status: true, | |||||
| Message: "", | |||||
| ClusterID: strconv.FormatInt(aiTask.ClusterId, 10), | |||||
| Output: aiTask.JobId, | |||||
| } | |||||
| report.Messages = append(report.Messages, jobMsg) | |||||
| _ = jcs.StatusReport(svc.Scheduler.AiService.Conf.JcsMiddleware.Url, report) | |||||
| return nil | |||||
| } | |||||
| func updateInferTaskStatus(svc *svc.ServiceContext, task types.TaskModel) { | func updateInferTaskStatus(svc *svc.ServiceContext, task types.TaskModel) { | ||||
| aiTask, err := svc.Scheduler.AiStorages.GetAiTaskListById(task.Id) | aiTask, err := svc.Scheduler.AiStorages.GetAiTaskListById(task.Id) | ||||
| if err != nil { | if err != nil { | ||||
| @@ -5975,6 +5975,7 @@ type DatasetDistribute struct { | |||||
// CodeDistribute carries a data name, a package ID, an output string and a
// list of ClusterScheduled entries.
// NOTE(review): unlike DataName, Output has no `optional` tag option, so
// presumably every caller must now send it; confirm existing clients do.
| type CodeDistribute struct { | type CodeDistribute struct { | ||||
| DataName string `json:"dataName,optional"` | DataName string `json:"dataName,optional"` | ||||
| PackageID int64 `json:"packageID"` | PackageID int64 `json:"packageID"` | ||||
| Output string `json:"output"` | |||||
| Clusters []*ClusterScheduled `json:"clusters"` | Clusters []*ClusterScheduled `json:"clusters"` | ||||
| } | } | ||||