| @@ -204,6 +204,7 @@ type ( | |||
| CodeDistribute { | |||
| DataName string `json:"dataName,optional"` | |||
| PackageID int64 `json:"packageID"` | |||
| Output string `json:"output"` | |||
| Clusters []*ClusterScheduled `json:"clusters"` | |||
| } | |||
| @@ -81,4 +81,7 @@ BlockChain: | |||
| ContractAddress: 0x22ac23bf2d2cf1b4d8fec9cb4d279c7da6718e35 | |||
| FunctionName: "storeEvidence" | |||
| MemberName: "pcm" | |||
| Type: "2" | |||
| Type: "2" | |||
| JcsMiddleware: | |||
| Url: 101.201.215.196:7891 | |||
| @@ -528,14 +528,6 @@ gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20250107025835-8fc888b1d170 h1:/n3pl6WuH | |||
| gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20250107025835-8fc888b1d170/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY= | |||
| gitlink.org.cn/JointCloud/pcm-hpc v0.0.0-20241125115811-72f3568255a4 h1:WIs/189lRLNMXF2ui/Wm1+Y55eJ53BVGx+4+gdn9cls= | |||
| gitlink.org.cn/JointCloud/pcm-hpc v0.0.0-20241125115811-72f3568255a4/go.mod h1:YbuoRgF9sEVvNJPQtGRjdocX7Du6NBOTLn+GVwqRVjo= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250108072048-9adf0597b07c h1:9LphS29VNfoWT73eqhgwKV1nG8PcoDUNu7dRev845wA= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250108072048-9adf0597b07c/go.mod h1:V19vFg8dWRAbaskASoSj70dgpacswOqZu/SaI02dxac= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304023304-d556ce8161c7 h1:pv1WX3+ttqsHs7nr7+lfYNkvzUp1KIJQ0XzWbVetj6w= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304023304-d556ce8161c7/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304035519-da6ab53b969d h1:EfAxN4oaCVIRsnM3pnC7NskifFRjM/THBUiMGtQQzfg= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250304035519-da6ab53b969d/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306022112-4ed1f08d3170 h1:NsHFtWPpcL8nF0s4v0DHuHuPaPFgMO9xITQCMM7Du1E= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306022112-4ed1f08d3170/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306073530-56ecf1273207 h1:korhOkFl0x1wuQBKoKTsQHeFboDwLFRWwR2G9IPPfNg= | |||
| gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20250306073530-56ecf1273207/go.mod h1:MxtnJJcU8S4zfGKZVcg2MOXGtwucKy7MMDwA0IemBd0= | |||
| gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo= | |||
| @@ -51,6 +51,8 @@ type Config struct { | |||
| SnowflakeConf SnowflakeConf | |||
| Monitoring Monitoring | |||
| JcsMiddleware JcsMiddleware | |||
| } | |||
| type Monitoring struct { | |||
| PromUrl string | |||
| @@ -61,3 +63,7 @@ type Monitoring struct { | |||
// SnowflakeConf holds the machine identifier setting.
// NOTE(review): presumably the node ID fed to a snowflake ID generator —
// confirm against the code that reads this section.
type SnowflakeConf struct {
	// MachineId is (de)serialised under the JSON key "machineId".
	MachineId int64 `json:"machineId"`
}
// JcsMiddleware is the configuration section for the JCS middleware.
type JcsMiddleware struct {
	// Url is the address status reports are POSTed to.
	Url string
}
| @@ -29,7 +29,6 @@ import ( | |||
| "gorm.io/gorm" | |||
| "sigs.k8s.io/yaml" | |||
| "strings" | |||
| "sync" | |||
| ) | |||
| type Scheduler struct { | |||
| @@ -40,7 +39,6 @@ type Scheduler struct { | |||
| result []string //pID:子任务yamlstring 键值对 | |||
| AiStorages *database.AiStorage | |||
| AiService *service.AiService | |||
| mu sync.RWMutex | |||
| } | |||
| type SubSchedule interface { | |||
| @@ -26,6 +26,7 @@ import ( | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/jcs" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy/param" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | |||
| @@ -256,6 +257,13 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||
| return err | |||
| } | |||
| //report msg | |||
| report := &jcs.JobStatusReportReq{ | |||
| TaskName: "", | |||
| TaskID: strconv.FormatInt(taskId, 10), | |||
| Messages: make([]*jcs.ReportMessage, 0), | |||
| } | |||
| var errmsg string | |||
| for _, err := range errs { | |||
| e := (err).(struct { | |||
| @@ -271,6 +279,15 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||
| if err != nil { | |||
| return errors.New("database add failed: " + err.Error()) | |||
| } | |||
| //add report msg | |||
| jobMsg := &jcs.ReportMessage{ | |||
| Status: false, | |||
| Message: msg, | |||
| ClusterID: e.clusterId, | |||
| Output: "", | |||
| } | |||
| report.Messages = append(report.Messages, jobMsg) | |||
| } | |||
| for _, s := range results { | |||
| as.option.ComputeCard = s.Card //execute card | |||
| @@ -291,7 +308,21 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||
| return errors.New("database add failed: " + err.Error()) | |||
| } | |||
| } | |||
| //add report msg | |||
| jobMsg := &jcs.ReportMessage{ | |||
| Status: false, | |||
| Message: s.Msg, | |||
| ClusterID: s.ClusterId, | |||
| Output: "", | |||
| } | |||
| report.Messages = append(report.Messages, jobMsg) | |||
| } | |||
| //report status | |||
| if mode == executor.SUBMIT_MODE_STORAGE_SCHEDULE { | |||
| _ = jcs.StatusReport(as.AiService.Conf.JcsMiddleware.Url, report) | |||
| } | |||
| logx.Errorf(errors.New(errmsg).Error()) | |||
| return errors.New(errmsg) | |||
| } | |||
| @@ -29,6 +29,7 @@ type AiService struct { | |||
| InferenceAdapterMap map[string]map[string]inference.ICluster | |||
| Storage *database.AiStorage | |||
| LocalCache map[string]interface{} | |||
| Conf *config.Config | |||
| } | |||
| func NewAiService(conf *config.Config, storages *database.AiStorage, localCache map[string]interface{}) (*AiService, error) { | |||
| @@ -43,6 +44,7 @@ func NewAiService(conf *config.Config, storages *database.AiStorage, localCache | |||
| InferenceAdapterMap: make(map[string]map[string]inference.ICluster), | |||
| Storage: storages, | |||
| LocalCache: localCache, | |||
| Conf: conf, | |||
| } | |||
| for _, id := range adapterIds { | |||
| clusters, err := storages.GetClustersByAdapterId(id) | |||
| @@ -0,0 +1,38 @@ | |||
| package jcs | |||
| import ( | |||
| "gitlink.org.cn/JointCloud/pcm-openi/common" | |||
| ) | |||
| type JobStatusReportReq struct { | |||
| TaskName string `json:"taskName"` | |||
| TaskID string `json:"taskID"` | |||
| Messages []*ReportMessage `json:"messages"` | |||
| } | |||
// ReportMessage is a single entry of a JobStatusReportReq, tied to one
// cluster via ClusterID.
type ReportMessage struct {
	// Status is the boolean outcome flag for this entry.
	Status bool `json:"status"`
	// Message is free-form text accompanying the status.
	Message string `json:"message"`
	// ClusterID identifies the cluster, encoded as a string.
	ClusterID string `json:"clusterID"`
	// Output is an opaque result string supplied by the reporter.
	Output string `json:"output"`
}
| func StatusReport(url string, report *JobStatusReportReq) error { | |||
| resp := struct { | |||
| Code int `json:"code"` | |||
| Msg string `json:"msg"` | |||
| Data interface{} `json:"data"` | |||
| }{} | |||
| req := common.GetRestyRequest(common.TIMEOUT) | |||
| _, err := req. | |||
| SetHeader("Content-Type", "application/json"). | |||
| SetBody(&report). | |||
| SetResult(&resp). | |||
| Post(url) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| return nil | |||
| } | |||
| @@ -4,6 +4,7 @@ import ( | |||
| "errors" | |||
| "fmt" | |||
| "github.com/zeromicro/go-zero/core/logx" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/jcs" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | |||
| @@ -61,6 +62,7 @@ func UpdateTaskStatus(svc *svc.ServiceContext, tasklist []*types.TaskModel) { | |||
| if len(aiTask) == 1 { | |||
| if aiTask[0].Status == constants.Completed { | |||
| task.Status = constants.Succeeded | |||
| _ = reportStatusMessages(svc, task, aiTask[0]) | |||
| } else { | |||
| task.Status = aiTask[0].Status | |||
| } | |||
| @@ -142,6 +144,26 @@ func UpdateTaskStatus(svc *svc.ServiceContext, tasklist []*types.TaskModel) { | |||
| } | |||
| } | |||
| func reportStatusMessages(svc *svc.ServiceContext, task *types.TaskModel, aiTask *models.TaskAi) error { | |||
| report := &jcs.JobStatusReportReq{ | |||
| TaskName: task.Name, | |||
| TaskID: strconv.FormatInt(task.Id, 10), | |||
| Messages: make([]*jcs.ReportMessage, 0), | |||
| } | |||
| //add report msg | |||
| jobMsg := &jcs.ReportMessage{ | |||
| Status: true, | |||
| Message: "", | |||
| ClusterID: strconv.FormatInt(aiTask.ClusterId, 10), | |||
| Output: aiTask.JobId, | |||
| } | |||
| report.Messages = append(report.Messages, jobMsg) | |||
| _ = jcs.StatusReport(svc.Scheduler.AiService.Conf.JcsMiddleware.Url, report) | |||
| return nil | |||
| } | |||
| func updateInferTaskStatus(svc *svc.ServiceContext, task types.TaskModel) { | |||
| aiTask, err := svc.Scheduler.AiStorages.GetAiTaskListById(task.Id) | |||
| if err != nil { | |||
| @@ -5975,6 +5975,7 @@ type DatasetDistribute struct { | |||
| type CodeDistribute struct { | |||
| DataName string `json:"dataName,optional"` | |||
| PackageID int64 `json:"packageID"` | |||
| Output string `json:"output"` | |||
| Clusters []*ClusterScheduled `json:"clusters"` | |||
| } | |||