| @@ -59,9 +59,9 @@ type ( | |||
| Type int64 `json:"type"` // 租户所属(0数算,1超算,2智算) | |||
| DeletedFlag int64 `json:"deletedFlag"` // 是否删除 | |||
| CreatedBy int64 `json:"createdBy"` // 创建人 | |||
| CreateTime string `json:"createdTime"` // 创建时间 | |||
| CreateTime string `json:"createdTime"` // 创建时间 | |||
| UpdatedBy int64 `json:"updatedBy"` // 更新人 | |||
| UpdateTime string `json:"updated_time"` // 更新时间 | |||
| UpdateTime string `json:"updated_time"` // 更新时间 | |||
| } | |||
| UpdateTenantReq { | |||
| @@ -103,6 +103,7 @@ type DataSet { | |||
| // cloudListResp is the response body that carries a list of Cloud records. | |||
| type cloudListResp { | |||
| Clouds []Cloud `json:"clouds"` // Cloud entries, serialized under "clouds" | |||
| } | |||
| type Cloud { | |||
| Id int64 `json:"id"` // id | |||
| TaskId int64 `json:"taskId"` // 任务id | |||
| @@ -115,6 +116,13 @@ type Cloud { | |||
| StartTime string `json:"startTime"` // 开始时间 | |||
| RunningTime int64 `json:"runningTime"` // 运行时长 | |||
| CreatedBy int64 `json:"createdBy"` // 创建人 | |||
| CreateTime string `json:"createdTime"` // 创建时间 | |||
| CreateTime string `json:"createdTime"` // 创建时间 | |||
| Result string `json:"result"` | |||
| } | |||
| // PodsListReq is the request type of the podsList handler (get /cloud/pods/list). | |||
| type PodsListReq { | |||
| ClusterName string `form:"clusterName"` // cluster name, bound through the form tag "clusterName" | |||
| } | |||
| // PodsListResp is the response type returned by the podsList handler. | |||
| type PodsListResp { | |||
| Data []interface{} `json:"data"` // untyped result items, serialized under "data" | |||
| } | |||
| @@ -128,6 +128,7 @@ type ( | |||
| PodsUtilisation float64 `json:"podsUtilisation,optional"` | |||
| PodsCount int64 `json:"podsCount,optional"` | |||
| PodsTotal int64 `json:"podsTotal,optional"` | |||
| NodeCount float64 `json:"nodeCount,optional"` | |||
| } | |||
| ) | |||
| @@ -1455,7 +1456,7 @@ type EditResourceReq { | |||
| CostType string `json:"costType" gorm:"column:cost_type"` //计费类型(hourly, daily, monthly,perUse) | |||
| Type string `json:"type,optional" gorm:"column:type"` | |||
| // 基础资源规格 | |||
| // 基础资源规格 | |||
| StorageValue string `json:"storageValue,optional"` | |||
| StorageUnit string `json:"storageUnit,optional"` | |||
| CpuValue string `json:"cpuValue,optional"` | |||
| @@ -203,7 +203,7 @@ service pcm { | |||
| @doc "删除资源规格" | |||
| @handler deleteResourceSpecHandler | |||
| delete /core/ai/resourceSpec/delete/:id (DeletePathId) returns (CommonResp) | |||
| //集群资源规格----- 结束 | |||
| //集群资源规格----- 结束 | |||
| } | |||
| //hpc二级接口 | |||
| @@ -289,6 +289,9 @@ service pcm { | |||
| @handler podLogs | |||
| post /cloud/pod/logs (PodLogsReq) returns (string) | |||
| @handler podsList | |||
| get /cloud/pods/list (PodsListReq) returns (PodsListResp) | |||
| } | |||
| //智算二级接口 | |||
| @@ -441,7 +444,7 @@ service pcm { | |||
| @doc "文本识别" | |||
| @handler ChatHandler | |||
| post /ai/chat (ChatReq) returns (ChatResult) | |||
| /******chat end***********/ | |||
| /******chat end***********/ | |||
| } | |||
| //screen接口 | |||
| @@ -1130,5 +1133,4 @@ service pcm { | |||
| @handler scheduleSituationHandler | |||
| get /monitoring/schedule/situation returns (scheduleSituationResp) | |||
| } | |||
| } | |||
| @@ -12,6 +12,7 @@ require ( | |||
| github.com/golang-jwt/jwt/v5 v5.2.2 | |||
| github.com/jinzhu/copier v0.4.0 | |||
| github.com/json-iterator/go v1.1.12 | |||
| github.com/mitchellh/mapstructure v1.5.0 | |||
| github.com/pkg/errors v0.9.1 | |||
| github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 | |||
| github.com/prometheus/alertmanager v0.27.0 | |||
| @@ -35,6 +36,7 @@ require ( | |||
| gorm.io/datatypes v1.2.0 | |||
| gorm.io/driver/mysql v1.5.7 | |||
| gorm.io/gorm v1.25.12 | |||
| k8s.io/api v0.31.4 | |||
| k8s.io/apimachinery v0.31.4 | |||
| k8s.io/client-go v0.31.4 | |||
| sigs.k8s.io/yaml v1.4.0 | |||
| @@ -120,7 +122,6 @@ require ( | |||
| github.com/mattn/go-isatty v0.0.20 // indirect | |||
| github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect | |||
| github.com/miekg/dns v1.1.58 // indirect | |||
| github.com/mitchellh/mapstructure v1.5.0 // indirect | |||
| github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect | |||
| github.com/modern-go/reflect2 v1.0.2 // indirect | |||
| github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect | |||
| @@ -186,7 +187,6 @@ require ( | |||
| google.golang.org/protobuf v1.36.5 // indirect | |||
| gopkg.in/inf.v0 v0.9.1 // indirect | |||
| gopkg.in/ini.v1 v1.67.0 // indirect | |||
| k8s.io/api v0.31.4 // indirect | |||
| k8s.io/klog/v2 v2.130.1 // indirect | |||
| k8s.io/kube-openapi v0.0.0-20241127205056-99599406b04f // indirect | |||
| k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect | |||
| @@ -39,7 +39,7 @@ func NewCloudListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CloudLi | |||
| func (l *CloudListLogic) CloudList() (resp *types.CloudListResp, err error) { | |||
| // 查询数据库中数算任务列表 | |||
| var clouds []*models.Cloud | |||
| var clouds []*models.TaskCloud | |||
| tx := l.svcCtx.DbEngin.Find(&clouds) | |||
| if tx.Error != nil { | |||
| return nil, tx.Error | |||
| @@ -47,8 +47,8 @@ func (l *DeleteTaskLogic) DeleteTask(req *types.DeleteTaskReq) error { | |||
| return tx.Error | |||
| } | |||
| // 将子任务状态修改为待删除 | |||
| tx = l.svcCtx.DbEngin.Model(&models.Cloud{}).Where("task_id", req.Id).Update("status", constants.WaitDelete) | |||
| l.svcCtx.DbEngin.Where("task_id = ?", req.Id).Delete(&models.Cloud{}, req.Id) | |||
| tx = l.svcCtx.DbEngin.Model(&models.TaskCloud{}).Where("task_id", req.Id).Update("status", constants.WaitDelete) | |||
| l.svcCtx.DbEngin.Where("task_id = ?", req.Id).Delete(&models.TaskCloud{}, req.Id) | |||
| if tx.Error != nil { | |||
| return tx.Error | |||
| } | |||
| @@ -19,10 +19,6 @@ import ( | |||
| "github.com/zeromicro/go-zero/core/logx" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/helper/enum" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/httputils" | |||
| "k8s.io/apimachinery/pkg/util/json" | |||
| ) | |||
| type JobTotalLogic struct { | |||
| @@ -55,60 +51,60 @@ func NewJobTotalLogic(ctx context.Context, svcCtx *svc.ServiceContext) *JobTotal | |||
| } | |||
| // JobTotal fills a JobTotalResp from two remote grampus endpoints and then | |||
| // appends one TrainJob per row of the local task table. | |||
| // The trailing commented-out block repeats the same logic with a one-argument | |||
| // HttpGet call. | |||
| // NOTE(review): both json.Unmarshal results are ignored, job.Tasks[0] is indexed | |||
| // without a length check, and the tx.Error branches log err instead of tx.Error. | |||
| // NOTE(review): when no task rows are found the function returns (nil, nil), | |||
| // dropping whatever was already collected in resp. | |||
| func (l *JobTotalLogic) JobTotal() (resp *types.JobTotalResp, err error) { | |||
| // Fetch job time information | |||
| resp = &types.JobTotalResp{} | |||
| bytes, err := httputils.HttpGet("GET", "http://grampus.openi.org.cn/openapi/v1/sharescreen/computepower/alljobinfo") | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| json.Unmarshal(bytes, resp) | |||
| // Fetch the other job information | |||
| jobs := &Job{} | |||
| jobBytes, err := httputils.HttpGet("GET", "http://grampus.openi.org.cn/openapi/v1/sharescreen/trainjob?pageIndex=1&pageSize=10") | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| json.Unmarshal(jobBytes, jobs) | |||
| for _, job := range jobs.OtJobs { | |||
| trainJob := types.TrainJob{ | |||
| Name: job.Name, | |||
| Status: enum.ExternalStatus(job.Status).String(), | |||
| Strategy: 0, | |||
| SynergyStatus: "未协同", | |||
| } | |||
| if job.Tasks[0].CenterName != nil { | |||
| trainJob.ParticipantName = job.Tasks[0].CenterName[0] | |||
| } | |||
| resp.TrainJobs = append(resp.TrainJobs, trainJob) | |||
| } | |||
| var tasks []models.Task | |||
| tx := l.svcCtx.DbEngin.Find(&tasks) | |||
| if tx.Error != nil { | |||
| logx.Error(err) | |||
| return nil, tx.Error | |||
| } | |||
| if len(tasks) == 0 { | |||
| return nil, nil | |||
| } | |||
| for _, task := range tasks { | |||
| var participantName string | |||
| tx := l.svcCtx.DbEngin.Raw("SELECT name from sc_participant_phy_info where id in (SELECT CONCAT_WS(',',GROUP_CONCAT(DISTINCT h.participant_id) ,GROUP_CONCAT(DISTINCT a.participant_id) ,GROUP_CONCAT(DISTINCT c.participant_id))as service_name from task t left join hpc h on t.id = h.task_id left join cloud c on t.id = c.task_id left join ai a on t.id = a.task_id where t.id = ?)", task.Id).Scan(&participantName) | |||
| if tx.Error != nil { | |||
| logx.Error(err) | |||
| return nil, tx.Error | |||
| } | |||
| // Map the undertaking participant | |||
| resp.TrainJobs = append(resp.TrainJobs, types.TrainJob{ | |||
| ParticipantName: participantName, | |||
| Name: task.Name, | |||
| Strategy: int(task.Strategy), | |||
| SynergyStatus: enum.SynergyStatus(task.SynergyStatus).String(), | |||
| Status: task.Status, | |||
| }) | |||
| } | |||
| //// Fetch job time information | |||
| //resp = &types.JobTotalResp{} | |||
| //bytes, err := httputils.HttpGet("http://grampus.openi.org.cn/openapi/v1/sharescreen/computepower/alljobinfo") | |||
| //if err != nil { | |||
| // return nil, err | |||
| //} | |||
| //json.Unmarshal(bytes, resp) | |||
| // | |||
| //// Fetch the other job information | |||
| //jobs := &Job{} | |||
| //jobBytes, err := httputils.HttpGet("http://grampus.openi.org.cn/openapi/v1/sharescreen/trainjob?pageIndex=1&pageSize=10") | |||
| //if err != nil { | |||
| // return nil, err | |||
| //} | |||
| //json.Unmarshal(jobBytes, jobs) | |||
| // | |||
| //for _, job := range jobs.OtJobs { | |||
| // trainJob := types.TrainJob{ | |||
| // Name: job.Name, | |||
| // Status: enum.ExternalStatus(job.Status).String(), | |||
| // Strategy: 0, | |||
| // SynergyStatus: "未协同", | |||
| // } | |||
| // if job.Tasks[0].CenterName != nil { | |||
| // trainJob.ParticipantName = job.Tasks[0].CenterName[0] | |||
| // } | |||
| // resp.TrainJobs = append(resp.TrainJobs, trainJob) | |||
| //} | |||
| // | |||
| //var tasks []models.Task | |||
| //tx := l.svcCtx.DbEngin.Find(&tasks) | |||
| //if tx.Error != nil { | |||
| // logx.Error(err) | |||
| // return nil, tx.Error | |||
| //} | |||
| //if len(tasks) == 0 { | |||
| // return nil, nil | |||
| //} | |||
| //for _, task := range tasks { | |||
| // var participantName string | |||
| // tx := l.svcCtx.DbEngin.Raw("SELECT name from sc_participant_phy_info where id in (SELECT CONCAT_WS(',',GROUP_CONCAT(DISTINCT h.participant_id) ,GROUP_CONCAT(DISTINCT a.participant_id) ,GROUP_CONCAT(DISTINCT c.participant_id))as service_name from task t left join hpc h on t.id = h.task_id left join cloud c on t.id = c.task_id left join ai a on t.id = a.task_id where t.id = ?)", task.Id).Scan(&participantName) | |||
| // if tx.Error != nil { | |||
| // logx.Error(err) | |||
| // return nil, tx.Error | |||
| // } | |||
| // // Map the undertaking participant | |||
| // resp.TrainJobs = append(resp.TrainJobs, types.TrainJob{ | |||
| // ParticipantName: participantName, | |||
| // Name: task.Name, | |||
| // Strategy: int(task.Strategy), | |||
| // SynergyStatus: enum.SynergyStatus(task.SynergyStatus).String(), | |||
| // Status: task.Status, | |||
| // }) | |||
| // | |||
| //} | |||
| return resp, nil | |||
| } | |||
| @@ -27,7 +27,7 @@ func NewClustersLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Clus | |||
| func (l *ClustersLoadLogic) ClustersLoad(req *types.ClustersLoadReq) (resp *types.ClustersLoadResp, err error) { | |||
| resp = &types.ClustersLoadResp{} | |||
| metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total", "cluster_pod_utilisation"} | |||
| metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total", "cluster_pod_utilisation", "cluster_node_count"} | |||
| result := l.svcCtx.PromClient.GetNamedMetrics(metrics, time.Now(), tracker.ClusterOption{ClusterName: req.ClusterName}) | |||
| resp.Data = result | |||
| return resp, nil | |||
| @@ -14,7 +14,7 @@ import ( | |||
| ) | |||
| var ( | |||
| cloudFieldNames = builder.RawFieldNames(&Cloud{}) | |||
| cloudFieldNames = builder.RawFieldNames(&TaskCloud{}) | |||
| cloudRows = strings.Join(cloudFieldNames, ",") | |||
| cloudRowsExpectAutoSet = strings.Join(stringx.Remove(cloudFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",") | |||
| cloudRowsWithPlaceHolder = strings.Join(stringx.Remove(cloudFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?" | |||
| @@ -22,10 +22,10 @@ var ( | |||
| type ( | |||
| // cloudModel declares the persistence operations for cloud task rows: insert, | |||
| // lookup by id or by (namespace, name, service name), update, and delete by id. | |||
| cloudModel interface { | |||
| Insert(ctx context.Context, data *Cloud) (sql.Result, error) | |||
| FindOne(ctx context.Context, id int64) (*Cloud, error) | |||
| FindOneByNamespaceNameServiceName(ctx context.Context, namespace sql.NullString, name sql.NullString, serviceName sql.NullString) (*Cloud, error) | |||
| Update(ctx context.Context, data *Cloud) error | |||
| Insert(ctx context.Context, data *TaskCloud) (sql.Result, error) | |||
| FindOne(ctx context.Context, id int64) (*TaskCloud, error) | |||
| FindOneByNamespaceNameServiceName(ctx context.Context, namespace sql.NullString, name sql.NullString, serviceName sql.NullString) (*TaskCloud, error) | |||
| Update(ctx context.Context, data *TaskCloud) error | |||
| Delete(ctx context.Context, id int64) error | |||
| } | |||
| @@ -34,7 +34,7 @@ type ( | |||
| table string | |||
| } | |||
| Cloud struct { | |||
| TaskCloud struct { | |||
| Id int64 `db:"id"` // id | |||
| TaskId int64 `db:"task_id"` // 任务id | |||
| ParticipantId int64 `db:"participant_id"` // 集群静态信息id | |||
| @@ -56,7 +56,7 @@ type ( | |||
| // newCloudModel returns a defaultCloudModel that wraps conn and carries the | |||
| // backtick-quoted table name assigned below. | |||
| func newCloudModel(conn sqlx.SqlConn) *defaultCloudModel { | |||
| return &defaultCloudModel{ | |||
| conn: conn, | |||
| table: "`cloud`", | |||
| table: "`task_cloud`", | |||
| } | |||
| } | |||
| @@ -31,6 +31,7 @@ var promQLTemplates = map[string]string{ | |||
| "cluster_memory_avail": "cluster_memory_avail{$1}", | |||
| "cluster_disk_avail": "cluster_disk_avail{$1}", | |||
| "cluster_pod_utilisation": "cluster_pod_utilisation{$1}", | |||
| // cluster_node_count must read the gauge of the same name; the previous template queried cluster_pod_utilisation. | |||
| "cluster_node_count": `cluster_node_count{$1}`, | |||
| // center | |||
| "center_cpu_utilisation": "(sum by (adapter_id)(cluster_cpu_total{$1})-sum by (adapter_id)(cluster_cpu_avail{$1}))/sum by (adapter_id)(cluster_cpu_total{$1})", | |||
| @@ -90,6 +90,10 @@ var ( | |||
| Name: "cluster_gpu_avail", | |||
| Help: "Cluster Gpu Available.", | |||
| }, []string{"cluster_name", "adapter_id"}) | |||
| // ClusterNodeCountGauge is the "cluster_node_count" gauge vector, labelled by | |||
| // cluster_name and adapter_id. | |||
| ClusterNodeCountGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | |||
| Name: "cluster_node_count", | |||
| Help: "Cluster Node Count.", | |||
| }, []string{"cluster_name", "adapter_id"}) | |||
| metrics = []prometheus.Collector{ | |||
| ClusterCpuUtilisationGauge, | |||
| @@ -104,6 +108,7 @@ var ( | |||
| ClusterPodUtilisationGauge, | |||
| ClusterPodCountGauge, | |||
| ClusterPodTotalGauge, | |||
| ClusterNodeCountGauge, | |||
| } | |||
| ) | |||
| @@ -122,6 +127,7 @@ type ClusterLoadRecord struct { | |||
| PodsUtilisation float64 `json:"podsUtilisation,optional"` | |||
| PodsCount int64 `json:"podsCount,optional"` | |||
| PodsTotal int64 `json:"podsTotal,optional"` | |||
| NodeCount float64 `json:"nodeCount,optional"` | |||
| } | |||
| func init() { | |||
| @@ -333,7 +339,9 @@ func SyncClusterLoad(record ClusterLoadRecord) { | |||
| ClusterDiskAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskAvail) | |||
| ClusterDiskTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskTotal) | |||
| ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation) | |||
| ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount)) | |||
| ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal)) | |||
| //ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation) | |||
| //ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount)) | |||
| //ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal)) | |||
| ClusterNodeCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.NodeCount) | |||
| } | |||
| @@ -62,13 +62,6 @@ func NewHttpsClient() *resty.Client { | |||
| return c | |||
| } | |||
| // GetHttpRequest creates a fresh resty client and returns a new request bound to it. | |||
| func GetHttpRequest() *resty.Request { | |||
| return resty.New().R() | |||
| } | |||
| func HttpClient(method string, url string, payload io.Reader, token string) ([]byte, error) { | |||
| request, err := http.NewRequest(method, url, payload) | |||
| request.Header.Add("Content-Type", "application/json") | |||
| @@ -88,20 +81,17 @@ func HttpClient(method string, url string, payload io.Reader, token string) ([]b | |||
| return body, err | |||
| } | |||
| func HttpGet(method string, url string) ([]byte, error) { | |||
| request, err := http.NewRequest(method, url, nil) | |||
| client := &http.Client{} | |||
| res, err := client.Do(request) | |||
| if err != nil { | |||
| log.Fatal(err) | |||
| } | |||
| defer res.Body.Close() | |||
| body, err := io.ReadAll(res.Body) | |||
| if err != nil { | |||
| log.Fatal(err) | |||
| // HttpGetWithResult sends a GET to url using the client from NewHttpsClient, | |||
| // adds every entry of params as a query parameter, registers result through | |||
| // SetResult, and returns the error reported by Get. | |||
| // NOTE(review): the response value is discarded, so the HTTP status code is | |||
| // not inspected here — confirm callers do not rely on non-2xx being an error. | |||
| func HttpGetWithResult(params map[string]string, url string, result interface{}) error { | |||
| client := NewHttpsClient() | |||
| req := client.R() | |||
| // Add the query parameters | |||
| for k, v := range params { | |||
| req.SetQueryParam(k, v) | |||
| } | |||
| return body, err | |||
| _, err := req.SetResult(result).Get(url) | |||
| return err | |||
| } | |||
| // 发送POST请求 | |||