| @@ -11,6 +11,12 @@ type CreateAlertRuleReq { | |||||
| AlertType string `json:"alertType"` | AlertType string `json:"alertType"` | ||||
| } | } | ||||
// DeleteAlertRuleReq carries the form parameters of the alert-rule delete
// request: the rule id, the name of the cluster the rule belongs to and the
// rule name.
type DeleteAlertRuleReq {
	Id          int64  `form:"id"`
	ClusterName string `form:"clusterName"`
	Name        string `form:"name"`
}
| type ( | type ( | ||||
| AlertRulesReq { | AlertRulesReq { | ||||
| AlertType string `form:"alertType"` | AlertType string `form:"alertType"` | ||||
| @@ -73,36 +79,35 @@ type ( | |||||
| ) | ) | ||||
| type ( | type ( | ||||
| adapterInfoReq{ | |||||
| adapterInfoReq { | |||||
| clusterId string `form:"clusterId"` | clusterId string `form:"clusterId"` | ||||
| } | } | ||||
| adapterInfoResp{ | |||||
| adapterInfoResp { | |||||
| name string `json:"name"` | name string `json:"name"` | ||||
| version string `json:"version"` | version string `json:"version"` | ||||
| } | } | ||||
| ) | ) | ||||
| type ( | type ( | ||||
| scheduleSituationResp{ | |||||
| nodes []NodeRegion `json:"nodes"` | |||||
| links []Link `json:"links"` | |||||
| categories []Category `json:"categories"` | |||||
| } | |||||
| NodeRegion{ | |||||
| id string `json:"id"` | |||||
| name string `json:"name"` | |||||
| category int `json:"category"` | |||||
| value int `json:"value"` | |||||
| } | |||||
| scheduleSituationResp { | |||||
| nodes []NodeRegion `json:"nodes"` | |||||
| links []Link `json:"links"` | |||||
| categories []Category `json:"categories"` | |||||
| } | |||||
| Link{ | |||||
| source string `json:"source"` | |||||
| target string `json:"target"` | |||||
| } | |||||
| NodeRegion { | |||||
| id string `json:"id"` | |||||
| name string `json:"name"` | |||||
| category int `json:"category"` | |||||
| value int `json:"value"` | |||||
| } | |||||
| Category{ | |||||
| name string `json:"name"` | |||||
| } | |||||
| Link { | |||||
| source string `json:"source"` | |||||
| target string `json:"target"` | |||||
| } | |||||
| Category { | |||||
| name string `json:"name"` | |||||
| } | |||||
| ) | ) | ||||
| @@ -219,6 +219,7 @@ service pcm { | |||||
| @doc "Create cloud computing common tasks" | @doc "Create cloud computing common tasks" | ||||
| @handler commitGeneralTask | @handler commitGeneralTask | ||||
| post /cloud/task/create (GeneralTaskReq) returns () | post /cloud/task/create (GeneralTaskReq) returns () | ||||
| } | } | ||||
| //智算二级接口 | //智算二级接口 | ||||
| @@ -1003,6 +1004,9 @@ service pcm { | |||||
| @handler CreateAlertRuleHandler | @handler CreateAlertRuleHandler | ||||
| post /monitoring/alert/rule (CreateAlertRuleReq) | post /monitoring/alert/rule (CreateAlertRuleReq) | ||||
| @handler DeleteAlertRuleHandler | |||||
| delete /monitoring/alert/rule (DeleteAlertRuleReq) | |||||
| @doc "alert rules" | @doc "alert rules" | ||||
| @handler alertRulesHandler | @handler alertRulesHandler | ||||
| get /monitoring/alert/rule (AlertRulesReq) returns (AlertRulesResp) | get /monitoring/alert/rule (AlertRulesReq) returns (AlertRulesResp) | ||||
| @@ -0,0 +1,25 @@ | |||||
| package monitoring | |||||
| import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | |||||
| "net/http" | |||||
| "github.com/zeromicro/go-zero/rest/httpx" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" | |||||
| ) | |||||
| func DeleteAlertRuleHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| return func(w http.ResponseWriter, r *http.Request) { | |||||
| var req types.DeleteAlertRuleReq | |||||
| if err := httpx.Parse(r, &req); err != nil { | |||||
| httpx.ErrorCtx(r.Context(), w, err) | |||||
| return | |||||
| } | |||||
| l := monitoring.NewDeleteAlertRuleLogic(r.Context(), svcCtx) | |||||
| err := l.DeleteAlertRule(&req) | |||||
| result.HttpResult(r, w, nil, err) | |||||
| } | |||||
| } | |||||
| @@ -1262,6 +1262,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { | |||||
| Path: "/monitoring/alert/rule", | Path: "/monitoring/alert/rule", | ||||
| Handler: monitoring.CreateAlertRuleHandler(serverCtx), | Handler: monitoring.CreateAlertRuleHandler(serverCtx), | ||||
| }, | }, | ||||
| { | |||||
| Method: http.MethodDelete, | |||||
| Path: "/monitoring/alert/rule", | |||||
| Handler: monitoring.DeleteAlertRuleHandler(serverCtx), | |||||
| }, | |||||
| { | { | ||||
| Method: http.MethodGet, | Method: http.MethodGet, | ||||
| Path: "/monitoring/alert/rule", | Path: "/monitoring/alert/rule", | ||||
| @@ -46,7 +46,7 @@ func (l *CreateAlertRuleLogic) CreateAlertRule(req *types.CreateAlertRuleReq) er | |||||
| return tx.Error | return tx.Error | ||||
| } | } | ||||
| // query server http url. | |||||
| // query cluster http url. | |||||
| var server string | var server string | ||||
| l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server) | l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server) | ||||
| @@ -0,0 +1,66 @@ | |||||
| package monitoring | |||||
import (
	"context"
	"fmt"

	"github.com/zeromicro/go-zero/core/logx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
| type DeleteAlertRuleLogic struct { | |||||
| logx.Logger | |||||
| ctx context.Context | |||||
| svcCtx *svc.ServiceContext | |||||
| } | |||||
| func NewDeleteAlertRuleLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeleteAlertRuleLogic { | |||||
| return &DeleteAlertRuleLogic{ | |||||
| Logger: logx.WithContext(ctx), | |||||
| ctx: ctx, | |||||
| svcCtx: svcCtx, | |||||
| } | |||||
| } | |||||
| func (l *DeleteAlertRuleLogic) DeleteAlertRule(req *types.DeleteAlertRuleReq) error { | |||||
| // Delete data from the database | |||||
| l.svcCtx.DbEngin.Delete(&types.AlertRule{}, "id = ?", req.Id) | |||||
| // query cluster http url. | |||||
| var server string | |||||
| l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server) | |||||
| // create prometheus rule | |||||
| response, err := l.svcCtx.HttpClient.R(). | |||||
| SetBody(&CrdStruct{ | |||||
| ClusterName: req.ClusterName, | |||||
| Name: req.Name, | |||||
| Grv: Grv{ | |||||
| Group: "monitoring.coreos.com", | |||||
| Version: "v1", | |||||
| Resource: "prometheusrules", | |||||
| }, | |||||
| }). | |||||
| ForceContentType("application/json"). | |||||
| Delete(server + "/api/v1/crd") | |||||
| if err != nil { | |||||
| return err | |||||
| } | |||||
| if err != nil || response.IsError() { | |||||
| return err | |||||
| } | |||||
| return nil | |||||
| } | |||||
// Grv identifies a resource kind by API group, version and resource name,
// for example group "monitoring.coreos.com", version "v1" and resource
// "prometheusrules". It is serialized as JSON.
type Grv struct {
	Group    string `json:"group"`
	Version  string `json:"version"`
	Resource string `json:"resource"`
}
| type CrdStruct struct { | |||||
| ClusterName string `json:"clusterName"` | |||||
| Grv Grv `json:"grv"` | |||||
| Name string `json:"name"` | |||||
| } | |||||
| @@ -33,28 +33,28 @@ func (l *ScheduleSituationLogic) ScheduleSituation() (resp *types.ScheduleSituat | |||||
| // hpc | // hpc | ||||
| var hpcLinks []string | var hpcLinks []string | ||||
| tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks) | |||||
| tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT( distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks) | |||||
| if tx.Error != nil { | if tx.Error != nil { | ||||
| return nil, tx.Error | return nil, tx.Error | ||||
| } | } | ||||
| LinksHandler(hpcLinks, resp) | LinksHandler(hpcLinks, resp) | ||||
| // cloud | // cloud | ||||
| var cloudLinks []string | var cloudLinks []string | ||||
| tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks) | |||||
| tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks) | |||||
| if tx.Error != nil { | if tx.Error != nil { | ||||
| return nil, tx.Error | return nil, tx.Error | ||||
| } | } | ||||
| LinksHandler(cloudLinks, resp) | LinksHandler(cloudLinks, resp) | ||||
| // ai | // ai | ||||
| var aiLinks []string | var aiLinks []string | ||||
| tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks) | |||||
| tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks) | |||||
| if tx.Error != nil { | if tx.Error != nil { | ||||
| return nil, tx.Error | return nil, tx.Error | ||||
| } | } | ||||
| LinksHandler(aiLinks, resp) | LinksHandler(aiLinks, resp) | ||||
| // vm | // vm | ||||
| var vmLinks []string | var vmLinks []string | ||||
| tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks) | |||||
| tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks) | |||||
| if tx.Error != nil { | if tx.Error != nil { | ||||
| return nil, tx.Error | return nil, tx.Error | ||||
| } | } | ||||
| @@ -5644,6 +5644,12 @@ type CreateAlertRuleReq struct { | |||||
| AlertType string `json:"alertType"` | AlertType string `json:"alertType"` | ||||
| } | } | ||||
// DeleteAlertRuleReq carries the form parameters of the alert-rule delete
// request: the rule id, the name of the cluster the rule belongs to and the
// rule name.
type DeleteAlertRuleReq struct {
	Id          int64  `form:"id"`
	ClusterName string `form:"clusterName"`
	Name        string `form:"name"`
}
| type AlertRulesReq struct { | type AlertRulesReq struct { | ||||
| AlertType string `form:"alertType"` | AlertType string `form:"alertType"` | ||||
| AdapterId string `form:"adapterId,optional"` | AdapterId string `form:"adapterId,optional"` | ||||
| @@ -78,6 +78,18 @@ var ( | |||||
| Name: "cluster_pod_total", | Name: "cluster_pod_total", | ||||
| Help: "Cluster Pod total.", | Help: "Cluster Pod total.", | ||||
| }, []string{"cluster_name", "adapter_id"}) | }, []string{"cluster_name", "adapter_id"}) | ||||
| ClusterCpuCoreHoursGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | |||||
| Name: "cluster_cpu_core_hours", | |||||
| Help: "Cluster Cpu Core Hours.", | |||||
| }, []string{"cluster_name", "adapter_id"}) | |||||
| ClusterCardsAvailGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | |||||
| Name: "cluster_cards_avail", | |||||
| Help: "Cluster Cards Available.", | |||||
| }, []string{"cluster_name", "adapter_id"}) | |||||
| ClusterGpuAvailGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ | |||||
| Name: "cluster_gpu_avail", | |||||
| Help: "Cluster Gpu Available.", | |||||
| }, []string{"cluster_name", "adapter_id"}) | |||||
| metrics = []prometheus.Collector{ | metrics = []prometheus.Collector{ | ||||
| ClusterCpuUtilisationGauge, | ClusterCpuUtilisationGauge, | ||||