|
- /*
-
- Copyright (c) [2023] [pcm]
- [pcm-coordinator] is licensed under Mulan PSL v2.
- You can use this software according to the terms and conditions of the Mulan PSL v2.
- You may obtain a copy of Mulan PSL v2 at:
- http://license.coscl.org.cn/MulanPSL2
- THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
- EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
- MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
- See the Mulan PSL v2 for more details.
-
- */
-
- package tracker
-
- import (
- "fmt"
- "strings"
- )
-
// String identifiers for workload kinds. Each constant's value is
// identical to its own name.
const (
	// DaemonSet identifies the "DaemonSet" workload kind.
	DaemonSet = "DaemonSet"
	// Deployment identifies the "Deployment" workload kind.
	Deployment = "Deployment"
	// StatefulSet identifies the "StatefulSet" workload kind.
	StatefulSet = "StatefulSet"
)
-
// promQLTemplates maps a metric name to its PromQL expression template.
//
// Templates contain positional placeholders ("$1", "$2", ...) that the
// make*MetricExpr builders replace with label matchers for the requested
// resource. The meaning of each placeholder depends on the builder that is
// applied to the template.
var promQLTemplates = map[string]string{

	// namespace
	"namespace_cpu_usage":                  `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
	"namespace_memory_usage":               `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
	"namespace_memory_usage_wo_cache":      `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,
	"namespace_net_bytes_transmitted":      `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
	"namespace_net_bytes_received":         `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
	"namespace_pod_count":                  `sum by (namespace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
	"namespace_pod_running_count":          `sum by (namespace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
	"namespace_pod_succeeded_count":        `sum by (namespace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
	"namespace_pod_abnormal_count":         `namespace:pod_abnormal:count{namespace!="", $1}`,
	"namespace_pod_abnormal_ratio":         `namespace:pod_abnormal:ratio{namespace!="", $1}`,
	"namespace_memory_limit_hard":          `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.memory"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_cpu_limit_hard":             `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.cpu"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_pod_count_hard":             `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="count/pods"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_cronjob_count":              `sum by (namespace) (kube_cronjob_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_pvc_count":                  `sum by (namespace) (kube_persistentvolumeclaim_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_daemonset_count":            `sum by (namespace) (kube_daemonset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_deployment_count":           `sum by (namespace) (kube_deployment_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_endpoint_count":             `sum by (namespace) (kube_endpoint_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_hpa_count":                  `sum by (namespace) (kube_horizontalpodautoscaler_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_job_count":                  `sum by (namespace) (kube_job_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_statefulset_count":          `sum by (namespace) (kube_statefulset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_replicaset_count":           `count by (namespace) (kube_replicaset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_service_count":              `sum by (namespace) (kube_service_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_secret_count":               `sum by (namespace) (kube_secret_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_configmap_count":            `sum by (namespace) (kube_configmap_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_ingresses_extensions_count": `sum by (namespace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
	"namespace_s2ibuilder_count":           `sum by (namespace) (s2i_s2ibuilder_created{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,

	// controller
	"controller_cpu_usage_rate": `round(sum by (owner_name) (sum by (owner_name, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", $1, image!=""}[5m]))/ sum by (owner_name,pod) (kube_pod_container_resource_limits{resource="cpu"}))/count(kube_pod_info{$2}) by (owner_name),0.0001)`,
	// container_memory_usage_bytes is a gauge, so it is divided by the limit
	// directly; irate() is only meaningful on counters such as the CPU
	// seconds total used above.
	"controller_memory_usage_rate": `round(sum by (owner_name) (sum by (owner_name, pod) (container_memory_usage_bytes{job="kubelet", $1, image!=""})/ sum by (owner_name,pod) (kube_pod_container_resource_limits{resource="memory"}))/count(kube_pod_info{$2}) by (owner_name),0.0001)`,

	// pod
	"pod_cpu_usage":      `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
	"pod_cpu_usage_rate": `round(sum by ( pod) (irate(container_cpu_usage_seconds_total{job="kubelet", $1, image!=""}[5m]))/sum by (pod) (kube_pod_container_resource_limits{resource="cpu"}), 0.0001)`,
	// Gauge divided by limit; see the note on controller_memory_usage_rate.
	"pod_memory_usage_rate":      `round(sum by ( pod) (container_memory_usage_bytes{job="kubelet", $1, image!=""})/sum by (pod) (kube_pod_container_resource_limits{resource="memory"}), 0.0001)`,
	"pod_memory_usage":           `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_memory_usage_wo_cache":  `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_net_bytes_transmitted":  `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_net_bytes_received":     `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_cpu_resource_limits":    `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="cpu",unit="core"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_memory_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="memory",unit="byte"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,

	// container
	"container_cpu_usage":             `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`,
	"container_memory_usage":          `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_processes_usage":       `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_threads_usage":         `sum by (namespace, pod, container) (container_threads {job="kubelet", container!="POD", container!="", image!="", $1})`,
}
-
- func makeExpr(metric string, opts QueryOptions) string {
- tmpl := promQLTemplates[metric]
- switch opts.Level {
- case LevelCluster:
- return tmpl
- case LevelNode:
- return makeNodeMetricExpr(tmpl, opts)
- case LevelWorkspace:
- return makeWorkspaceMetricExpr(tmpl, opts)
- case LevelNamespace:
- return makeNamespaceMetricExpr(tmpl, opts)
- case LevelController:
- return makeControllerMetricExpr(tmpl, opts)
- case LevelPod:
- return makePodMetricExpr(tmpl, opts)
- case LevelContainer:
- return makeContainerMetricExpr(tmpl, opts)
- case LevelPVC:
- return makePVCMetricExpr(tmpl, opts)
- case LevelIngress:
- return makeIngressMetricExpr(tmpl, opts)
- case LevelComponent:
- return tmpl
- default:
- return tmpl
- }
- }
-
- func makeNodeMetricExpr(tmpl string, o QueryOptions) string {
- var nodeSelector string
- if o.NodeName != "" {
- nodeSelector = fmt.Sprintf(`node="%s"`, o.NodeName)
- } else {
- nodeSelector = fmt.Sprintf(`node=~"%s"`, o.ResourceFilter)
- }
- return strings.Replace(tmpl, "$1", nodeSelector, -1)
- }
-
- func makeWorkspaceMetricExpr(tmpl string, o QueryOptions) string {
- var workspaceSelector string
- if o.WorkspaceName != "" {
- workspaceSelector = fmt.Sprintf(`workspace="%s"`, o.WorkspaceName)
- } else {
- workspaceSelector = fmt.Sprintf(`workspace=~"%s", workspace!=""`, o.ResourceFilter)
- }
- return strings.Replace(tmpl, "$1", workspaceSelector, -1)
- }
-
- func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
- var namespaceSelector string
-
- // For monitoring namespaces in the specific workspace
- // GET /workspaces/{workspace}/namespaces
- if o.WorkspaceName != "" {
- namespaceSelector = fmt.Sprintf(`workspace="%s", namespace=~"%s"`, o.WorkspaceName, o.ResourceFilter)
- return strings.Replace(tmpl, "$1", namespaceSelector, -1)
- }
-
- // For monitoring the specific namespaces
- // GET /namespaces/{namespace} or
- // GET /namespaces
- if o.Namespace != "" {
- namespaceSelector = fmt.Sprintf(`namespace="%s"`, o.Namespace)
- } else {
- namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, o.ResourceFilter)
- }
- return strings.Replace(tmpl, "$1", namespaceSelector, -1)
- }
-
- func makeControllerMetricExpr(tmpl string, o QueryOptions) string {
- var podName string
-
- podName = fmt.Sprintf(`pod=~"%s"`, o.PodName)
- return strings.NewReplacer("$1", podName, "$2", podName).Replace(tmpl)
- }
-
- func makePodMetricExpr(tmpl string, o QueryOptions) string {
- var podSelector, workloadSelector string
-
- // For monitoriong pods of the specific workload
- // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods
-
- // For monitoring pods in the specific namespace
- // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods or
- // GET /namespaces/{namespace}/pods/{pod} or
- // GET /namespaces/{namespace}/pods
- if o.Namespace != "" {
- if o.PodName != "" {
- podSelector = fmt.Sprintf(`pod="%s", namespace="%s"`, o.PodName, o.Namespace)
- } else {
- podSelector = fmt.Sprintf(`pod=~"%s", namespace="%s"`, o.ResourceFilter, o.Namespace)
- }
- } else {
- var namespaces, pods []string
- if o.NamespacedResourcesFilter != "" {
- for _, np := range strings.Split(o.NamespacedResourcesFilter, "|") {
- if nparr := strings.SplitN(np, "/", 2); len(nparr) > 1 {
- namespaces = append(namespaces, nparr[0])
- pods = append(pods, nparr[1])
- } else {
- pods = append(pods, np)
- }
- }
- }
- // For monitoring pods on the specific node
- // GET /nodes/{node}/pods/{pod}
- // GET /nodes/{node}/pods
- if o.NodeName != "" {
- if o.PodName != "" {
- if nparr := strings.SplitN(o.PodName, "/", 2); len(nparr) > 1 {
- podSelector = fmt.Sprintf(`namespace="%s",pod="%s", node="%s"`, nparr[0], nparr[1], o.NodeName)
- } else {
- podSelector = fmt.Sprintf(`pod="%s", node="%s"`, o.PodName, o.NodeName)
- }
- } else {
- var ps []string
- ps = append(ps, fmt.Sprintf(`node="%s"`, o.NodeName))
- if o.ResourceFilter != "" {
- ps = append(ps, fmt.Sprintf(`pod=~"%s"`, o.ResourceFilter))
- }
-
- if len(namespaces) > 0 {
- ps = append(ps, fmt.Sprintf(`namespace=~"%s"`, strings.Join(namespaces, "|")))
- }
- if len(pods) > 0 {
- ps = append(ps, fmt.Sprintf(`pod=~"%s"`, strings.Join(pods, "|")))
- }
- podSelector = strings.Join(ps, ",")
- }
- } else {
- // For monitoring pods in the whole cluster
- // Get /pods
- var ps []string
- if len(namespaces) > 0 {
- ps = append(ps, fmt.Sprintf(`namespace=~"%s"`, strings.Join(namespaces, "|")))
- }
- if len(pods) > 0 {
- ps = append(ps, fmt.Sprintf(`pod=~"%s"`, strings.Join(pods, "|")))
- }
- if len(ps) > 0 {
- podSelector = strings.Join(ps, ",")
- }
- }
- }
-
- return strings.NewReplacer("$1", workloadSelector, "$2", podSelector).Replace(tmpl)
- }
-
- func makeContainerMetricExpr(tmpl string, o QueryOptions) string {
- var containerSelector string
- if o.ContainerName != "" {
- containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.Namespace, o.ContainerName)
- } else {
- containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.Namespace, o.ResourceFilter)
- }
- return strings.Replace(tmpl, "$1", containerSelector, -1)
- }
-
- func makePVCMetricExpr(tmpl string, o QueryOptions) string {
- var pvcSelector string
-
- // For monitoring persistentvolumeclaims in the specific namespace
- // GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or
- // GET /namespaces/{namespace}/persistentvolumeclaims
- if o.Namespace != "" {
- if o.PersistentVolumeClaimName != "" {
- pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim="%s"`, o.Namespace, o.PersistentVolumeClaimName)
- } else {
- pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, o.Namespace, o.ResourceFilter)
- }
- return strings.Replace(tmpl, "$1", pvcSelector, -1)
- }
-
- // For monitoring persistentvolumeclaims of the specific storageclass
- // GET /storageclasses/{storageclass}/persistentvolumeclaims
- if o.StorageClassName != "" {
- pvcSelector = fmt.Sprintf(`storageclass="%s", persistentvolumeclaim=~"%s"`, o.StorageClassName, o.ResourceFilter)
- }
- return strings.Replace(tmpl, "$1", pvcSelector, -1)
- }
-
- func makeIngressMetricExpr(tmpl string, o QueryOptions) string {
- var ingressSelector string
- var jobSelector string
- duration := "5m"
-
- // parse Range Vector Selectors metric{key=value}[duration]
- if o.Duration != nil {
- duration = o.Duration.String()
- }
-
- // job is a reqiuried filter
- // GET /namespaces/{namespace}/ingress?job=xxx&pod=xxx
- if o.Job != "" {
- jobSelector = fmt.Sprintf(`job="%s"`, o.Job)
- if o.PodName != "" {
- jobSelector = fmt.Sprintf(`%s,controller_pod="%s"`, jobSelector, o.PodName)
- }
- }
-
- tmpl = strings.Replace(tmpl, "$1", ingressSelector, -1)
- tmpl = strings.Replace(tmpl, "$2", jobSelector, -1)
- return strings.Replace(tmpl, "$3", duration, -1)
- }
|