You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

promql.go 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /*
  2. Copyright (c) [2023] [pcm]
  3. [pcm-coordinator] is licensed under Mulan PSL v2.
  4. You can use this software according to the terms and conditions of the Mulan PSL v2.
  5. You may obtain a copy of Mulan PSL v2 at:
  6. http://license.coscl.org.cn/MulanPSL2
  7. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  8. EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  9. MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  10. See the Mulan PSL v2 for more details.
  11. */
  12. package tracker
  13. import (
  14. "fmt"
  15. "strings"
  16. )
  // Workload kind names, exposed as untyped string constants.
  const (
  	// StatefulSet is the kind name "StatefulSet".
  	StatefulSet = "StatefulSet"
  	// DaemonSet is the kind name "DaemonSet".
  	DaemonSet = "DaemonSet"
  	// Deployment is the kind name "Deployment".
  	Deployment = "Deployment"
  )
  // promQLTemplates maps a metric name to its PromQL expression template.
  //
  // Templates may contain positional placeholders ($1, $2) inside label
  // matchers; they are meant to be replaced with concrete label selectors by
  // the level-specific expression builders before the query is issued.
  var promQLTemplates = map[string]string{
  	// cluster
  	"cluster_cpu_usage":    "sum by (cluster_name)(cluster_cpu_usage{$1})",
  	"cluster_memory_usage": "sum by (cluster_name)(cluster_memory_usage{$1})",
  	"cluster_disk_usage":   "sum by (cluster_name)(cluster_disk_usage{$1})",
  	"resource_top3":        "topk(3,sum by (cluster_name)(cluster_cpu_usage +cluster_memory_usage +cluster_disk_usage)/3)",

  	// namespace
  	"namespace_cpu_usage":             `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
  	"namespace_memory_usage":          `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
  	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,

  	// controller (workload): usage divided by the summed resource limits
  	"controller_cpu_usage_rate":    `sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="cpu"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
  	"controller_memory_usage_rate": `sum( container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", container!="", image!=""} * on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,

  	// pod
  	"pod_cpu_usage":              `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
  	"pod_cpu_usage_rate":         `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{ $1}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="core"}) by (pod)`,
  	"pod_memory_usage_rate":      `sum(container_memory_working_set_bytes{job="kubelet", $1, container!="", image!=""}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="byte"}) by (pod)`,
  	"pod_memory_usage":           `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_memory_usage_wo_cache":  `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_net_bytes_transmitted":  `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_net_bytes_received":     `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_cpu_resource_limits":    `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="cpu",unit="core"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_memory_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="memory",unit="byte"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,

  	// container
  	"container_cpu_usage":             `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`,
  	"container_memory_usage":          `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
  	"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
  	"container_processes_usage":       `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
  	"container_threads_usage":         `sum by (namespace, pod, container) (container_threads {job="kubelet", container!="POD", container!="", image!="", $1})`,
  }
  49. func makeExpr(metric string, opts QueryOptions) string {
  50. tmpl := promQLTemplates[metric]
  51. switch opts.Level {
  52. case LevelCluster:
  53. return makeClusterMetricExpr(tmpl, opts)
  54. case LevelNode:
  55. return makeNodeMetricExpr(tmpl, opts)
  56. case LevelWorkspace:
  57. return makeWorkspaceMetricExpr(tmpl, opts)
  58. case LevelNamespace:
  59. return makeNamespaceMetricExpr(tmpl, opts)
  60. case LevelController:
  61. return makeControllerMetricExpr(tmpl, opts)
  62. case LevelPod:
  63. return makePodMetricExpr(tmpl, opts)
  64. case LevelContainer:
  65. return makeContainerMetricExpr(tmpl, opts)
  66. case LevelPVC:
  67. return makePVCMetricExpr(tmpl, opts)
  68. case LevelIngress:
  69. return makeIngressMetricExpr(tmpl, opts)
  70. case LevelComponent:
  71. return tmpl
  72. default:
  73. return tmpl
  74. }
  75. }
  76. func makeClusterMetricExpr(tmpl string, o QueryOptions) string {
  77. var clusterSelector string
  78. if o.ClusterName != "" {
  79. clusterSelector = fmt.Sprintf(`cluster_name="%s"`, o.ClusterName)
  80. }
  81. return strings.Replace(tmpl, "$1", clusterSelector, -1)
  82. }
  83. func makeNodeMetricExpr(tmpl string, o QueryOptions) string {
  84. var nodeSelector string
  85. if o.NodeName != "" {
  86. nodeSelector = fmt.Sprintf(`node="%s"`, o.NodeName)
  87. } else {
  88. nodeSelector = fmt.Sprintf(`node=~"%s"`, o.ResourceFilter)
  89. }
  90. return strings.Replace(tmpl, "$1", nodeSelector, -1)
  91. }
  92. func makeWorkspaceMetricExpr(tmpl string, o QueryOptions) string {
  93. var workspaceSelector string
  94. if o.WorkspaceName != "" {
  95. workspaceSelector = fmt.Sprintf(`workspace="%s"`, o.WorkspaceName)
  96. } else {
  97. workspaceSelector = fmt.Sprintf(`workspace=~"%s", workspace!=""`, o.ResourceFilter)
  98. }
  99. return strings.Replace(tmpl, "$1", workspaceSelector, -1)
  100. }
  101. func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
  102. var namespaceSelector string
  103. // For monitoring namespaces in the specific workspace
  104. // GET /workspaces/{workspace}/namespaces
  105. if o.WorkspaceName != "" {
  106. namespaceSelector = fmt.Sprintf(`workspace="%s", namespace=~"%s"`, o.WorkspaceName, o.ResourceFilter)
  107. return strings.Replace(tmpl, "$1", namespaceSelector, -1)
  108. }
  109. // For monitoring the specific namespaces
  110. // GET /namespaces/{namespace} or
  111. // GET /namespaces
  112. if o.Namespace != "" {
  113. namespaceSelector = fmt.Sprintf(`namespace="%s"`, o.Namespace)
  114. } else {
  115. namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, o.ResourceFilter)
  116. }
  117. return strings.Replace(tmpl, "$1", namespaceSelector, -1)
  118. }
  119. func makeControllerMetricExpr(tmpl string, o QueryOptions) string {
  120. var workload string
  121. workload = fmt.Sprintf(`workload="%s"`, o.WorkloadName)
  122. return strings.NewReplacer("$1", workload).Replace(tmpl)
  123. }
  124. func makePodMetricExpr(tmpl string, o QueryOptions) string {
  125. var podName string
  126. podName = fmt.Sprintf(`pod="%s"`, o.PodName)
  127. return strings.NewReplacer("$1", podName).Replace(tmpl)
  128. }
  129. func makeContainerMetricExpr(tmpl string, o QueryOptions) string {
  130. var containerSelector string
  131. if o.ContainerName != "" {
  132. containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.Namespace, o.ContainerName)
  133. } else {
  134. containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.Namespace, o.ResourceFilter)
  135. }
  136. return strings.Replace(tmpl, "$1", containerSelector, -1)
  137. }
  138. func makePVCMetricExpr(tmpl string, o QueryOptions) string {
  139. var pvcSelector string
  140. // For monitoring persistentvolumeclaims in the specific namespace
  141. // GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or
  142. // GET /namespaces/{namespace}/persistentvolumeclaims
  143. if o.Namespace != "" {
  144. pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, o.Namespace, o.ResourceFilter)
  145. return strings.Replace(tmpl, "$1", pvcSelector, -1)
  146. }
  147. return strings.Replace(tmpl, "$1", pvcSelector, -1)
  148. }
  149. func makeIngressMetricExpr(tmpl string, o QueryOptions) string {
  150. var ingressSelector string
  151. var jobSelector string
  152. duration := "5m"
  153. // parse Range Vector Selectors metric{key=value}[duration]
  154. if o.Duration != nil {
  155. duration = o.Duration.String()
  156. }
  157. // job is a reqiuried filter
  158. // GET /namespaces/{namespace}/ingress?job=xxx&pod=xxx
  159. if o.Job != "" {
  160. jobSelector = fmt.Sprintf(`job="%s"`, o.Job)
  161. if o.PodName != "" {
  162. jobSelector = fmt.Sprintf(`%s,controller_pod="%s"`, jobSelector, o.PodName)
  163. }
  164. }
  165. tmpl = strings.Replace(tmpl, "$1", ingressSelector, -1)
  166. tmpl = strings.Replace(tmpl, "$2", jobSelector, -1)
  167. return strings.Replace(tmpl, "$3", duration, -1)
  168. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.