
promql.go 11 kB

/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/

package tracker

import (
	"fmt"
	"strings"
)

const (
	StatefulSet = "StatefulSet"
	DaemonSet   = "DaemonSet"
	Deployment  = "Deployment"
)

var promQLTemplates = map[string]string{
	"cluster_cpu_utilisation":    "sum by (cluster_name)(cluster_cpu_usage{$1})",
	"cluster_memory_utilisation": "sum by (cluster_name)(cluster_memory_usage{$1})",
	"cluster_disk_utilisation":   "sum by (cluster_name)(cluster_disk_usage{$1})",
	"center_cpu_utilisation":     "(sum by (adapter_id)(cluster_cpu_total{$1})-sum by (adapter_id)(cluster_cpu_avail{$1}))/sum by (adapter_id)(cluster_cpu_total{$1})",
	"center_memory_utilisation":  "(sum by (adapter_id)(cluster_memory_total{$1})-sum by (adapter_id)(cluster_memory_avail{$1}))/sum by (adapter_id)(cluster_memory_total{$1})",
	"center_disk_utilisation":    "(sum by (adapter_id)(cluster_disk_total{$1})-sum by (adapter_id)(cluster_disk_avail{$1}))/sum by (adapter_id)(cluster_disk_total{$1})",
	"center_top3":                "topk(3,((sum by (adapter_id)(cluster_cpu_total)-sum by (adapter_id)(cluster_cpu_avail))/sum by (adapter_id)(cluster_cpu_total) + (sum by (adapter_id)(cluster_memory_total) - sum by (adapter_id)(cluster_memory_avail))/sum by (adapter_id)(cluster_memory_total) + (sum by (adapter_id)(cluster_disk_total)-sum by (adapter_id)(cluster_disk_avail))/sum by (adapter_id)(cluster_disk_total))/3)",
	"namespace_cpu_usage":             `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
	"namespace_memory_usage":          `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,
	"controller_cpu_usage_rate":    `sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="cpu"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
	"controller_memory_usage_rate": `sum( container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", container!="", image!=""} * on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
	// pod
	"pod_cpu_usage":              `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
	"pod_cpu_usage_rate":         `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{ $1}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="core"}) by (pod)`,
	"pod_memory_usage_rate":      `sum(container_memory_working_set_bytes{job="kubelet", $1, container!="", image!=""}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="byte"}) by (pod)`,
	"pod_memory_usage":           `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_memory_usage_wo_cache":  `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_net_bytes_transmitted":  `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_net_bytes_received":     `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_cpu_resource_limits":    `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="cpu",unit="core"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	"pod_memory_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="memory",unit="byte"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
	// container
	"container_cpu_usage":             `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`,
	"container_memory_usage":          `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_processes_usage":       `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
	"container_threads_usage":         `sum by (namespace, pod, container) (container_threads {job="kubelet", container!="POD", container!="", image!="", $1})`,
}
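
Each template leaves $1 (and, for pod-level metrics, $2) as a placeholder for a Prometheus label selector; the functions below fill these in per query level. A minimal sketch of the substitution, using a hypothetical cluster name (exampleTemplateExpansion is not part of the original file):

// Illustrative only: expand one template with a made-up cluster name.
// makeClusterMetricExpr below performs the same replacement.
func exampleTemplateExpansion() string {
	// Yields: sum by (cluster_name)(cluster_cpu_usage{cluster_name="demo-cluster"})
	return strings.Replace(promQLTemplates["cluster_cpu_utilisation"], "$1", `cluster_name="demo-cluster"`, -1)
}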

func makeExpr(metric string, opts QueryOptions) string {
	tmpl := promQLTemplates[metric]
	switch opts.Level {
	case LevelAdapter:
		return makeAdapterMetricExpr(tmpl, opts)
	case LevelCluster:
		return makeClusterMetricExpr(tmpl, opts)
	case LevelNode:
		return makeNodeMetricExpr(tmpl, opts)
	case LevelWorkspace:
		return makeWorkspaceMetricExpr(tmpl, opts)
	case LevelNamespace:
		return makeNamespaceMetricExpr(tmpl, opts)
	case LevelController:
		return makeControllerMetricExpr(tmpl, opts)
	case LevelPod:
		return makePodMetricExpr(tmpl, opts)
	case LevelContainer:
		return makeContainerMetricExpr(tmpl, opts)
	case LevelPVC:
		return makePVCMetricExpr(tmpl, opts)
	case LevelIngress:
		return makeIngressMetricExpr(tmpl, opts)
	case LevelComponent:
		return tmpl
	default:
		return tmpl
	}
}
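
A minimal usage sketch of the dispatcher, assuming it sits in this package; the metric names are keys of promQLTemplates above, while the pod name and filter values are made up:

func exampleMakeExpr() {
	// Pod level: $1 becomes pod="nginx-0" via makePodMetricExpr.
	podExpr := makeExpr("pod_cpu_usage_rate", QueryOptions{Level: LevelPod, PodName: "nginx-0"})

	// Namespace level: with neither WorkspaceName nor Namespace set, ResourceFilter is
	// treated as a regex, so $1 becomes namespace=~"kube-.*".
	nsExpr := makeExpr("namespace_memory_usage", QueryOptions{Level: LevelNamespace, ResourceFilter: "kube-.*"})

	fmt.Println(podExpr)
	fmt.Println(nsExpr)
}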

func makeClusterMetricExpr(tmpl string, o QueryOptions) string {
	var clusterSelector string
	if o.ClusterName != "" {
		clusterSelector = fmt.Sprintf(`cluster_name="%s"`, o.ClusterName)
	}
	return strings.Replace(tmpl, "$1", clusterSelector, -1)
}

func makeAdapterMetricExpr(tmpl string, o QueryOptions) string {
	var adapterSelector string
	if o.AdapterId != 0 {
		adapterSelector = fmt.Sprintf(`adapter_id="%d"`, o.AdapterId)
	}
	return strings.Replace(tmpl, "$1", adapterSelector, -1)
}

func makeNodeMetricExpr(tmpl string, o QueryOptions) string {
	var nodeSelector string
	if o.NodeName != "" {
		nodeSelector = fmt.Sprintf(`node="%s"`, o.NodeName)
	} else {
		nodeSelector = fmt.Sprintf(`node=~"%s"`, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", nodeSelector, -1)
}

func makeWorkspaceMetricExpr(tmpl string, o QueryOptions) string {
	var workspaceSelector string
	if o.WorkspaceName != "" {
		workspaceSelector = fmt.Sprintf(`workspace="%s"`, o.WorkspaceName)
	} else {
		workspaceSelector = fmt.Sprintf(`workspace=~"%s", workspace!=""`, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", workspaceSelector, -1)
}

func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
	var namespaceSelector string
	// For monitoring namespaces in the specific workspace
	// GET /workspaces/{workspace}/namespaces
	if o.WorkspaceName != "" {
		namespaceSelector = fmt.Sprintf(`workspace="%s", namespace=~"%s"`, o.WorkspaceName, o.ResourceFilter)
		return strings.Replace(tmpl, "$1", namespaceSelector, -1)
	}
	// For monitoring the specific namespaces
	// GET /namespaces/{namespace} or
	// GET /namespaces
	if o.Namespace != "" {
		namespaceSelector = fmt.Sprintf(`namespace="%s"`, o.Namespace)
	} else {
		namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", namespaceSelector, -1)
}

func makeControllerMetricExpr(tmpl string, o QueryOptions) string {
	workload := fmt.Sprintf(`workload="%s"`, o.WorkloadName)
	return strings.NewReplacer("$1", workload).Replace(tmpl)
}

func makePodMetricExpr(tmpl string, o QueryOptions) string {
	podName := fmt.Sprintf(`pod="%s"`, o.PodName)
	// The pod templates also contain a $2 placeholder (kube_pod_info{$2}); clear it
	// so the generated expression remains valid PromQL.
	return strings.NewReplacer("$1", podName, "$2", "").Replace(tmpl)
}

func makeContainerMetricExpr(tmpl string, o QueryOptions) string {
	var containerSelector string
	if o.ContainerName != "" {
		containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.Namespace, o.ContainerName)
	} else {
		containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.Namespace, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", containerSelector, -1)
}

func makePVCMetricExpr(tmpl string, o QueryOptions) string {
	var pvcSelector string
	// For monitoring persistentvolumeclaims in the specific namespace
	// GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or
	// GET /namespaces/{namespace}/persistentvolumeclaims
	if o.Namespace != "" {
		pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, o.Namespace, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", pvcSelector, -1)
}

func makeIngressMetricExpr(tmpl string, o QueryOptions) string {
	var ingressSelector string
	var jobSelector string
	duration := "5m"
	// parse Range Vector Selectors metric{key=value}[duration]
	if o.Duration != nil {
		duration = o.Duration.String()
	}
	// job is a required filter
	// GET /namespaces/{namespace}/ingress?job=xxx&pod=xxx
	if o.Job != "" {
		jobSelector = fmt.Sprintf(`job="%s"`, o.Job)
		if o.PodName != "" {
			jobSelector = fmt.Sprintf(`%s,controller_pod="%s"`, jobSelector, o.PodName)
		}
	}
	// No ingress-specific selector is built here, so $1 is replaced with an empty string.
	tmpl = strings.Replace(tmpl, "$1", ingressSelector, -1)
	tmpl = strings.Replace(tmpl, "$2", jobSelector, -1)
	return strings.Replace(tmpl, "$3", duration, -1)
}
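
The ingress templates themselves are not defined in this file, so the sketch below uses a hypothetical template purely to show how makeIngressMetricExpr fills the $2 (job/pod) and $3 (range duration) placeholders, falling back to 5m when no Duration is given:

func exampleIngressExpansion() string {
	// Hypothetical template for illustration; the real ingress templates live elsewhere.
	tmpl := `sum(rate(nginx_ingress_controller_requests{$2}[$3]))`
	// Yields: sum(rate(nginx_ingress_controller_requests{job="ingress-nginx",controller_pod="ingress-nginx-abc12"}[5m]))
	return makeIngressMetricExpr(tmpl, QueryOptions{Job: "ingress-nginx", PodName: "ingress-nginx-abc12"})
}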

PCM is positioned as a software stack over cloud, aiming to build the standards and ecosystem for heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.