You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

promql.go 16 kB


  1. /*
  2. Copyright (c) [2023] [pcm]
  3. [pcm-coordinator] is licensed under Mulan PSL v2.
  4. You can use this software according to the terms and conditions of the Mulan PSL v2.
  5. You may obtain a copy of Mulan PSL v2 at:
  6. http://license.coscl.org.cn/MulanPSL2
  7. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  8. EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  9. MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  10. See the Mulan PSL v2 for more details.
  11. */
  12. package tracker
  13. import (
  14. "fmt"
  15. "strings"
  16. )
  // Kind names exported by this package.
  // NOTE(review): none of these are referenced in the visible part of this
  // file; presumably they are compared against workload/owner kinds elsewhere.
  const (
  	// DaemonSet is the literal kind name "DaemonSet".
  	DaemonSet = "DaemonSet"
  	// Deployment is the literal kind name "Deployment".
  	Deployment = "Deployment"
  	// StatefulSet is the literal kind name "StatefulSet".
  	StatefulSet = "StatefulSet"
  )
  // promQLTemplates maps a metric name to its PromQL expression template.
  //
  // Templates carry positional placeholders ("$1", "$2") that the
  // make*MetricExpr functions in this file replace with label matchers before
  // the expression is used. A placeholder that receives an empty selector is
  // simply removed from the expression.
  var promQLTemplates = map[string]string{
  	// namespace
  	"namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
  	"namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
  	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,
  	"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
  	"namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
  	"namespace_pod_count": `sum by (namespace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
  	"namespace_pod_running_count": `sum by (namespace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
  	"namespace_pod_succeeded_count": `sum by (namespace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
  	"namespace_pod_abnormal_count": `namespace:pod_abnormal:count{namespace!="", $1}`,
  	"namespace_pod_abnormal_ratio": `namespace:pod_abnormal:ratio{namespace!="", $1}`,
  	"namespace_memory_limit_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.memory"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_cpu_limit_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.cpu"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_pod_count_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="count/pods"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_cronjob_count": `sum by (namespace) (kube_cronjob_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_pvc_count": `sum by (namespace) (kube_persistentvolumeclaim_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_daemonset_count": `sum by (namespace) (kube_daemonset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_deployment_count": `sum by (namespace) (kube_deployment_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_endpoint_count": `sum by (namespace) (kube_endpoint_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_hpa_count": `sum by (namespace) (kube_horizontalpodautoscaler_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_job_count": `sum by (namespace) (kube_job_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_statefulset_count": `sum by (namespace) (kube_statefulset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_replicaset_count": `count by (namespace) (kube_replicaset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_service_count": `sum by (namespace) (kube_service_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_secret_count": `sum by (namespace) (kube_secret_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_configmap_count": `sum by (namespace) (kube_configmap_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_ingresses_extensions_count": `sum by (namespace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
  	"namespace_s2ibuilder_count": `sum by (namespace) (s2i_s2ibuilder_created{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,

  	// controller
  	"controller_cpu_usage_rate": `round(sum by (owner_name) (sum by (owner_name, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", $1, image!=""}[5m]))/ sum by (owner_name,pod) (kube_pod_container_resource_limits{resource="cpu"}))/count(kube_pod_info{$2}) by (owner_name),0.0001)`,
  	// container_memory_usage_bytes is a gauge, so it is read directly: wrapping
  	// it in irate() would yield the rate of change of memory, not the usage.
  	"controller_memory_usage_rate": `round(sum by (owner_name) (sum by (owner_name, pod) (container_memory_usage_bytes{job="kubelet", $1, image!=""})/ sum by (owner_name,pod) (kube_pod_container_resource_limits{resource="memory"}))/count(kube_pod_info{$2}) by (owner_name),0.0001)`,

  	// pod
  	"pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
  	"pod_cpu_usage_rate": `round(sum by ( pod) (irate(container_cpu_usage_seconds_total{job="kubelet", $1, image!=""}[5m]))/sum by (pod) (kube_pod_container_resource_limits{resource="cpu"}), 0.0001)`,
  	// Gauge divided by limit; see the note on controller_memory_usage_rate.
  	"pod_memory_usage_rate": `round(sum by ( pod) (container_memory_usage_bytes{job="kubelet", $1, image!=""})/sum by (pod) (kube_pod_container_resource_limits{resource="memory"}), 0.0001)`,
  	"pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_net_bytes_transmitted": `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_net_bytes_received": `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_cpu_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="cpu",unit="core"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
  	"pod_memory_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="memory",unit="byte"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,

  	// container
  	"container_cpu_usage": `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`,
  	"container_memory_usage": `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
  	"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
  	"container_processes_usage": `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
  	"container_threads_usage": `sum by (namespace, pod, container) (container_threads {job="kubelet", container!="POD", container!="", image!="", $1})`,
  }
  70. func makeExpr(metric string, opts QueryOptions) string {
  71. tmpl := promQLTemplates[metric]
  72. switch opts.Level {
  73. case LevelCluster:
  74. return tmpl
  75. case LevelNode:
  76. return makeNodeMetricExpr(tmpl, opts)
  77. case LevelWorkspace:
  78. return makeWorkspaceMetricExpr(tmpl, opts)
  79. case LevelNamespace:
  80. return makeNamespaceMetricExpr(tmpl, opts)
  81. case LevelController:
  82. return makeControllerMetricExpr(tmpl, opts)
  83. case LevelPod:
  84. return makePodMetricExpr(tmpl, opts)
  85. case LevelContainer:
  86. return makeContainerMetricExpr(tmpl, opts)
  87. case LevelPVC:
  88. return makePVCMetricExpr(tmpl, opts)
  89. case LevelIngress:
  90. return makeIngressMetricExpr(tmpl, opts)
  91. case LevelComponent:
  92. return tmpl
  93. default:
  94. return tmpl
  95. }
  96. }
  97. func makeNodeMetricExpr(tmpl string, o QueryOptions) string {
  98. var nodeSelector string
  99. if o.NodeName != "" {
  100. nodeSelector = fmt.Sprintf(`node="%s"`, o.NodeName)
  101. } else {
  102. nodeSelector = fmt.Sprintf(`node=~"%s"`, o.ResourceFilter)
  103. }
  104. return strings.Replace(tmpl, "$1", nodeSelector, -1)
  105. }
  106. func makeWorkspaceMetricExpr(tmpl string, o QueryOptions) string {
  107. var workspaceSelector string
  108. if o.WorkspaceName != "" {
  109. workspaceSelector = fmt.Sprintf(`workspace="%s"`, o.WorkspaceName)
  110. } else {
  111. workspaceSelector = fmt.Sprintf(`workspace=~"%s", workspace!=""`, o.ResourceFilter)
  112. }
  113. return strings.Replace(tmpl, "$1", workspaceSelector, -1)
  114. }
  115. func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
  116. var namespaceSelector string
  117. // For monitoring namespaces in the specific workspace
  118. // GET /workspaces/{workspace}/namespaces
  119. if o.WorkspaceName != "" {
  120. namespaceSelector = fmt.Sprintf(`workspace="%s", namespace=~"%s"`, o.WorkspaceName, o.ResourceFilter)
  121. return strings.Replace(tmpl, "$1", namespaceSelector, -1)
  122. }
  123. // For monitoring the specific namespaces
  124. // GET /namespaces/{namespace} or
  125. // GET /namespaces
  126. if o.Namespace != "" {
  127. namespaceSelector = fmt.Sprintf(`namespace="%s"`, o.Namespace)
  128. } else {
  129. namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, o.ResourceFilter)
  130. }
  131. return strings.Replace(tmpl, "$1", namespaceSelector, -1)
  132. }
  133. func makeControllerMetricExpr(tmpl string, o QueryOptions) string {
  134. var podName string
  135. podName = fmt.Sprintf(`pod=~"%s"`, o.PodName)
  136. return strings.NewReplacer("$1", podName, "$2", podName).Replace(tmpl)
  137. }
  138. func makePodMetricExpr(tmpl string, o QueryOptions) string {
  139. var podSelector, workloadSelector string
  140. // For monitoriong pods of the specific workload
  141. // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods
  142. // For monitoring pods in the specific namespace
  143. // GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods or
  144. // GET /namespaces/{namespace}/pods/{pod} or
  145. // GET /namespaces/{namespace}/pods
  146. if o.Namespace != "" {
  147. if o.PodName != "" {
  148. podSelector = fmt.Sprintf(`pod="%s", namespace="%s"`, o.PodName, o.Namespace)
  149. } else {
  150. podSelector = fmt.Sprintf(`pod=~"%s", namespace="%s"`, o.ResourceFilter, o.Namespace)
  151. }
  152. } else {
  153. var namespaces, pods []string
  154. if o.NamespacedResourcesFilter != "" {
  155. for _, np := range strings.Split(o.NamespacedResourcesFilter, "|") {
  156. if nparr := strings.SplitN(np, "/", 2); len(nparr) > 1 {
  157. namespaces = append(namespaces, nparr[0])
  158. pods = append(pods, nparr[1])
  159. } else {
  160. pods = append(pods, np)
  161. }
  162. }
  163. }
  164. // For monitoring pods on the specific node
  165. // GET /nodes/{node}/pods/{pod}
  166. // GET /nodes/{node}/pods
  167. if o.NodeName != "" {
  168. if o.PodName != "" {
  169. if nparr := strings.SplitN(o.PodName, "/", 2); len(nparr) > 1 {
  170. podSelector = fmt.Sprintf(`namespace="%s",pod="%s", node="%s"`, nparr[0], nparr[1], o.NodeName)
  171. } else {
  172. podSelector = fmt.Sprintf(`pod="%s", node="%s"`, o.PodName, o.NodeName)
  173. }
  174. } else {
  175. var ps []string
  176. ps = append(ps, fmt.Sprintf(`node="%s"`, o.NodeName))
  177. if o.ResourceFilter != "" {
  178. ps = append(ps, fmt.Sprintf(`pod=~"%s"`, o.ResourceFilter))
  179. }
  180. if len(namespaces) > 0 {
  181. ps = append(ps, fmt.Sprintf(`namespace=~"%s"`, strings.Join(namespaces, "|")))
  182. }
  183. if len(pods) > 0 {
  184. ps = append(ps, fmt.Sprintf(`pod=~"%s"`, strings.Join(pods, "|")))
  185. }
  186. podSelector = strings.Join(ps, ",")
  187. }
  188. } else {
  189. // For monitoring pods in the whole cluster
  190. // Get /pods
  191. var ps []string
  192. if len(namespaces) > 0 {
  193. ps = append(ps, fmt.Sprintf(`namespace=~"%s"`, strings.Join(namespaces, "|")))
  194. }
  195. if len(pods) > 0 {
  196. ps = append(ps, fmt.Sprintf(`pod=~"%s"`, strings.Join(pods, "|")))
  197. }
  198. if len(ps) > 0 {
  199. podSelector = strings.Join(ps, ",")
  200. }
  201. }
  202. }
  203. return strings.NewReplacer("$1", workloadSelector, "$2", podSelector).Replace(tmpl)
  204. }
  205. func makeContainerMetricExpr(tmpl string, o QueryOptions) string {
  206. var containerSelector string
  207. if o.ContainerName != "" {
  208. containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.Namespace, o.ContainerName)
  209. } else {
  210. containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.Namespace, o.ResourceFilter)
  211. }
  212. return strings.Replace(tmpl, "$1", containerSelector, -1)
  213. }
  214. func makePVCMetricExpr(tmpl string, o QueryOptions) string {
  215. var pvcSelector string
  216. // For monitoring persistentvolumeclaims in the specific namespace
  217. // GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or
  218. // GET /namespaces/{namespace}/persistentvolumeclaims
  219. if o.Namespace != "" {
  220. if o.PersistentVolumeClaimName != "" {
  221. pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim="%s"`, o.Namespace, o.PersistentVolumeClaimName)
  222. } else {
  223. pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, o.Namespace, o.ResourceFilter)
  224. }
  225. return strings.Replace(tmpl, "$1", pvcSelector, -1)
  226. }
  227. // For monitoring persistentvolumeclaims of the specific storageclass
  228. // GET /storageclasses/{storageclass}/persistentvolumeclaims
  229. if o.StorageClassName != "" {
  230. pvcSelector = fmt.Sprintf(`storageclass="%s", persistentvolumeclaim=~"%s"`, o.StorageClassName, o.ResourceFilter)
  231. }
  232. return strings.Replace(tmpl, "$1", pvcSelector, -1)
  233. }
  234. func makeIngressMetricExpr(tmpl string, o QueryOptions) string {
  235. var ingressSelector string
  236. var jobSelector string
  237. duration := "5m"
  238. // parse Range Vector Selectors metric{key=value}[duration]
  239. if o.Duration != nil {
  240. duration = o.Duration.String()
  241. }
  242. // job is a reqiuried filter
  243. // GET /namespaces/{namespace}/ingress?job=xxx&pod=xxx
  244. if o.Job != "" {
  245. jobSelector = fmt.Sprintf(`job="%s"`, o.Job)
  246. if o.PodName != "" {
  247. jobSelector = fmt.Sprintf(`%s,controller_pod="%s"`, jobSelector, o.PodName)
  248. }
  249. }
  250. tmpl = strings.Replace(tmpl, "$1", ingressSelector, -1)
  251. tmpl = strings.Replace(tmpl, "$2", jobSelector, -1)
  252. return strings.Replace(tmpl, "$3", duration, -1)
  253. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem for heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.