You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

worker.go 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. package runtime
import (
	"context"
	"fmt"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	appsv1 "k8s.io/api/apps/v1"
	autoscalingv2 "k8s.io/api/autoscaling/v2"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"

	sednav1 "github.com/kubeedge/sedna/pkg/apis/sedna/v1alpha1"
	"github.com/kubeedge/sedna/pkg/globalmanager/utils"
)
  20. type WorkerMount struct {
  21. Name string
  22. // the url to be mounted
  23. URL *MountURL
  24. // for some cases, there are more than one url to be mounted
  25. URLs []MountURL
  26. // envName indicates the environment key of the mounts injected to the worker
  27. EnvName string
  28. }
  29. // WorkerParam describes the system-defined parameters of worker
  30. type WorkerParam struct {
  31. Mounts []WorkerMount
  32. Env map[string]string
  33. WorkerType string
  34. // if true, force to use hostNetwork
  35. HostNetwork bool
  36. ModelHotUpdate ModelHotUpdate
  37. RestartPolicy v1.RestartPolicy
  38. DNSPolicy v1.DNSPolicy
  39. }
  40. type ModelHotUpdate struct {
  41. Enable bool
  42. PollPeriodSeconds int64
  43. }
  44. // generateLabels generates labels for an object
  45. func generateLabels(object CommonInterface, workerType string) map[string]string {
  46. kind := object.GroupVersionKind().Kind
  47. group := object.GroupVersionKind().Group
  48. keyPrefix := strings.ToLower(kind + "." + group + "/")
  49. labels := make(map[string]string)
  50. labels[keyPrefix+"name"] = object.GetName()
  51. labels[keyPrefix+"uid"] = string(object.GetUID())
  52. if workerType != "" {
  53. labels[keyPrefix+"worker-type"] = strings.ToLower(workerType)
  54. }
  55. return labels
  56. }
  57. // GenerateSelector generates the selector of an object for worker
  58. func GenerateSelector(object CommonInterface) (labels.Selector, error) {
  59. ls := &metav1.LabelSelector{
  60. // select any type workers
  61. MatchLabels: generateLabels(object, ""),
  62. }
  63. return metav1.LabelSelectorAsSelector(ls)
  64. }
  65. // GenerateWorkerSelector generates the selector of an object for specific worker type
  66. func GenerateWorkerSelector(object CommonInterface, workerType string) (labels.Selector, error) {
  67. ls := &metav1.LabelSelector{
  68. // select any type workers
  69. MatchLabels: generateLabels(object, workerType),
  70. }
  71. return metav1.LabelSelectorAsSelector(ls)
  72. }
  73. // CreateKubernetesService creates a k8s service for an object given ip and port
  74. func CreateKubernetesService(kubeClient kubernetes.Interface, object CommonInterface, workerType string, inputPort int32, inputIP string) (int32, error) {
  75. ctx := context.Background()
  76. name := object.GetName()
  77. namespace := object.GetNamespace()
  78. kind := object.GroupVersionKind().Kind
  79. targePort := intstr.IntOrString{
  80. IntVal: inputPort,
  81. }
  82. serviceSpec := &v1.Service{
  83. ObjectMeta: metav1.ObjectMeta{
  84. Namespace: object.GetNamespace(),
  85. Name: strings.ToLower(name + "-" + workerType),
  86. OwnerReferences: []metav1.OwnerReference{
  87. *metav1.NewControllerRef(object, object.GroupVersionKind()),
  88. },
  89. Labels: generateLabels(object, workerType),
  90. },
  91. Spec: v1.ServiceSpec{
  92. Selector: generateLabels(object, workerType),
  93. ExternalIPs: []string{
  94. inputIP,
  95. },
  96. Type: v1.ServiceTypeNodePort,
  97. Ports: []v1.ServicePort{
  98. {
  99. Port: inputPort,
  100. TargetPort: targePort,
  101. },
  102. },
  103. },
  104. }
  105. service, err := kubeClient.CoreV1().Services(namespace).Create(ctx, serviceSpec, metav1.CreateOptions{})
  106. if err != nil {
  107. klog.Warningf("failed to create service for %v %v/%v, err:%s", kind, namespace, name, err)
  108. return 0, err
  109. }
  110. klog.V(2).Infof("Service %s is created successfully for %v %v/%v", service.Name, kind, namespace, name)
  111. return service.Spec.Ports[0].NodePort, nil
  112. }
  113. // injectWorkerParam modifies pod in-place
  114. func injectWorkerParam(pod *v1.Pod, workerParam *WorkerParam, object CommonInterface) {
  115. InjectStorageInitializer(pod, workerParam)
  116. if workerParam.WorkerType == InferencePodType && workerParam.ModelHotUpdate.Enable {
  117. injectModelHotUpdateMount(pod, object)
  118. setModelHotUpdateEnv(workerParam)
  119. }
  120. envs := createEnvVars(workerParam.Env)
  121. for idx := range pod.Spec.Containers {
  122. pod.Spec.Containers[idx].Env = append(
  123. pod.Spec.Containers[idx].Env, envs...,
  124. )
  125. }
  126. // inject our labels
  127. if pod.Labels == nil {
  128. pod.Labels = make(map[string]string)
  129. }
  130. for k, v := range generateLabels(object, workerParam.WorkerType) {
  131. pod.Labels[k] = v
  132. }
  133. pod.GenerateName = object.GetName() + "-" + strings.ToLower(workerParam.WorkerType) + "-"
  134. pod.Namespace = object.GetNamespace()
  135. if workerParam.HostNetwork {
  136. // FIXME
  137. // force to set hostnetwork
  138. pod.Spec.HostNetwork = true
  139. }
  140. if pod.Spec.RestartPolicy == "" {
  141. pod.Spec.RestartPolicy = workerParam.RestartPolicy
  142. }
  143. if workerParam.DNSPolicy != "" {
  144. pod.Spec.DNSPolicy = workerParam.DNSPolicy
  145. }
  146. }
  147. // CreatePodWithTemplate creates and returns a pod object given a crd object, pod template, and workerParam
  148. func CreatePodWithTemplate(client kubernetes.Interface, object CommonInterface, spec *v1.PodTemplateSpec, workerParam *WorkerParam) (*v1.Pod, error) {
  149. objectKind := object.GroupVersionKind()
  150. pod, _ := utils.GetPodFromTemplate(spec, object, metav1.NewControllerRef(object, objectKind))
  151. injectWorkerParam(pod, workerParam, object)
  152. createdPod, err := client.CoreV1().Pods(object.GetNamespace()).Create(context.TODO(), pod, metav1.CreateOptions{})
  153. objectName := object.GetNamespace() + "/" + object.GetName()
  154. if err != nil {
  155. klog.Warningf("failed to create pod(type=%s) for %s %s, err:%s", workerParam.WorkerType, objectKind, objectName, err)
  156. return nil, err
  157. }
  158. klog.V(2).Infof("pod %s is created successfully for %s %s", createdPod.Name, objectKind, objectName)
  159. return createdPod, nil
  160. }
  161. // CreateEdgeMeshService creates a kubeedge edgemesh service for an object, and returns an edgemesh service URL.
  162. // Since edgemesh can realize Cross-Edge-Cloud communication, the service can be created both on the cloud or edge side.
  163. func CreateEdgeMeshService(kubeClient kubernetes.Interface, object CommonInterface, workerType string, servicePort int32) (string, error) {
  164. ctx := context.Background()
  165. name := object.GetName()
  166. namespace := object.GetNamespace()
  167. kind := object.GroupVersionKind().Kind
  168. targetPort := intstr.IntOrString{
  169. IntVal: servicePort,
  170. }
  171. serviceSpec := &v1.Service{
  172. ObjectMeta: metav1.ObjectMeta{
  173. Namespace: namespace,
  174. Name: strings.ToLower(name + "-" + workerType),
  175. OwnerReferences: []metav1.OwnerReference{
  176. *metav1.NewControllerRef(object, object.GroupVersionKind()),
  177. },
  178. Labels: generateLabels(object, workerType),
  179. },
  180. Spec: v1.ServiceSpec{
  181. Selector: generateLabels(object, workerType),
  182. Ports: []v1.ServicePort{
  183. {
  184. // TODO: be clean, Port.Name is currently required by edgemesh(v1.8.0).
  185. // and should be <protocol>-<suffix>
  186. Name: "tcp-0",
  187. Protocol: "TCP",
  188. Port: servicePort,
  189. TargetPort: targetPort,
  190. },
  191. },
  192. },
  193. }
  194. service, err := kubeClient.CoreV1().Services(namespace).Create(ctx, serviceSpec, metav1.CreateOptions{})
  195. if err != nil {
  196. klog.Warningf("failed to create service for %v %v/%v, err:%s", kind, namespace, name, err)
  197. return "", err
  198. }
  199. klog.V(2).Infof("Service %s is created successfully for %v %v/%v", service.Name, kind, namespace, name)
  200. return fmt.Sprintf("%s.%s", service.Name, service.Namespace), nil
  201. }
  202. // CreateDeploymentWithTemplate creates and returns a deployment object given a crd object, deployment template
  203. func CreateDeploymentWithTemplate(client kubernetes.Interface, object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam) (*appsv1.Deployment, error) {
  204. objectKind := object.GroupVersionKind()
  205. objectName := object.GetNamespace() + "/" + object.GetName()
  206. deployment := newDeployment(object, spec, workerParam)
  207. injectDeploymentParam(deployment, workerParam, object)
  208. createdDeployment, err := client.AppsV1().Deployments(object.GetNamespace()).Create(context.TODO(), deployment, metav1.CreateOptions{})
  209. if err != nil {
  210. klog.Warningf("failed to create deployment for %s %s, err:%s", objectKind, objectName, err)
  211. return nil, err
  212. }
  213. klog.V(2).Infof("deployment %s is created successfully for %s %s", createdDeployment.Name, objectKind, objectName)
  214. return createdDeployment, nil
  215. }
  216. // UpdateDeploymentWithTemplate updates an existing deployment object given a crd object, deployment template, and worker parameters
  217. func UpdateDeploymentWithTemplate(client kubernetes.Interface, object CommonInterface, newDeployment *appsv1.Deployment, workerParam *WorkerParam) (*appsv1.Deployment, error) {
  218. objectKind := object.GroupVersionKind()
  219. objectName := object.GetNamespace() + "/" + object.GetName()
  220. // Inject worker parameters.
  221. injectDeploymentParam(newDeployment, workerParam, object)
  222. // Call the Kubernetes API to perform the update.
  223. updatedDeployment, err := client.AppsV1().Deployments(newDeployment.Namespace).Update(context.TODO(), newDeployment, metav1.UpdateOptions{})
  224. if err != nil {
  225. klog.Warningf("failed to update deployment for %s %s, err: %s", objectKind, objectName, err)
  226. return nil, fmt.Errorf("failed to update deployment: %w", err)
  227. }
  228. klog.V(2).Infof("deployment %s is updated successfully for %s %s", updatedDeployment.Name, objectKind, objectName)
  229. return updatedDeployment, nil
  230. }
  231. func CreateHPA(client kubernetes.Interface, object CommonInterface, kind, scaleTargetRefName, workerType string, hpa *sednav1.HPA) error {
  232. hpaName := "hpa-" + scaleTargetRefName
  233. newHPA := &autoscalingv2.HorizontalPodAutoscaler{
  234. ObjectMeta: metav1.ObjectMeta{
  235. Name: hpaName,
  236. Namespace: object.GetNamespace(),
  237. OwnerReferences: []metav1.OwnerReference{
  238. *metav1.NewControllerRef(object, object.GroupVersionKind()),
  239. },
  240. Labels: generateLabels(object, workerType),
  241. },
  242. Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
  243. MaxReplicas: hpa.MaxReplicas,
  244. Metrics: hpa.Metrics,
  245. MinReplicas: hpa.MinReplicas,
  246. ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
  247. APIVersion: "apps/v1",
  248. Kind: kind,
  249. Name: scaleTargetRefName,
  250. },
  251. Behavior: hpa.Behavior,
  252. },
  253. }
  254. _, err := client.AutoscalingV2().HorizontalPodAutoscalers(object.GetNamespace()).Create(context.TODO(), newHPA, metav1.CreateOptions{})
  255. if err != nil {
  256. return fmt.Errorf("failed to create hpa for %s %s, err: %s", kind, hpaName, err)
  257. }
  258. return nil
  259. }
  260. func UpdateHPA(client kubernetes.Interface, object CommonInterface, kind, scaleTargetRefName, workerType string, hpa *sednav1.HPA) error {
  261. // get existing HPA
  262. hpaName := "hpa-" + scaleTargetRefName
  263. existingHPA, err := client.AutoscalingV2().HorizontalPodAutoscalers(object.GetNamespace()).Get(context.TODO(), hpaName, metav1.GetOptions{})
  264. if err != nil {
  265. // create HPA if not found
  266. if errors.IsNotFound(err) {
  267. klog.Info("hpa not found, creating new hpa...")
  268. return CreateHPA(client, object, kind, scaleTargetRefName, workerType, hpa)
  269. }
  270. return fmt.Errorf("failed to get hpa for %s %s, err: %s", kind, hpaName, err)
  271. }
  272. // update HPA
  273. existingHPA.ObjectMeta.Labels = generateLabels(object, workerType)
  274. existingHPA.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
  275. *metav1.NewControllerRef(object, object.GroupVersionKind()),
  276. }
  277. existingHPA.Spec.MaxReplicas = hpa.MaxReplicas
  278. existingHPA.Spec.MinReplicas = hpa.MinReplicas
  279. existingHPA.Spec.Metrics = hpa.Metrics
  280. existingHPA.Spec.ScaleTargetRef = autoscalingv2.CrossVersionObjectReference{
  281. APIVersion: "apps/v1",
  282. Kind: kind,
  283. Name: scaleTargetRefName,
  284. }
  285. existingHPA.Spec.Behavior = hpa.Behavior
  286. // update HPA
  287. _, err = client.AutoscalingV2().HorizontalPodAutoscalers(object.GetNamespace()).Update(context.TODO(), existingHPA, metav1.UpdateOptions{})
  288. if err != nil {
  289. return fmt.Errorf("failed to update hpa for %s %s, err: %s", kind, hpaName, err)
  290. }
  291. return nil
  292. }
  293. func DeleteHPA(client kubernetes.Interface, namespace, name string) error {
  294. // check if HPA exists
  295. _, err := client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(context.TODO(), name, metav1.GetOptions{})
  296. if err != nil {
  297. // Return nil if HPA not found
  298. if errors.IsNotFound(err) {
  299. return nil
  300. }
  301. return fmt.Errorf("failed to get hpa %s in namespace %s, err: %s", name, namespace, err)
  302. }
  303. // delete HPA
  304. err = client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{})
  305. if err != nil {
  306. return fmt.Errorf("failed to delete hpa %s in namespace %s, err: %s", name, namespace, err)
  307. }
  308. return nil
  309. }
  310. func newDeployment(object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam) *appsv1.Deployment {
  311. nameSpace := object.GetNamespace()
  312. deploymentName := object.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerParam.WorkerType)
  313. matchLabel := make(map[string]string)
  314. return &appsv1.Deployment{
  315. ObjectMeta: metav1.ObjectMeta{
  316. Name: deploymentName,
  317. Namespace: nameSpace,
  318. OwnerReferences: []metav1.OwnerReference{
  319. *metav1.NewControllerRef(object, object.GroupVersionKind()),
  320. },
  321. },
  322. Spec: appsv1.DeploymentSpec{
  323. Replicas: (*spec).Replicas,
  324. Template: (*spec).Template,
  325. Selector: &metav1.LabelSelector{
  326. MatchLabels: matchLabel,
  327. },
  328. },
  329. }
  330. }
  331. // injectDeploymentParam modifies deployment in-place
  332. func injectDeploymentParam(deployment *appsv1.Deployment, workerParam *WorkerParam, object CommonInterface) {
  333. var appLabelKey = "app.sedna.io"
  334. var appLabelValue = object.GetName() + "-" + workerParam.WorkerType + "-" + "svc"
  335. // Injection of the storage variables must be done before loading
  336. // the environment variables!
  337. if workerParam.Mounts != nil {
  338. InjectStorageInitializerDeployment(deployment, workerParam)
  339. }
  340. // inject our labels
  341. if deployment.Labels == nil {
  342. deployment.Labels = make(map[string]string)
  343. }
  344. if deployment.Spec.Template.Labels == nil {
  345. deployment.Spec.Template.Labels = make(map[string]string)
  346. }
  347. if deployment.Spec.Selector.MatchLabels == nil {
  348. deployment.Spec.Selector.MatchLabels = make(map[string]string)
  349. }
  350. for k, v := range generateLabels(object, workerParam.WorkerType) {
  351. deployment.Labels[k] = v
  352. deployment.Spec.Template.Labels[k] = v
  353. deployment.Spec.Selector.MatchLabels[k] = v
  354. }
  355. // Edgemesh part, useful for service mapping (not necessary!)
  356. deployment.Labels[appLabelKey] = appLabelValue
  357. deployment.Spec.Template.Labels[appLabelKey] = appLabelValue
  358. deployment.Spec.Selector.MatchLabels[appLabelKey] = appLabelValue
  359. // Env variables injection
  360. envs := createEnvVars(workerParam.Env)
  361. for idx := range deployment.Spec.Template.Spec.Containers {
  362. deployment.Spec.Template.Spec.Containers[idx].Env = append(
  363. deployment.Spec.Template.Spec.Containers[idx].Env, envs...,
  364. )
  365. }
  366. }
  367. // createEnvVars creates EnvMap for container
  368. // include EnvName and EnvValue map for stage of creating a pod
  369. func createEnvVars(envMap map[string]string) []v1.EnvVar {
  370. var envVars []v1.EnvVar
  371. for envName, envValue := range envMap {
  372. Env := v1.EnvVar{
  373. Name: envName,
  374. Value: envValue,
  375. }
  376. envVars = append(envVars, Env)
  377. }
  378. return envVars
  379. }
  380. // injectModelHotUpdateMount injects volume mounts when worker supports hot update of model
  381. func injectModelHotUpdateMount(pod *v1.Pod, object CommonInterface) {
  382. hostPathType := v1.HostPathDirectoryOrCreate
  383. var volumes []v1.Volume
  384. var volumeMounts []v1.VolumeMount
  385. modelHotUpdateHostDir, _ := filepath.Split(GetModelHotUpdateConfigFile(object, ModelHotUpdateHostPrefix))
  386. volumeName := ConvertK8SValidName(ModelHotUpdateVolumeName)
  387. volumes = append(volumes, v1.Volume{
  388. Name: volumeName,
  389. VolumeSource: v1.VolumeSource{
  390. HostPath: &v1.HostPathVolumeSource{
  391. Path: modelHotUpdateHostDir,
  392. Type: &hostPathType,
  393. },
  394. },
  395. })
  396. volumeMounts = append(volumeMounts, v1.VolumeMount{
  397. MountPath: ModelHotUpdateContainerPrefix,
  398. Name: volumeName,
  399. })
  400. injectVolume(pod, volumes, volumeMounts)
  401. }
  402. func GetModelHotUpdateConfigFile(object CommonInterface, prefix string) string {
  403. return strings.ToLower(filepath.Join(prefix, object.GetNamespace(), object.GetObjectKind().GroupVersionKind().Kind,
  404. object.GetName(), ModelHotUpdateConfigFile))
  405. }
  406. // setModelHotUpdateEnv sets envs of model hot update
  407. func setModelHotUpdateEnv(workerParam *WorkerParam) {
  408. workerParam.Env["MODEL_HOT_UPDATE"] = "true"
  409. workerParam.Env["MODEL_POLL_PERIOD_SECONDS"] = strconv.FormatInt(workerParam.ModelHotUpdate.PollPeriodSeconds, 10)
  410. workerParam.Env["MODEL_HOT_UPDATE_CONFIG"] = filepath.Join(ModelHotUpdateContainerPrefix, ModelHotUpdateConfigFile)
  411. }