You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

downstream.go 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. /*
  2. Copyright 2021 The KubeEdge Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package incrementallearning
  14. import (
  15. "context"
  16. "fmt"
  17. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  18. "k8s.io/apimachinery/pkg/watch"
  19. "k8s.io/klog/v2"
  20. sednav1 "github.com/kubeedge/sedna/pkg/apis/sedna/v1alpha1"
  21. "github.com/kubeedge/sedna/pkg/globalmanager/runtime"
  22. )
  23. // syncModelWithName will sync the model to the specified node.
  24. // Now called when creating the incrementaljob.
  25. func (c *Controller) syncModelWithName(nodeName, modelName, namespace string) error {
  26. model, err := c.client.Models(namespace).Get(context.TODO(), modelName, metav1.GetOptions{})
  27. if err != nil {
  28. // TODO: maybe use err.ErrStatus.Code == 404
  29. return fmt.Errorf("model(%s/%s) not found", namespace, modelName)
  30. }
  31. // Since model.Kind may be empty,
  32. // we need to fix the kind here if missing.
  33. // more details at https://github.com/kubernetes/kubernetes/issues/3030
  34. if len(model.Kind) == 0 {
  35. model.Kind = "Model"
  36. }
  37. runtime.InjectSecretAnnotations(c.kubeClient, model, model.Spec.CredentialName)
  38. c.sendToEdgeFunc(nodeName, watch.Added, model)
  39. return nil
  40. }
  41. func (c *Controller) syncToEdge(eventType watch.EventType, obj interface{}) error {
  42. job, ok := obj.(*sednav1.IncrementalLearningJob)
  43. if !ok {
  44. return nil
  45. }
  46. // Since Kind may be empty,
  47. // we need to fix the kind here if missing.
  48. // more details at https://github.com/kubernetes/kubernetes/issues/3030
  49. job.Kind = KindName
  50. jobConditions := job.Status.Conditions
  51. if len(jobConditions) == 0 {
  52. return nil
  53. }
  54. dataName := job.Spec.Dataset.Name
  55. ds, err := c.client.Datasets(job.Namespace).Get(context.TODO(), dataName, metav1.GetOptions{})
  56. if err != nil {
  57. return fmt.Errorf("dataset(%s/%s) not found", job.Namespace, dataName)
  58. }
  59. // LC has dataset object on this node that may call dataset node
  60. dsNodeName := ds.Spec.NodeName
  61. var trainNodeName string
  62. var evalNodeName string
  63. ann := job.GetAnnotations()
  64. if ann != nil {
  65. trainNodeName = ann[runtime.AnnotationsKeyPrefix+string(sednav1.ILJobTrain)]
  66. evalNodeName = ann[runtime.AnnotationsKeyPrefix+string(sednav1.ILJobEval)]
  67. }
  68. if eventType == watch.Deleted {
  69. // delete jobs from all LCs
  70. for _, v := range []string{dsNodeName, trainNodeName, evalNodeName} {
  71. if v != "" {
  72. c.sendToEdgeFunc(v, eventType, job)
  73. }
  74. }
  75. return nil
  76. }
  77. latestCondition := jobConditions[len(jobConditions)-1]
  78. currentType := latestCondition.Type
  79. jobStage := latestCondition.Stage
  80. syncModelWithName := func(modelName string) {
  81. if err := c.syncModelWithName(dsNodeName, modelName, job.Namespace); err != nil {
  82. klog.Warningf("Error to sync model %s when sync incremental learning job %s to node %s: %v",
  83. modelName, job.Name, dsNodeName, err)
  84. }
  85. }
  86. syncJobWithNodeName := func(nodeName string) {
  87. if err := c.sendToEdgeFunc(nodeName, eventType, job); err != nil {
  88. klog.Warningf("Error to sync incremental learning job %s to node %s in stage %s: %v",
  89. job.Name, nodeName, jobStage, err)
  90. }
  91. }
  92. runtime.InjectSecretAnnotations(c.kubeClient, job, job.Spec.CredentialName)
  93. doJobStageEvent := func(modelName string, nodeName string) {
  94. if currentType == sednav1.ILJobStageCondWaiting {
  95. syncJobWithNodeName(dsNodeName)
  96. syncModelWithName(modelName)
  97. } else if currentType == sednav1.ILJobStageCondRunning {
  98. if nodeName != "" {
  99. syncJobWithNodeName(nodeName)
  100. }
  101. } else if currentType == sednav1.ILJobStageCondCompleted || currentType == sednav1.ILJobStageCondFailed {
  102. if nodeName != dsNodeName {
  103. // delete LC's job from nodeName that's different from dataset node when worker's status is completed or failed.
  104. c.sendToEdgeFunc(nodeName, watch.Deleted, job)
  105. }
  106. }
  107. }
  108. switch jobStage {
  109. case sednav1.ILJobTrain:
  110. doJobStageEvent(job.Spec.InitialModel.Name, trainNodeName)
  111. case sednav1.ILJobEval:
  112. doJobStageEvent(job.Spec.DeploySpec.Model.Name, evalNodeName)
  113. }
  114. return nil
  115. }
  116. func (c *Controller) SetDownstreamSendFunc(f runtime.DownstreamSendFunc) error {
  117. c.sendToEdgeFunc = f
  118. return nil
  119. }