feature: hpa for jointinference

Signed-off-by: ming.tang <ming.tang@daocloud.io>
9 months ago · 27cc953a0f
--- a/build/crds/sedna.io_datasets.yaml
+++ b/build/crds/sedna.io_datasets.yaml
@@ -1,11 +1,9 @@

 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.4.1
  creationTimestamp: null
    controller-gen.kubebuilder.io/version: v0.15.0
  name: datasets.sedna.io
 spec:
  group: sedna.io
@@ -22,14 +20,19 @@ spec:
        description: Dataset describes the data that a dataset resource should have
        properties:
          apiVersion:
            description: 'APIVersion defines the versioned schema of this representation
              of an object. Servers should convert recognized schemas to the latest
              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: 'Kind is a string value representing the REST resource this
              object represents. Servers may infer this from the endpoint the client
              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
@@ -50,9 +53,9 @@ spec:
            - url
            type: object
          status:
            description: DatasetStatus represents information about the status of
              a dataset including the time a dataset updated, and number of samples
              in a dataset
            description: |-
              DatasetStatus represents information about the status of a dataset
              including the time a dataset updated, and number of samples in a dataset
            properties:
              numberOfSamples:
                type: integer
@@ -69,9 +72,3 @@ spec:
    storage: true
    subresources:
      status: {}
 status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
--- a/build/crds/sedna.io_featureextractionservices.yaml
+++ b/build/crds/sedna.io_featureextractionservices.yaml
--- a/build/crds/sedna.io_federatedlearningjobs.yaml
+++ b/build/crds/sedna.io_federatedlearningjobs.yaml
--- a/build/crds/sedna.io_incrementallearningjobs.yaml
+++ b/build/crds/sedna.io_incrementallearningjobs.yaml
--- a/build/crds/sedna.io_jointinferenceservices.yaml
+++ b/build/crds/sedna.io_jointinferenceservices.yaml
--- a/build/crds/sedna.io_lifelonglearningjobs.yaml
+++ b/build/crds/sedna.io_lifelonglearningjobs.yaml
--- a/build/crds/sedna.io_models.yaml
+++ b/build/crds/sedna.io_models.yaml
@@ -1,11 +1,9 @@

 ---
 apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.4.1
  creationTimestamp: null
    controller-gen.kubebuilder.io/version: v0.15.0
  name: models.sedna.io
 spec:
  group: sedna.io
@@ -22,14 +20,19 @@ spec:
        description: Model describes the data that a model resource should have
        properties:
          apiVersion:
            description: 'APIVersion defines the versioned schema of this representation
              of an object. Servers should convert recognized schemas to the latest
              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
            description: |-
              APIVersion defines the versioned schema of this representation of an object.
              Servers should convert recognized schemas to the latest internal value, and
              may reject unrecognized values.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
            type: string
          kind:
            description: 'Kind is a string value representing the REST resource this
              object represents. Servers may infer this from the endpoint the client
              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
            description: |-
              Kind is a string value representing the REST resource this object represents.
              Servers may infer this from the endpoint the client submits requests to.
              Cannot be updated.
              In CamelCase.
              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
            type: string
          metadata:
            type: object
@@ -51,8 +54,9 @@ spec:
            - url
            type: object
          status:
            description: ModelStatus represents information about the status of a
              model including the time a model updated, and metrics in a model
            description: |-
              ModelStatus represents information about the status of a model
              including the time a model updated, and metrics in a model
            properties:
              metrics:
                items:
@@ -79,9 +83,3 @@ spec:
    storage: true
    subresources:
      status: {}
 status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
--- a/build/crds/sedna.io_objectsearchservices.yaml
+++ b/build/crds/sedna.io_objectsearchservices.yaml
--- a/build/crds/sedna.io_objecttrackingservices.yaml
+++ b/build/crds/sedna.io_objecttrackingservices.yaml
--- a/build/crds/sedna.io_reidjobs.yaml
+++ b/build/crds/sedna.io_reidjobs.yaml
--- a/build/crds/sedna.io_videoanalyticsjobs.yaml
+++ b/build/crds/sedna.io_videoanalyticsjobs.yaml
--- a/pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go
+++ b/pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go
@@ -17,6 +17,7 @@ limitations under the License.
 package v1alpha1

 import (
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -47,12 +48,51 @@ type EdgeWorker struct {
 	Model             SmallModel         `json:"model"`
 	HardExampleMining HardExampleMining  `json:"hardExampleMining"`
 	Template          v1.PodTemplateSpec `json:"template"`

 	// HPA describes the desired functionality of the HorizontalPodAutoscaler.
 	// +optional
 	HPA *HPA `json:"hpa"`
 }

 // CloudWorker describes the data a cloud worker should have
 type CloudWorker struct {
 	Model    BigModel           `json:"model"`
 	Template v1.PodTemplateSpec `json:"template"`

 	// HPA describes the desired functionality of the HorizontalPodAutoscaler.
 	// When nil, no autoscaler is requested for the cloud worker; omitempty keeps
 	// the unset field out of the serialized object instead of emitting "hpa": null.
 	// +optional
 	HPA *HPA `json:"hpa,omitempty"`
 }

 // HPA describes the desired functionality of the HorizontalPodAutoscaler.
 // Its fields are copied into the spec of the autoscaling/v2
 // HorizontalPodAutoscaler that is created for a worker.
 type HPA struct {
 	// minReplicas is the lower limit for the number of replicas to which the autoscaler
 	// can scale down. It defaults to 1 pod. minReplicas is allowed to be 0 if the
 	// alpha feature gate HPAScaleToZero is enabled and at least one Object or External
 	// metric is configured. Scaling is active as long as at least one metric value is
 	// available.
 	// +optional
 	MinReplicas *int32 `json:"minReplicas,omitempty"`

 	// maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up.
 	// It cannot be less than minReplicas.
 	MaxReplicas int32 `json:"maxReplicas"`

 	// metrics contains the specifications used to calculate the
 	// desired replica count (the maximum replica count across all metrics will
 	// be used). The desired replica count is calculated by multiplying the
 	// ratio between the target value and the current value by the current
 	// number of pods. Ergo, metrics used must decrease as the pod count is
 	// increased, and vice-versa. See the individual metric source types for
 	// more information about how each type of metric must respond.
 	// +optional
 	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`

 	// behavior configures the scaling behavior of the target
 	// in both Up and Down directions (scaleUp and scaleDown fields respectively).
 	// If not set, the default HPAScalingRules for scale up and scale down are used.
 	// +optional
 	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
 }

 // SmallModel describes the small model
--- a/pkg/apis/sedna/v1alpha1/zz_generated.deepcopy.go
+++ b/pkg/apis/sedna/v1alpha1/zz_generated.deepcopy.go
@@ -22,6 +22,7 @@ limitations under the License.
 package v1alpha1

 import (
 	v2 "k8s.io/api/autoscaling/v2"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )

@@ -122,6 +123,11 @@ func (in *CloudWorker) DeepCopyInto(out *CloudWorker) {
 	*out = *in
 	out.Model = in.Model
 	in.Template.DeepCopyInto(&out.Template)
 	if in.HPA != nil {
 		in, out := &in.HPA, &out.HPA
 		*out = new(HPA)
 		(*in).DeepCopyInto(*out)
 	}
 	return
 }

@@ -290,6 +296,11 @@ func (in *EdgeWorker) DeepCopyInto(out *EdgeWorker) {
 	out.Model = in.Model
 	in.HardExampleMining.DeepCopyInto(&out.HardExampleMining)
 	in.Template.DeepCopyInto(&out.Template)
 	if in.HPA != nil {
 		in, out := &in.HPA, &out.HPA
 		*out = new(HPA)
 		(*in).DeepCopyInto(*out)
 	}
 	return
 }

@@ -606,6 +617,39 @@ func (in *FederatedLearningJobList) DeepCopyObject() runtime.Object {
 	return nil
 }

 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 // NOTE(review): this file is generated; regenerate it rather than editing the code by hand.
 func (in *HPA) DeepCopyInto(out *HPA) {
 	// Shallow-copy every field first; the pointer and slice fields are then
 	// re-allocated below so that out does not alias memory owned by in.
 	*out = *in
 	if in.MinReplicas != nil {
 		in, out := &in.MinReplicas, &out.MinReplicas
 		*out = new(int32)
 		**out = **in
 	}
 	if in.Metrics != nil {
 		in, out := &in.Metrics, &out.Metrics
 		*out = make([]v2.MetricSpec, len(*in))
 		// Each element is copied through its own DeepCopyInto.
 		for i := range *in {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
 	if in.Behavior != nil {
 		in, out := &in.Behavior, &out.Behavior
 		*out = new(v2.HorizontalPodAutoscalerBehavior)
 		(*in).DeepCopyInto(*out)
 	}
 	return
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HPA.
 // A nil receiver yields nil, so it is safe to call on an unset optional field.
 func (in *HPA) DeepCopy() *HPA {
 	if in == nil {
 		return nil
 	}
 	out := new(HPA)
 	in.DeepCopyInto(out)
 	return out
 }

 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *HardExampleMining) DeepCopyInto(out *HardExampleMining) {
 	*out = *in
--- a/pkg/globalmanager/controllers/jointinference/jointinferenceservice.go
+++ b/pkg/globalmanager/controllers/jointinference/jointinferenceservice.go
@@ -478,13 +478,17 @@ func (c *Controller) updateInferenceServices(old, cur interface{}) error {
 func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService, workerType string, bigModelHost string, bigModelPort int32, create bool) error {
 	var modelName string
 	var modelTemplate v1.PodTemplateSpec
 	var hpa *sednav1.HPA
 	var workerParam runtime.WorkerParam

 	deploymentName := service.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerType)

 	// Set the corresponding parameters according to the workerType.
 	switch workerType {
 	case jointInferenceForCloud:
 		modelName = service.Spec.CloudWorker.Model.Name
 		modelTemplate = *service.Spec.CloudWorker.Template.DeepCopy()
 		hpa = service.Spec.CloudWorker.HPA.DeepCopy()

 		workerParam.Env = map[string]string{
 			"BIG_MODEL_BIND_PORT": strconv.Itoa(int(bigModelPort)),
@@ -494,6 +498,7 @@ func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService
 	case jointInferenceForEdge:
 		modelName = service.Spec.EdgeWorker.Model.Name
 		modelTemplate = *service.Spec.EdgeWorker.Template.DeepCopy()
 		hpa = service.Spec.EdgeWorker.HPA.DeepCopy()

 		HEMParameterJSON, _ := json.Marshal(service.Spec.EdgeWorker.HardExampleMining.Parameters)
 		HEMParameterString := string(HEMParameterJSON)
@@ -537,19 +542,30 @@ func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService
 	workerParam.Env["SERVICE_NAME"] = service.Name
 	workerParam.Env["WORKER_NAME"] = strings.ToLower(workerType) + "worker-" + utilrand.String(5)

 	// Set the group version kind.
 	service.SetGroupVersionKind(gvk)

 	// Create or update Deployment.
 	if create {
 		_, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &appsv1.DeploymentSpec{Template: modelTemplate}, &workerParam)
 		// create HPA
 		if hpa != nil {
 			return runtime.CreateHPA(c.kubeClient, service, "Deployment", deploymentName, workerType, hpa)
 		}
 	} else {
 		service.SetGroupVersionKind(gvk)
 		workerName := service.Name + "-deployment-" + strings.ToLower(workerType)
 		existingDeployment, err := c.deploymentsLister.Deployments(service.Namespace).Get(workerName)
 		if err != nil {
 			return fmt.Errorf("get %s Deployment failed:%v", strings.ToLower(workerType), err)
 			return fmt.Errorf("get %s Deployment failed: %v", strings.ToLower(workerType), err)
 		}
 		newDeployment := existingDeployment.DeepCopy()
 		newDeployment.Spec.Template = modelTemplate
 		_, err = runtime.UpdateDeploymentWithTemplate(c.kubeClient, service, newDeployment, &workerParam)
 		// update HPA
 		if hpa != nil {
 			return runtime.UpdateHPA(c.kubeClient, service, "Deployment", deploymentName, workerType, hpa)
 		}
 		return runtime.DeleteHPA(c.kubeClient, service.GetNamespace(), "hpa-"+deploymentName)
 	}
 	return err
 }
--- a/pkg/globalmanager/runtime/worker.go
+++ b/pkg/globalmanager/runtime/worker.go
@@ -3,6 +3,9 @@ package runtime
 import (
 	"context"
 	"fmt"
 	sednav1 "github.com/kubeedge/sedna/pkg/apis/sedna/v1alpha1"
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	"k8s.io/apimachinery/pkg/api/errors"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -267,6 +270,94 @@ func UpdateDeploymentWithTemplate(client kubernetes.Interface, object CommonInte
 	return updatedDeployment, nil
 }

 // CreateHPA creates an autoscaling/v2 HorizontalPodAutoscaler named
 // "hpa-"+scaleTargetRefName in the namespace of object.
 //
 // The autoscaler targets the apps/v1 workload of the given kind and name, is
 // owned by object through a controller reference, and carries the labels
 // produced by generateLabels for workerType. Replica bounds, metrics and
 // behavior are taken from hpa.
 //
 // It returns an error when hpa is nil or when the API call fails; the
 // underlying API error is wrapped so callers can inspect it with errors.Is/As.
 func CreateHPA(client kubernetes.Interface, object CommonInterface, kind, scaleTargetRefName, workerType string, hpa *sednav1.HPA) error {
 	hpaName := "hpa-" + scaleTargetRefName

 	// Fail with an error instead of panicking on a nil spec.
 	if hpa == nil {
 		return fmt.Errorf("failed to create hpa for %s %s, err: hpa spec is nil", kind, hpaName)
 	}

 	namespace := object.GetNamespace()
 	newHPA := &autoscalingv2.HorizontalPodAutoscaler{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      hpaName,
 			Namespace: namespace,
 			OwnerReferences: []metav1.OwnerReference{
 				*metav1.NewControllerRef(object, object.GroupVersionKind()),
 			},
 			Labels: generateLabels(object, workerType),
 		},
 		Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
 			MaxReplicas: hpa.MaxReplicas,
 			Metrics:     hpa.Metrics,
 			MinReplicas: hpa.MinReplicas,
 			ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
 				APIVersion: "apps/v1",
 				Kind:       kind,
 				Name:       scaleTargetRefName,
 			},
 			Behavior: hpa.Behavior,
 		},
 	}

 	if _, err := client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Create(context.TODO(), newHPA, metav1.CreateOptions{}); err != nil {
 		return fmt.Errorf("failed to create hpa for %s %s, err: %w", kind, hpaName, err)
 	}
 	return nil
 }

 // UpdateHPA reconciles the HorizontalPodAutoscaler named
 // "hpa-"+scaleTargetRefName in the namespace of object with the desired hpa spec.
 //
 // If the autoscaler does not exist yet it is created via CreateHPA. Otherwise
 // its labels, owner reference, replica bounds, metrics, scale target and
 // behavior are overwritten with the desired values and the object is updated.
 //
 // It returns an error when hpa is nil or when an API call fails; the
 // underlying API error is wrapped so callers can inspect it with errors.Is/As.
 func UpdateHPA(client kubernetes.Interface, object CommonInterface, kind, scaleTargetRefName, workerType string, hpa *sednav1.HPA) error {
 	hpaName := "hpa-" + scaleTargetRefName

 	// Fail with an error instead of panicking on a nil spec.
 	if hpa == nil {
 		return fmt.Errorf("failed to update hpa for %s %s, err: hpa spec is nil", kind, hpaName)
 	}

 	hpaClient := client.AutoscalingV2().HorizontalPodAutoscalers(object.GetNamespace())

 	// get existing HPA
 	existingHPA, err := hpaClient.Get(context.TODO(), hpaName, metav1.GetOptions{})
 	if err != nil {
 		// create HPA if not found
 		if errors.IsNotFound(err) {
 			klog.Info("hpa not found, creating new hpa...")
 			return CreateHPA(client, object, kind, scaleTargetRefName, workerType, hpa)
 		}
 		return fmt.Errorf("failed to get hpa for %s %s, err: %w", kind, hpaName, err)
 	}

 	// Overwrite the mutable parts of the existing object with the desired state.
 	existingHPA.ObjectMeta.Labels = generateLabels(object, workerType)
 	existingHPA.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
 		*metav1.NewControllerRef(object, object.GroupVersionKind()),
 	}
 	existingHPA.Spec.MaxReplicas = hpa.MaxReplicas
 	existingHPA.Spec.MinReplicas = hpa.MinReplicas
 	existingHPA.Spec.Metrics = hpa.Metrics
 	existingHPA.Spec.ScaleTargetRef = autoscalingv2.CrossVersionObjectReference{
 		APIVersion: "apps/v1",
 		Kind:       kind,
 		Name:       scaleTargetRefName,
 	}
 	existingHPA.Spec.Behavior = hpa.Behavior

 	// update HPA
 	if _, err = hpaClient.Update(context.TODO(), existingHPA, metav1.UpdateOptions{}); err != nil {
 		return fmt.Errorf("failed to update hpa for %s %s, err: %w", kind, hpaName, err)
 	}

 	return nil
 }

 // DeleteHPA removes the HorizontalPodAutoscaler with the given name from the
 // given namespace.
 //
 // A missing autoscaler is treated as success, so the call is safe to repeat.
 // The delete is issued directly rather than after a preliminary Get: that
 // saves an API round trip and avoids reporting an error when the object
 // disappears between the two calls.
 //
 // Any other API error is returned wrapped so callers can inspect it with
 // errors.Is/As.
 func DeleteHPA(client kubernetes.Interface, namespace, name string) error {
 	err := client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{})
 	// Return nil if HPA not found: there is nothing left to delete.
 	if err != nil && !errors.IsNotFound(err) {
 		return fmt.Errorf("failed to delete hpa %s in namespace %s, err: %w", name, namespace, err)
 	}

 	return nil
 }

 func newDeployment(object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam) *appsv1.Deployment {
 	nameSpace := object.GetNamespace()
 	deploymentName := object.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerParam.WorkerType)