Browse Source

feature: hpa for jointinference

Signed-off-by: ming.tang <ming.tang@daocloud.io>
tags/v0.7.0
ming.tang 9 months ago
parent
commit
27cc953a0f
15 changed files with 81542 additions and 61340 deletions
  1. +15
    -18
      build/crds/sedna.io_datasets.yaml
  2. +4451
    -3180
      build/crds/sedna.io_featureextractionservices.yaml
  3. +8810
    -6944
      build/crds/sedna.io_federatedlearningjobs.yaml
  4. +13198
    -10241
      build/crds/sedna.io_incrementallearningjobs.yaml
  5. +10066
    -6872
      build/crds/sedna.io_jointinferenceservices.yaml
  6. +13191
    -10235
      build/crds/sedna.io_lifelonglearningjobs.yaml
  7. +15
    -17
      build/crds/sedna.io_models.yaml
  8. +13268
    -10409
      build/crds/sedna.io_objectsearchservices.yaml
  9. +8881
    -7008
      build/crds/sedna.io_objecttrackingservices.yaml
  10. +4728
    -3208
      build/crds/sedna.io_reidjobs.yaml
  11. +4726
    -3206
      build/crds/sedna.io_videoanalyticsjobs.yaml
  12. +40
    -0
      pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go
  13. +44
    -0
      pkg/apis/sedna/v1alpha1/zz_generated.deepcopy.go
  14. +18
    -2
      pkg/globalmanager/controllers/jointinference/jointinferenceservice.go
  15. +91
    -0
      pkg/globalmanager/runtime/worker.go

+ 15
- 18
build/crds/sedna.io_datasets.yaml View File

@@ -1,11 +1,9 @@

---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
controller-gen.kubebuilder.io/version: v0.15.0
name: datasets.sedna.io
spec:
group: sedna.io
@@ -22,14 +20,19 @@ spec:
description: Dataset describes the data that a dataset resource should have
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
description: |-
APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
description: |-
Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
@@ -50,9 +53,9 @@ spec:
- url
type: object
status:
description: DatasetStatus represents information about the status of
a dataset including the time a dataset updated, and number of samples
in a dataset
description: |-
DatasetStatus represents information about the status of a dataset
including the time a dataset updated, and number of samples in a dataset
properties:
numberOfSamples:
type: integer
@@ -69,9 +72,3 @@ spec:
storage: true
subresources:
status: {}
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

+ 4451
- 3180
build/crds/sedna.io_featureextractionservices.yaml
File diff suppressed because it is too large
View File


+ 8810
- 6944
build/crds/sedna.io_federatedlearningjobs.yaml
File diff suppressed because it is too large
View File


+ 13198
- 10241
build/crds/sedna.io_incrementallearningjobs.yaml
File diff suppressed because it is too large
View File


+ 10066
- 6872
build/crds/sedna.io_jointinferenceservices.yaml
File diff suppressed because it is too large
View File


+ 13191
- 10235
build/crds/sedna.io_lifelonglearningjobs.yaml
File diff suppressed because it is too large
View File


+ 15
- 17
build/crds/sedna.io_models.yaml View File

@@ -1,11 +1,9 @@

---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.4.1
creationTimestamp: null
controller-gen.kubebuilder.io/version: v0.15.0
name: models.sedna.io
spec:
group: sedna.io
@@ -22,14 +20,19 @@ spec:
description: Model describes the data that a model resource should have
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
description: |-
APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
description: |-
Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
@@ -51,8 +54,9 @@ spec:
- url
type: object
status:
description: ModelStatus represents information about the status of a
model including the time a model updated, and metrics in a model
description: |-
ModelStatus represents information about the status of a model
including the time a model updated, and metrics in a model
properties:
metrics:
items:
@@ -79,9 +83,3 @@ spec:
storage: true
subresources:
status: {}
status:
acceptedNames:
kind: ""
plural: ""
conditions: []
storedVersions: []

+ 13268
- 10409
build/crds/sedna.io_objectsearchservices.yaml
File diff suppressed because it is too large
View File


+ 8881
- 7008
build/crds/sedna.io_objecttrackingservices.yaml
File diff suppressed because it is too large
View File


+ 4728
- 3208
build/crds/sedna.io_reidjobs.yaml
File diff suppressed because it is too large
View File


+ 4726
- 3206
build/crds/sedna.io_videoanalyticsjobs.yaml
File diff suppressed because it is too large
View File


+ 40
- 0
pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go View File

@@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1

import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@@ -47,12 +48,51 @@ type EdgeWorker struct {
Model SmallModel `json:"model"`
HardExampleMining HardExampleMining `json:"hardExampleMining"`
Template v1.PodTemplateSpec `json:"template"`

// HPA describes the desired functionality of the HorizontalPodAutoscaler.
// +optional
HPA *HPA `json:"hpa"`
}

// CloudWorker describes the data a cloud worker should have:
// the big model it serves, the pod template used to run it, and an
// optional autoscaling configuration.
type CloudWorker struct {
	Model    BigModel           `json:"model"`
	Template v1.PodTemplateSpec `json:"template"`

	// HPA describes the desired functionality of the HorizontalPodAutoscaler.
	// When nil, no autoscaler is requested for the cloud worker.
	// omitempty keeps an unset HPA out of the serialized object instead of
	// emitting "hpa": null, as expected for optional fields.
	// +optional
	HPA *HPA `json:"hpa,omitempty"`
}

// HPA describes the desired functionality of the HorizontalPodAutoscaler.
// It carries the replica bounds, metrics and scaling behavior for a worker;
// the scale target reference is not part of this type.
type HPA struct {
// minReplicas is the lower limit for the number of replicas to which the autoscaler
// can scale down. It defaults to 1 pod. minReplicas is allowed to be 0 if the
// alpha feature gate HPAScaleToZero is enabled and at least one Object or External
// metric is configured. Scaling is active as long as at least one metric value is
// available.
// +optional
MinReplicas *int32 `json:"minReplicas,omitempty"`

// maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up.
// It cannot be less than minReplicas.
MaxReplicas int32 `json:"maxReplicas"`

// metrics contains the specifications for which to use to calculate the
// desired replica count (the maximum replica count across all metrics will
// be used). The desired replica count is calculated multiplying the
// ratio between the target value and the current value by the current
// number of pods. Ergo, metrics used must decrease as the pod count is
// increased, and vice-versa. See the individual metric source types for
// more information about how each type of metric must respond.
// +optional
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`

// behavior configures the scaling behavior of the target
// in both Up and Down directions (scaleUp and scaleDown fields respectively).
// If not set, the default HPAScalingRules for scale up and scale down are used.
// +optional
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
}

// SmallModel describes the small model


+ 44
- 0
pkg/apis/sedna/v1alpha1/zz_generated.deepcopy.go View File

@@ -22,6 +22,7 @@ limitations under the License.
package v1alpha1

import (
v2 "k8s.io/api/autoscaling/v2"
runtime "k8s.io/apimachinery/pkg/runtime"
)

@@ -122,6 +123,11 @@ func (in *CloudWorker) DeepCopyInto(out *CloudWorker) {
*out = *in
out.Model = in.Model
in.Template.DeepCopyInto(&out.Template)
if in.HPA != nil {
in, out := &in.HPA, &out.HPA
*out = new(HPA)
(*in).DeepCopyInto(*out)
}
return
}

@@ -290,6 +296,11 @@ func (in *EdgeWorker) DeepCopyInto(out *EdgeWorker) {
out.Model = in.Model
in.HardExampleMining.DeepCopyInto(&out.HardExampleMining)
in.Template.DeepCopyInto(&out.Template)
if in.HPA != nil {
in, out := &in.HPA, &out.HPA
*out = new(HPA)
(*in).DeepCopyInto(*out)
}
return
}

@@ -606,6 +617,39 @@ func (in *FederatedLearningJobList) DeepCopyObject() runtime.Object {
return nil
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// After the call, out shares no pointer or slice storage with in for the
// MinReplicas, Metrics and Behavior fields.
func (in *HPA) DeepCopyInto(out *HPA) {
// Shallow copy first: this covers the plain value field (MaxReplicas) and
// temporarily aliases the pointer/slice fields, which are replaced below.
*out = *in
if in.MinReplicas != nil {
// Allocate a fresh int32 so out does not point at in's value.
in, out := &in.MinReplicas, &out.MinReplicas
*out = new(int32)
**out = **in
}
if in.Metrics != nil {
// Allocate a new slice of the same length and deep-copy element by element.
in, out := &in.Metrics, &out.Metrics
*out = make([]v2.MetricSpec, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Behavior != nil {
// Allocate a new behavior object and delegate to its own DeepCopyInto.
in, out := &in.Behavior, &out.Behavior
*out = new(v2.HorizontalPodAutoscalerBehavior)
(*in).DeepCopyInto(*out)
}
return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HPA.
// A nil receiver yields nil, so it is safe to call on an unset optional HPA field.
func (in *HPA) DeepCopy() *HPA {
if in == nil {
return nil
}
// Allocate the result and let DeepCopyInto populate it.
out := new(HPA)
in.DeepCopyInto(out)
return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HardExampleMining) DeepCopyInto(out *HardExampleMining) {
*out = *in


+ 18
- 2
pkg/globalmanager/controllers/jointinference/jointinferenceservice.go View File

@@ -478,13 +478,17 @@ func (c *Controller) updateInferenceServices(old, cur interface{}) error {
func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService, workerType string, bigModelHost string, bigModelPort int32, create bool) error {
var modelName string
var modelTemplate v1.PodTemplateSpec
var hpa *sednav1.HPA
var workerParam runtime.WorkerParam

deploymentName := service.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerType)

// Set the corresponding parameters according to the workerType.
switch workerType {
case jointInferenceForCloud:
modelName = service.Spec.CloudWorker.Model.Name
modelTemplate = *service.Spec.CloudWorker.Template.DeepCopy()
hpa = service.Spec.CloudWorker.HPA.DeepCopy()

workerParam.Env = map[string]string{
"BIG_MODEL_BIND_PORT": strconv.Itoa(int(bigModelPort)),
@@ -494,6 +498,7 @@ func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService
case jointInferenceForEdge:
modelName = service.Spec.EdgeWorker.Model.Name
modelTemplate = *service.Spec.EdgeWorker.Template.DeepCopy()
hpa = service.Spec.EdgeWorker.HPA.DeepCopy()

HEMParameterJSON, _ := json.Marshal(service.Spec.EdgeWorker.HardExampleMining.Parameters)
HEMParameterString := string(HEMParameterJSON)
@@ -537,19 +542,30 @@ func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService
workerParam.Env["SERVICE_NAME"] = service.Name
workerParam.Env["WORKER_NAME"] = strings.ToLower(workerType) + "worker-" + utilrand.String(5)

// Set the group version kind.
service.SetGroupVersionKind(gvk)

// Create or update Deployment.
if create {
_, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &appsv1.DeploymentSpec{Template: modelTemplate}, &workerParam)
// create HPA
if hpa != nil {
return runtime.CreateHPA(c.kubeClient, service, "Deployment", deploymentName, workerType, hpa)
}
} else {
service.SetGroupVersionKind(gvk)
workerName := service.Name + "-deployment-" + strings.ToLower(workerType)
existingDeployment, err := c.deploymentsLister.Deployments(service.Namespace).Get(workerName)
if err != nil {
return fmt.Errorf("get %s Deployment failed:%v", strings.ToLower(workerType), err)
return fmt.Errorf("get %s Deployment failed: %v", strings.ToLower(workerType), err)
}
newDeployment := existingDeployment.DeepCopy()
newDeployment.Spec.Template = modelTemplate
_, err = runtime.UpdateDeploymentWithTemplate(c.kubeClient, service, newDeployment, &workerParam)
// update HPA
if hpa != nil {
return runtime.UpdateHPA(c.kubeClient, service, "Deployment", deploymentName, workerType, hpa)
}
return runtime.DeleteHPA(c.kubeClient, service.GetNamespace(), "hpa-"+deploymentName)
}
return err
}


+ 91
- 0
pkg/globalmanager/runtime/worker.go View File

@@ -3,6 +3,9 @@ package runtime
import (
"context"
"fmt"
sednav1 "github.com/kubeedge/sedna/pkg/apis/sedna/v1alpha1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
"k8s.io/apimachinery/pkg/api/errors"
"path/filepath"
"strconv"
"strings"
@@ -267,6 +270,94 @@ func UpdateDeploymentWithTemplate(client kubernetes.Interface, object CommonInte
return updatedDeployment, nil
}

// CreateHPA creates a HorizontalPodAutoscaler named "hpa-"+scaleTargetRefName
// in the namespace of object.
//
// The autoscaler targets the apps/v1 workload identified by kind and
// scaleTargetRefName, is controller-owned by object, and is labelled with
// generateLabels(object, workerType). Replica bounds, metrics and behavior are
// taken from hpa.
//
// It returns an error when hpa is nil or when the API call fails. The API
// error is wrapped with %w so the underlying cause stays inspectable.
func CreateHPA(client kubernetes.Interface, object CommonInterface, kind, scaleTargetRefName, workerType string, hpa *sednav1.HPA) error {
	hpaName := "hpa-" + scaleTargetRefName

	// Guard against a nil spec: dereferencing it below would panic the controller.
	if hpa == nil {
		return fmt.Errorf("failed to create hpa for %s %s, err: hpa spec is nil", kind, hpaName)
	}

	namespace := object.GetNamespace()
	newHPA := &autoscalingv2.HorizontalPodAutoscaler{
		ObjectMeta: metav1.ObjectMeta{
			Name:      hpaName,
			Namespace: namespace,
			OwnerReferences: []metav1.OwnerReference{
				*metav1.NewControllerRef(object, object.GroupVersionKind()),
			},
			Labels: generateLabels(object, workerType),
		},
		Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
			MaxReplicas: hpa.MaxReplicas,
			Metrics:     hpa.Metrics,
			MinReplicas: hpa.MinReplicas,
			ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
				APIVersion: "apps/v1",
				Kind:       kind,
				Name:       scaleTargetRefName,
			},
			Behavior: hpa.Behavior,
		},
	}

	if _, err := client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Create(context.TODO(), newHPA, metav1.CreateOptions{}); err != nil {
		return fmt.Errorf("failed to create hpa for %s %s, err: %w", kind, hpaName, err)
	}
	return nil
}

// UpdateHPA reconciles the HorizontalPodAutoscaler named
// "hpa-"+scaleTargetRefName in the namespace of object with the desired hpa
// settings.
//
// If the autoscaler does not exist yet it is created via CreateHPA. Otherwise
// its labels, owner reference, replica bounds, metrics, scale target and
// behavior are overwritten and the object is updated through the API.
//
// It returns an error when hpa is nil or when any API call fails. API errors
// are wrapped with %w so the underlying cause stays inspectable.
func UpdateHPA(client kubernetes.Interface, object CommonInterface, kind, scaleTargetRefName, workerType string, hpa *sednav1.HPA) error {
	hpaName := "hpa-" + scaleTargetRefName

	// Guard against a nil spec: dereferencing it below would panic the controller.
	if hpa == nil {
		return fmt.Errorf("failed to update hpa for %s %s, err: hpa spec is nil", kind, hpaName)
	}

	hpaClient := client.AutoscalingV2().HorizontalPodAutoscalers(object.GetNamespace())

	// Fetch the live object so the update carries its resourceVersion.
	existingHPA, err := hpaClient.Get(context.TODO(), hpaName, metav1.GetOptions{})
	if err != nil {
		// Nothing to update yet: fall back to creating the autoscaler.
		if errors.IsNotFound(err) {
			klog.Info("hpa not found, creating new hpa...")
			return CreateHPA(client, object, kind, scaleTargetRefName, workerType, hpa)
		}
		return fmt.Errorf("failed to get hpa for %s %s, err: %w", kind, hpaName, err)
	}

	// Overwrite every field this controller manages with the desired state.
	existingHPA.ObjectMeta.Labels = generateLabels(object, workerType)
	existingHPA.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
		*metav1.NewControllerRef(object, object.GroupVersionKind()),
	}
	existingHPA.Spec.MaxReplicas = hpa.MaxReplicas
	existingHPA.Spec.MinReplicas = hpa.MinReplicas
	existingHPA.Spec.Metrics = hpa.Metrics
	existingHPA.Spec.ScaleTargetRef = autoscalingv2.CrossVersionObjectReference{
		APIVersion: "apps/v1",
		Kind:       kind,
		Name:       scaleTargetRefName,
	}
	existingHPA.Spec.Behavior = hpa.Behavior

	if _, err = hpaClient.Update(context.TODO(), existingHPA, metav1.UpdateOptions{}); err != nil {
		return fmt.Errorf("failed to update hpa for %s %s, err: %w", kind, hpaName, err)
	}

	return nil
}

// DeleteHPA removes the HorizontalPodAutoscaler with the given name from
// namespace.
//
// Deletion is idempotent: if the autoscaler does not exist (never created, or
// already removed by someone else) the function returns nil. Any other API
// failure is returned, wrapped with %w.
func DeleteHPA(client kubernetes.Interface, namespace, name string) error {
	// Delete directly instead of Get-then-Delete: the extra Get costs an API
	// round trip and leaves a window in which the object can disappear,
	// turning a successful outcome into a spurious error.
	err := client.AutoscalingV2().HorizontalPodAutoscalers(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{})
	if err != nil && !errors.IsNotFound(err) {
		return fmt.Errorf("failed to delete hpa %s in namespace %s, err: %w", name, namespace, err)
	}

	return nil
}

func newDeployment(object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam) *appsv1.Deployment {
nameSpace := object.GetNamespace()
deploymentName := object.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerParam.WorkerType)


Loading…
Cancel
Save