|
- /*
- Copyright 2021 The KubeEdge Authors.
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
-
- package federatedlearning
-
- import (
- "context"
- "encoding/json"
- "fmt"
-
- sednav1 "github.com/kubeedge/sedna/pkg/apis/sedna/v1alpha1"
- "github.com/kubeedge/sedna/pkg/globalmanager/runtime"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- )
-
- func (c *Controller) updateModelMetrics(jobName, namespace string, metrics []sednav1.Metric) error {
- var err error
- job, err := c.client.FederatedLearningJobs(namespace).Get(context.TODO(), jobName, metav1.GetOptions{})
- if err != nil {
- // federated crd not found
- return err
- }
- modelName := job.Spec.AggregationWorker.Model.Name
- client := c.client.Models(namespace)
-
- return runtime.RetryUpdateStatus(modelName, namespace, func() error {
- model, err := client.Get(context.TODO(), modelName, metav1.GetOptions{})
- if err != nil {
- return err
- }
-
- now := metav1.Now()
- model.Status.UpdateTime = &now
- model.Status.Metrics = metrics
- _, err = client.UpdateStatus(context.TODO(), model, metav1.UpdateOptions{})
- return err
- })
- }
-
- func (c *Controller) appendStatusCondition(name, namespace string, cond sednav1.FLJobCondition) error {
- client := c.client.FederatedLearningJobs(namespace)
-
- return runtime.RetryUpdateStatus(name, namespace, (func() error {
- job, err := client.Get(context.TODO(), name, metav1.GetOptions{})
- if err != nil {
- return err
- }
- job.Status.Conditions = append(job.Status.Conditions, cond)
- _, err = client.UpdateStatus(context.TODO(), job, metav1.UpdateOptions{})
- return err
- }))
- }
-
- // updateFromEdge updates the federated job's status
- func (c *Controller) updateFromEdge(name, namespace, operation string, content []byte) (err error) {
- // JobInfo defines the job information
- type JobInfo struct {
- // Current training round
- CurrentRound int `json:"currentRound"`
- UpdateTime string `json:"updateTime"`
- }
-
- // Output defines job output information
- type Output struct {
- Models []runtime.Model `json:"models"`
- JobInfo *JobInfo `json:"ownerInfo"`
- }
-
- var status struct {
- Phase string `json:"phase"`
- Status string `json:"status"`
- Output *Output `json:"output"`
- }
-
- err = json.Unmarshal(content, &status)
- if err != nil {
- return
- }
-
- output := status.Output
-
- if output != nil {
- // Update the model's metrics
- if len(output.Models) > 0 {
- // only one model
- model := output.Models[0]
- metrics := runtime.ConvertMapToMetrics(model.Metrics)
- if len(metrics) > 0 {
- c.updateModelMetrics(name, namespace, metrics)
- }
- }
-
- jobInfo := output.JobInfo
- // update job info if having any info
- if jobInfo != nil && jobInfo.CurrentRound > 0 {
- // Find a good place to save the progress info
- // TODO: more meaningful reason/message
- reason := "DoTraining"
- message := fmt.Sprintf("Round %v reaches at %s", jobInfo.CurrentRound, jobInfo.UpdateTime)
- cond := NewJobCondition(sednav1.FLJobCondTraining, reason, message)
- c.appendStatusCondition(name, namespace, cond)
- }
- }
-
- return nil
- }
-
- func (c *Controller) SetUpstreamHandler(addFunc runtime.UpstreamHandlerAddFunc) error {
- return addFunc(KindName, c.updateFromEdge)
- }
|