From 14e384b849544fc93e7a8412196fdce54cf8e5ab Mon Sep 17 00:00:00 2001 From: llhuii Date: Mon, 29 Mar 2021 17:24:57 +0800 Subject: [PATCH] incremental-learning: fix dataset relative path Signed-off-by: llhuii --- lib/sedna/common/config.py | 1 + lib/sedna/dataset/dataset.py | 5 +++- pkg/globalmanager/incrementallearningjob.go | 30 ++++++++++++--------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/lib/sedna/common/config.py b/lib/sedna/common/config.py index a67e586d..619bd6a3 100644 --- a/lib/sedna/common/config.py +++ b/lib/sedna/common/config.py @@ -18,6 +18,7 @@ import os class BaseConfig: """The base config, the value can not be changed.""" # dataset + original_dataset_url = os.getenv("ORIGINAL_DATASET_URL") train_dataset_url = os.getenv("TRAIN_DATASET_URL") test_dataset_url = os.getenv("TEST_DATASET_URL") data_path_prefix = os.getenv("DATA_PATH_PREFIX", "/home/data") diff --git a/lib/sedna/dataset/dataset.py b/lib/sedna/dataset/dataset.py index 870f8f9c..22b1d2ab 100644 --- a/lib/sedna/dataset/dataset.py +++ b/lib/sedna/dataset/dataset.py @@ -63,7 +63,10 @@ def load_test_dataset(data_format, preprocess_fun=None, **kwargs): def _load_txt_dataset(dataset_url): LOG.info(f'dataset_url is {dataset_url}, now reading dataset_url') - root_path = os.path.dirname(dataset_url) + + # use original dataset url, + # see https://github.com/kubeedge/sedna/issues/35 + root_path = os.path.dirname(BaseConfig.original_dataset_url or dataset_url) with open(dataset_url) as f: lines = f.readlines() new_lines = [root_path + os.path.sep + l for l in lines] diff --git a/pkg/globalmanager/incrementallearningjob.go b/pkg/globalmanager/incrementallearningjob.go index 6ac2f9cb..c3c6377c 100644 --- a/pkg/globalmanager/incrementallearningjob.go +++ b/pkg/globalmanager/incrementallearningjob.go @@ -564,6 +564,7 @@ func (jc *IncrementalJobController) createPod(job *sednav1.IncrementalLearningJo basemodelConPath := dataPrefix + basemodelPath deploymodelConPath := dataPrefix + deploymodelPath outputConPath := dataPrefix + outputDir + originalDatasetPathInContainer := dataPrefix + datasetPath var workerPara *WorkerPara = new(WorkerPara) if podtype == sednav1.ILJobTrain { workerPara.workerType = "Train" @@ -579,13 +580,15 @@ func (jc *IncrementalJobController) createPod(job *sednav1.IncrementalLearningJo workerPara.volumeList = []string{datasetParent, basemodelPath, deploymodelPath, outputDir} workerPara.volumeMapName = []string{"data", "base-model", "deploy-model", "output-dir"} workerPara.env = map[string]string{ - "TRAIN_DATASET_URL": trainDataURL, - "MODEL_URL": outputModelURL, - "BASE_MODEL_URL": preModelURL, - "NAMESPACE": job.Namespace, - "JOB_NAME": job.Name, - "WORKER_NAME": "train-worker-" + utilrand.String(5), - "LC_SERVER": jc.cfg.LC.Server, + // see https://github.com/kubeedge/sedna/issues/35 + "ORIGINAL_DATASET_URL": originalDatasetPathInContainer, + "TRAIN_DATASET_URL": trainDataURL, + "MODEL_URL": outputModelURL, + "BASE_MODEL_URL": preModelURL, + "NAMESPACE": job.Namespace, + "JOB_NAME": job.Name, + "WORKER_NAME": "train-worker-" + utilrand.String(5), + "LC_SERVER": jc.cfg.LC.Server, } } else { podTemplate = &job.Spec.EvalSpec.Template @@ -600,12 +603,13 @@ func (jc *IncrementalJobController) createPod(job *sednav1.IncrementalLearningJo workerPara.volumeList = []string{datasetParent, basemodelPath, deploymodelPath, outputDir} workerPara.volumeMapName = []string{"data", "base-model", "deploy-model", "output-dir"} workerPara.env = map[string]string{ - "TEST_DATASET_URL": evalDataURL, - "MODEL_URLS": modelForEval, - "NAMESPACE": job.Namespace, - "JOB_NAME": job.Name, - "WORKER_NAME": "eval-worker-" + utilrand.String(5), - "LC_SERVER": jc.cfg.LC.Server, + "ORIGINAL_DATASET_URL": originalDatasetPathInContainer, + "TEST_DATASET_URL": evalDataURL, + "MODEL_URLS": modelForEval, + "NAMESPACE": job.Namespace, + "JOB_NAME": job.Name, + "WORKER_NAME": "eval-worker-" + utilrand.String(5), + "LC_SERVER": jc.cfg.LC.Server, } } // create pod based on podtype