Browse Source

incremental-learning: fix dataset relative path

Signed-off-by: llhuii <liulinghui@huawei.com>
tags/v0.1.0
llhuii 4 years ago
parent
commit
14e384b849
3 changed files with 22 additions and 14 deletions
  1. +1
    -0
      lib/sedna/common/config.py
  2. +4
    -1
      lib/sedna/dataset/dataset.py
  3. +17
    -13
      pkg/globalmanager/incrementallearningjob.go

+ 1
- 0
lib/sedna/common/config.py View File

@@ -18,6 +18,7 @@ import os
class BaseConfig:
"""The base config, the value can not be changed."""
# dataset
+ original_dataset_url = os.getenv("ORIGINAL_DATASET_URL")
train_dataset_url = os.getenv("TRAIN_DATASET_URL")
test_dataset_url = os.getenv("TEST_DATASET_URL")
data_path_prefix = os.getenv("DATA_PATH_PREFIX", "/home/data")


+ 4
- 1
lib/sedna/dataset/dataset.py View File

@@ -63,7 +63,10 @@ def load_test_dataset(data_format, preprocess_fun=None, **kwargs):

def _load_txt_dataset(dataset_url):
LOG.info(f'dataset_url is {dataset_url}, now reading dataset_url')
- root_path = os.path.dirname(dataset_url)
+
+ # use original dataset url,
+ # see https://github.com/kubeedge/sedna/issues/35
+ root_path = os.path.dirname(BaseConfig.original_dataset_url or dataset_url)
with open(dataset_url) as f:
lines = f.readlines()
new_lines = [root_path + os.path.sep + l for l in lines]


+ 17
- 13
pkg/globalmanager/incrementallearningjob.go View File

@@ -564,6 +564,7 @@ func (jc *IncrementalJobController) createPod(job *sednav1.IncrementalLearningJo
basemodelConPath := dataPrefix + basemodelPath
deploymodelConPath := dataPrefix + deploymodelPath
outputConPath := dataPrefix + outputDir
+ originalDatasetPathInContainer := dataPrefix + datasetPath
var workerPara *WorkerPara = new(WorkerPara)
if podtype == sednav1.ILJobTrain {
workerPara.workerType = "Train"
@@ -579,13 +580,15 @@ func (jc *IncrementalJobController) createPod(job *sednav1.IncrementalLearningJo
workerPara.volumeList = []string{datasetParent, basemodelPath, deploymodelPath, outputDir}
workerPara.volumeMapName = []string{"data", "base-model", "deploy-model", "output-dir"}
workerPara.env = map[string]string{
- "TRAIN_DATASET_URL": trainDataURL,
- "MODEL_URL": outputModelURL,
- "BASE_MODEL_URL": preModelURL,
- "NAMESPACE": job.Namespace,
- "JOB_NAME": job.Name,
- "WORKER_NAME": "train-worker-" + utilrand.String(5),
- "LC_SERVER": jc.cfg.LC.Server,
+ // see https://github.com/kubeedge/sedna/issues/35
+ "ORIGINAL_DATASET_URL": originalDatasetPathInContainer,
+ "TRAIN_DATASET_URL": trainDataURL,
+ "MODEL_URL": outputModelURL,
+ "BASE_MODEL_URL": preModelURL,
+ "NAMESPACE": job.Namespace,
+ "JOB_NAME": job.Name,
+ "WORKER_NAME": "train-worker-" + utilrand.String(5),
+ "LC_SERVER": jc.cfg.LC.Server,
}
} else {
podTemplate = &job.Spec.EvalSpec.Template
@@ -600,12 +603,13 @@ func (jc *IncrementalJobController) createPod(job *sednav1.IncrementalLearningJo
workerPara.volumeList = []string{datasetParent, basemodelPath, deploymodelPath, outputDir}
workerPara.volumeMapName = []string{"data", "base-model", "deploy-model", "output-dir"}
workerPara.env = map[string]string{
- "TEST_DATASET_URL": evalDataURL,
- "MODEL_URLS": modelForEval,
- "NAMESPACE": job.Namespace,
- "JOB_NAME": job.Name,
- "WORKER_NAME": "eval-worker-" + utilrand.String(5),
- "LC_SERVER": jc.cfg.LC.Server,
+ "ORIGINAL_DATASET_URL": originalDatasetPathInContainer,
+ "TEST_DATASET_URL": evalDataURL,
+ "MODEL_URLS": modelForEval,
+ "NAMESPACE": job.Namespace,
+ "JOB_NAME": job.Name,
+ "WORKER_NAME": "eval-worker-" + utilrand.String(5),
+ "LC_SERVER": jc.cfg.LC.Server,
}
}
// create pod based on podtype


Loading…
Cancel
Save