Browse Source

Merge pull request '智算网络NPU训练任务优化--取消代码下载时对外网的依赖' (#3029) from npu-internet-limit into V20221019

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/3029
Reviewed-by: ychao_1983 <ychao_1983@sina.com>
tags/v1.22.10.1^2
ychao_1983 3 years ago
parent
commit
ec7a5c1cbf
3 changed files with 17 additions and 5 deletions
  1. models/cloudbrain.go (+2, -0)
  2. modules/grampus/grampus.go (+11, -0)
  3. routers/repo/grampus.go (+4, -5)

+ 2
- 0
models/cloudbrain.go View File

@@ -1490,6 +1490,8 @@ type GrampusTasks struct {
ReplicaNum int `json:"replicaNum"`
Datasets []GrampusDataset `json:"datasets"`
Models []GrampusDataset `json:"models"`
Code GrampusDataset `json:"code"`
BootFile string `json:"bootFile"`
}

type GrampusDataset struct {


+ 11
- 0
modules/grampus/grampus.go View File

@@ -1,6 +1,7 @@
package grampus

import (
"code.gitea.io/gitea/modules/cloudbrain"
"encoding/json"
"strings"

@@ -73,6 +74,7 @@ type GenerateTrainJobReq struct {
PreTrainModelPath string
PreTrainModelUrl string
Spec *models.Specification
CodeName string
}

func getEndPoint() string {
@@ -102,6 +104,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
centerID, centerName := getCentersParamter(ctx, req)

var datasetGrampus, modelGrampus []models.GrampusDataset
var codeGrampus models.GrampusDataset
if ProcessorTypeNPU == req.ProcessType {
datasetGrampus = getDatasetGrampus(req.DatasetInfos)
if len(req.ModelName) != 0 {
@@ -114,6 +117,12 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
},
}
}
codeGrampus = models.GrampusDataset{
Name: req.CodeName,
Bucket: setting.Bucket,
EndPoint: getEndPoint(),
ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip",
}
}

jobResult, err := createJob(models.CreateGrampusJobRequest{
@@ -130,6 +139,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
ReplicaNum: 1,
Datasets: datasetGrampus,
Models: modelGrampus,
Code: codeGrampus,
BootFile: req.BootFile,
},
},
})


+ 4
- 5
routers/repo/grampus.go View File

@@ -713,6 +713,7 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
DatasetNames: datasetNames,
DatasetInfos: datasetInfos,
Spec: spec,
CodeName: strings.ToLower(repo.Name),
}
if form.ModelName != "" { //使用预训练模型训练
req.ModelName = form.ModelName
@@ -977,8 +978,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject)
//download code & dataset
if processorType == grampus.ProcessorTypeNPU {
commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + ";"
command += commandDownload
//no need to download code & dataset by internet
} else if processorType == grampus.ProcessorTypeGPU {
commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " '" + dataRemotePath + "' '" + datasetName + "'"
commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload)
@@ -987,8 +987,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo

//unzip code & dataset
if processorType == grampus.ProcessorTypeNPU {
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;"
command += commandUnzip
//no need to process
} else if processorType == grampus.ProcessorTypeGPU {
unZipDatasetCommand := generateDatasetUnzipCommand(datasetName)
commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand
@@ -1025,7 +1024,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo

var commandCode string
if processorType == grampus.ProcessorTypeNPU {
commandCode = "/bin/bash /home/work/run_train_for_openi.sh " + workDir + "code/" + strings.ToLower(repoName) + "/" + bootFile + " /tmp/log/train.log" + paramCode + ";"
commandCode = "/bin/bash /home/work/run_train_for_openi.sh /home/work/openi.py /tmp/log/train.log" + paramCode + ";"
} else if processorType == grampus.ProcessorTypeGPU {
if pretrainModelFileName != "" {
paramCode += " --ckpt_url" + "=" + workDir + "pretrainmodel/" + pretrainModelFileName


Loading…
Cancel
Save