You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

modelarts.go 48 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago

  1. package repo
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "io"
  6. "io/ioutil"
  7. "net/http"
  8. "os"
  9. "path"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "code.gitea.io/gitea/models"
  14. "code.gitea.io/gitea/modules/auth"
  15. "code.gitea.io/gitea/modules/base"
  16. "code.gitea.io/gitea/modules/context"
  17. "code.gitea.io/gitea/modules/git"
  18. "code.gitea.io/gitea/modules/log"
  19. "code.gitea.io/gitea/modules/modelarts"
  20. "code.gitea.io/gitea/modules/obs"
  21. "code.gitea.io/gitea/modules/setting"
  22. "code.gitea.io/gitea/modules/storage"
  23. "github.com/unknwon/com"
  24. )
// Template names for the ModelArts notebook, generic task and train-job pages.
const (
	// Notebook pages.
	tplModelArtsNotebookIndex base.TplName = "repo/modelarts/notebook/index"
	tplModelArtsNotebookNew base.TplName = "repo/modelarts/notebook/new"
	tplModelArtsNotebookShow base.TplName = "repo/modelarts/notebook/show"
	// Generic ModelArts task pages.
	tplModelArtsIndex base.TplName = "repo/modelarts/index"
	tplModelArtsNew base.TplName = "repo/modelarts/new"
	tplModelArtsShow base.TplName = "repo/modelarts/show"
	// Train-job pages, including the model listing and the "new version" form.
	tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
	tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
	tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
	tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index"
	tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new"
)
  39. // MustEnableDataset check if repository enable internal cb
  40. func MustEnableModelArts(ctx *context.Context) {
  41. if !ctx.Repo.CanRead(models.UnitTypeCloudBrain) {
  42. ctx.NotFound("MustEnableCloudbrain", nil)
  43. return
  44. }
  45. }
  46. func ModelArtsIndex(ctx *context.Context) {
  47. MustEnableModelArts(ctx)
  48. repo := ctx.Repo.Repository
  49. page := ctx.QueryInt("page")
  50. if page <= 0 {
  51. page = 1
  52. }
  53. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  54. ListOptions: models.ListOptions{
  55. Page: page,
  56. PageSize: setting.UI.IssuePagingNum,
  57. },
  58. RepoID: repo.ID,
  59. Type: models.TypeCloudBrainTwo,
  60. })
  61. if err != nil {
  62. ctx.ServerError("Cloudbrain", err)
  63. return
  64. }
  65. for i, task := range ciTasks {
  66. if task.Status == string(models.JobRunning) {
  67. ciTasks[i].CanDebug = true
  68. } else {
  69. ciTasks[i].CanDebug = false
  70. }
  71. ciTasks[i].CanDel = models.CanDelJob(ctx.IsSigned, ctx.User, task)
  72. }
  73. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  74. pager.SetDefaultParams(ctx)
  75. ctx.Data["Page"] = pager
  76. ctx.Data["PageIsCloudBrain"] = true
  77. ctx.Data["Tasks"] = ciTasks
  78. ctx.HTML(200, tplModelArtsIndex)
  79. }
  80. func ModelArtsNew(ctx *context.Context) {
  81. ctx.Data["PageIsCloudBrain"] = true
  82. t := time.Now()
  83. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  84. ctx.Data["job_name"] = jobName
  85. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  86. if err != nil {
  87. ctx.ServerError("GetAllUserAttachments failed:", err)
  88. return
  89. }
  90. ctx.Data["attachments"] = attachs
  91. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  92. ctx.Data["env"] = modelarts.NotebookEnv
  93. ctx.Data["notebook_type"] = modelarts.NotebookType
  94. if modelarts.FlavorInfos == nil {
  95. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  96. }
  97. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  98. ctx.HTML(200, tplModelArtsNew)
  99. }
  100. func ModelArtsCreate(ctx *context.Context, form auth.CreateModelArtsForm) {
  101. ctx.Data["PageIsCloudBrain"] = true
  102. jobName := form.JobName
  103. uuid := form.Attachment
  104. description := form.Description
  105. //repo := ctx.Repo.Repository
  106. if !jobNamePattern.MatchString(jobName) {
  107. ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplModelArtsNew, &form)
  108. return
  109. }
  110. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  111. if err != nil {
  112. ctx.RenderWithErr(err.Error(), tplModelArtsNew, &form)
  113. return
  114. }
  115. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  116. }
  117. func ModelArtsShow(ctx *context.Context) {
  118. ctx.Data["PageIsCloudBrain"] = true
  119. var jobID = ctx.Params(":jobid")
  120. task, err := models.GetCloudbrainByJobID(jobID)
  121. if err != nil {
  122. ctx.Data["error"] = err.Error()
  123. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  124. return
  125. }
  126. result, err := modelarts.GetJob(jobID)
  127. if err != nil {
  128. ctx.Data["error"] = err.Error()
  129. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  130. return
  131. }
  132. if result != nil {
  133. task.Status = result.Status
  134. err = models.UpdateJob(task)
  135. if err != nil {
  136. ctx.Data["error"] = err.Error()
  137. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  138. return
  139. }
  140. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  141. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  142. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  143. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  144. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  145. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  146. }
  147. ctx.Data["task"] = task
  148. ctx.Data["jobID"] = jobID
  149. ctx.Data["result"] = result
  150. ctx.HTML(200, tplModelArtsShow)
  151. }
  152. func ModelArtsDebug(ctx *context.Context) {
  153. var jobID = ctx.Params(":jobid")
  154. _, err := models.GetCloudbrainByJobID(jobID)
  155. if err != nil {
  156. ctx.ServerError("GetCloudbrainByJobID failed", err)
  157. return
  158. }
  159. result, err := modelarts.GetJob(jobID)
  160. if err != nil {
  161. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  162. return
  163. }
  164. res, err := modelarts.GetJobToken(jobID)
  165. if err != nil {
  166. ctx.RenderWithErr(err.Error(), tplModelArtsIndex, nil)
  167. return
  168. }
  169. urls := strings.Split(result.Spec.Annotations.Url, "/")
  170. urlPrefix := result.Spec.Annotations.TargetDomain
  171. for i, url := range urls {
  172. if i > 2 {
  173. urlPrefix += "/" + url
  174. }
  175. }
  176. //urlPrefix := result.Spec.Annotations.TargetDomain + "/modelarts/internal/hub/notebook/user/" + task.JobID
  177. log.Info(urlPrefix)
  178. debugUrl := urlPrefix + "?token=" + res.Token
  179. ctx.Redirect(debugUrl)
  180. }
  181. func ModelArtsStop(ctx *context.Context) {
  182. var jobID = ctx.Params(":jobid")
  183. log.Info(jobID)
  184. task, err := models.GetCloudbrainByJobID(jobID)
  185. if err != nil {
  186. ctx.ServerError("GetCloudbrainByJobID failed", err)
  187. return
  188. }
  189. if task.Status != string(models.JobRunning) {
  190. log.Error("the job(%s) is not running", task.JobName)
  191. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  192. return
  193. }
  194. param := models.NotebookAction{
  195. Action: models.ActionStop,
  196. }
  197. res, err := modelarts.StopJob(jobID, param)
  198. if err != nil {
  199. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  200. ctx.ServerError("StopJob failed", err)
  201. return
  202. }
  203. task.Status = res.CurrentStatus
  204. err = models.UpdateJob(task)
  205. if err != nil {
  206. ctx.ServerError("UpdateJob failed", err)
  207. return
  208. }
  209. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  210. }
  211. func ModelArtsDel(ctx *context.Context) {
  212. var jobID = ctx.Params(":jobid")
  213. task, err := models.GetCloudbrainByJobID(jobID)
  214. if err != nil {
  215. ctx.ServerError("GetCloudbrainByJobID failed", err)
  216. return
  217. }
  218. if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) {
  219. log.Error("the job(%s) has not been stopped", task.JobName)
  220. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  221. return
  222. }
  223. _, err = modelarts.DelJob(jobID)
  224. if err != nil {
  225. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  226. ctx.ServerError("DelJob failed", err)
  227. return
  228. }
  229. err = models.DeleteJob(task)
  230. if err != nil {
  231. ctx.ServerError("DeleteJob failed", err)
  232. return
  233. }
  234. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts")
  235. }
  236. func NotebookIndex(ctx *context.Context) {
  237. MustEnableModelArts(ctx)
  238. repo := ctx.Repo.Repository
  239. page := ctx.QueryInt("page")
  240. if page <= 0 {
  241. page = 1
  242. }
  243. ciTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  244. ListOptions: models.ListOptions{
  245. Page: page,
  246. PageSize: setting.UI.IssuePagingNum,
  247. },
  248. RepoID: repo.ID,
  249. Type: models.TypeCloudBrainTwo,
  250. JobType: string(models.JobTypeDebug),
  251. })
  252. if err != nil {
  253. ctx.ServerError("Cloudbrain", err)
  254. return
  255. }
  256. for i, task := range ciTasks {
  257. if task.Status == string(models.JobRunning) {
  258. ciTasks[i].CanDebug = true
  259. } else {
  260. ciTasks[i].CanDebug = false
  261. }
  262. }
  263. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  264. pager.SetDefaultParams(ctx)
  265. ctx.Data["Page"] = pager
  266. ctx.Data["PageIsCloudBrain"] = true
  267. ctx.Data["Tasks"] = ciTasks
  268. ctx.HTML(200, tplModelArtsNotebookIndex)
  269. }
  270. func NotebookNew(ctx *context.Context) {
  271. ctx.Data["PageIsCloudBrain"] = true
  272. t := time.Now()
  273. var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  274. ctx.Data["job_name"] = jobName
  275. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  276. if err != nil {
  277. ctx.ServerError("GetAllUserAttachments failed:", err)
  278. return
  279. }
  280. ctx.Data["attachments"] = attachs
  281. ctx.Data["dataset_path"] = modelarts.DataSetMountPath
  282. ctx.Data["env"] = modelarts.NotebookEnv
  283. ctx.Data["notebook_type"] = modelarts.NotebookType
  284. if modelarts.FlavorInfos == nil {
  285. json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
  286. }
  287. ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
  288. ctx.HTML(200, tplModelArtsNotebookNew)
  289. }
  290. func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
  291. ctx.Data["PageIsNotebook"] = true
  292. jobName := form.JobName
  293. uuid := form.Attachment
  294. description := form.Description
  295. err := modelarts.GenerateTask(ctx, jobName, uuid, description)
  296. if err != nil {
  297. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
  298. return
  299. }
  300. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  301. }
  302. func NotebookShow(ctx *context.Context) {
  303. ctx.Data["PageIsCloudBrain"] = true
  304. var jobID = ctx.Params(":jobid")
  305. task, err := models.GetCloudbrainByJobID(jobID)
  306. if err != nil {
  307. ctx.Data["error"] = err.Error()
  308. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  309. return
  310. }
  311. result, err := modelarts.GetJob(jobID)
  312. if err != nil {
  313. ctx.Data["error"] = err.Error()
  314. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  315. return
  316. }
  317. if result != nil {
  318. task.Status = result.Status
  319. err = models.UpdateJob(task)
  320. if err != nil {
  321. ctx.Data["error"] = err.Error()
  322. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
  323. return
  324. }
  325. createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
  326. result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
  327. endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
  328. result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
  329. result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  330. result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
  331. }
  332. ctx.Data["task"] = task
  333. ctx.Data["jobID"] = jobID
  334. ctx.Data["result"] = result
  335. ctx.HTML(200, tplModelArtsNotebookShow)
  336. }
  337. func NotebookDebug(ctx *context.Context) {
  338. var jobID = ctx.Params(":jobid")
  339. _, err := models.GetCloudbrainByJobID(jobID)
  340. if err != nil {
  341. ctx.ServerError("GetCloudbrainByJobID failed", err)
  342. return
  343. }
  344. result, err := modelarts.GetJob(jobID)
  345. if err != nil {
  346. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  347. return
  348. }
  349. res, err := modelarts.GetJobToken(jobID)
  350. if err != nil {
  351. ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
  352. return
  353. }
  354. urls := strings.Split(result.Spec.Annotations.Url, "/")
  355. urlPrefix := result.Spec.Annotations.TargetDomain
  356. for i, url := range urls {
  357. if i > 2 {
  358. urlPrefix += "/" + url
  359. }
  360. }
  361. debugUrl := urlPrefix + "?token=" + res.Token
  362. ctx.Redirect(debugUrl)
  363. }
  364. func NotebookStop(ctx *context.Context) {
  365. var jobID = ctx.Params(":jobid")
  366. log.Info(jobID)
  367. task, err := models.GetCloudbrainByJobID(jobID)
  368. if err != nil {
  369. ctx.ServerError("GetCloudbrainByJobID failed", err)
  370. return
  371. }
  372. if task.Status != string(models.JobRunning) {
  373. log.Error("the job(%s) is not running", task.JobName)
  374. ctx.ServerError("the job is not running", errors.New("the job is not running"))
  375. return
  376. }
  377. param := models.NotebookAction{
  378. Action: models.ActionStop,
  379. }
  380. res, err := modelarts.StopJob(jobID, param)
  381. if err != nil {
  382. log.Error("StopJob(%s) failed:%v", task.JobName, err.Error())
  383. ctx.ServerError("StopJob failed", err)
  384. return
  385. }
  386. task.Status = res.CurrentStatus
  387. err = models.UpdateJob(task)
  388. if err != nil {
  389. ctx.ServerError("UpdateJob failed", err)
  390. return
  391. }
  392. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  393. }
  394. func NotebookDel(ctx *context.Context) {
  395. var jobID = ctx.Params(":jobid")
  396. task, err := models.GetCloudbrainByJobID(jobID)
  397. if err != nil {
  398. ctx.ServerError("GetCloudbrainByJobID failed", err)
  399. return
  400. }
  401. if task.Status != string(models.JobStopped) {
  402. log.Error("the job(%s) has not been stopped", task.JobName)
  403. ctx.ServerError("the job has not been stopped", errors.New("the job has not been stopped"))
  404. return
  405. }
  406. _, err = modelarts.DelNotebook(jobID)
  407. if err != nil {
  408. log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
  409. ctx.ServerError("DelJob failed", err)
  410. return
  411. }
  412. err = models.DeleteJob(task)
  413. if err != nil {
  414. ctx.ServerError("DeleteJob failed", err)
  415. return
  416. }
  417. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/notebook")
  418. }
  419. func TrainJobIndex(ctx *context.Context) {
  420. MustEnableModelArts(ctx)
  421. repo := ctx.Repo.Repository
  422. page := ctx.QueryInt("page")
  423. if page <= 0 {
  424. page = 1
  425. }
  426. tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{
  427. ListOptions: models.ListOptions{
  428. Page: page,
  429. PageSize: setting.UI.IssuePagingNum,
  430. },
  431. RepoID: repo.ID,
  432. Type: models.TypeCloudBrainTwo,
  433. JobType: string(models.JobTypeTrain),
  434. IsLatestVersion: modelarts.IsLatestVersion,
  435. })
  436. if err != nil {
  437. ctx.ServerError("Cloudbrain", err)
  438. return
  439. }
  440. pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5)
  441. pager.SetDefaultParams(ctx)
  442. ctx.Data["Page"] = pager
  443. ctx.Data["PageIsCloudBrain"] = true
  444. ctx.Data["Tasks"] = tasks
  445. ctx.HTML(200, tplModelArtsTrainJobIndex)
  446. }
  447. func TrainJobNew(ctx *context.Context) {
  448. err := trainJobNewDataPrepare(ctx)
  449. if err != nil {
  450. ctx.ServerError("get new train-job info failed", err)
  451. return
  452. }
  453. ctx.HTML(200, tplModelArtsTrainJobNew)
  454. }
  455. func trainJobNewDataPrepare(ctx *context.Context) error {
  456. ctx.Data["PageIsCloudBrain"] = true
  457. //can, err := canUserCreateTrainJob(ctx.User.ID)
  458. //if err != nil {
  459. // ctx.ServerError("canUserCreateTrainJob", err)
  460. // return
  461. //}
  462. //
  463. //if !can {
  464. // log.Error("the user can not create train-job")
  465. // ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job"))
  466. // return
  467. //}
  468. t := time.Now()
  469. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  470. ctx.Data["job_name"] = jobName
  471. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  472. if err != nil {
  473. ctx.ServerError("GetAllUserAttachments failed:", err)
  474. return err
  475. }
  476. ctx.Data["attachments"] = attachs
  477. var resourcePools modelarts.ResourcePool
  478. if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
  479. ctx.ServerError("json.Unmarshal failed:", err)
  480. return err
  481. }
  482. ctx.Data["resource_pools"] = resourcePools.Info
  483. var engines modelarts.Engine
  484. if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
  485. ctx.ServerError("json.Unmarshal failed:", err)
  486. return err
  487. }
  488. ctx.Data["engines"] = engines.Info
  489. var versionInfos modelarts.VersionInfo
  490. if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
  491. ctx.ServerError("json.Unmarshal failed:", err)
  492. return err
  493. }
  494. ctx.Data["engine_versions"] = versionInfos.Version
  495. var flavorInfos modelarts.Flavor
  496. if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
  497. ctx.ServerError("json.Unmarshal failed:", err)
  498. return err
  499. }
  500. ctx.Data["flavor_infos"] = flavorInfos.Info
  501. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  502. ctx.Data["train_url"] = outputObsPath
  503. Branches, err := ctx.Repo.GitRepo.GetBranches()
  504. if err != nil {
  505. ctx.ServerError("GetBranches error:", err)
  506. return err
  507. }
  508. ctx.Data["Branches"] = Branches
  509. ctx.Data["BranchesCount"] = len(Branches)
  510. configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
  511. if err != nil {
  512. ctx.ServerError("getConfigList failed:", err)
  513. return err
  514. }
  515. ctx.Data["config_list"] = configList.ParaConfigs
  516. return nil
  517. }
  518. func TrainJobNewVersion(ctx *context.Context) {
  519. err := trainJobNewVersionDataPrepare(ctx)
  520. if err != nil {
  521. ctx.ServerError("get new train-job info failed", err)
  522. return
  523. }
  524. ctx.HTML(200, tplModelArtsTrainJobVersionNew)
  525. }
  526. func trainJobNewVersionDataPrepare(ctx *context.Context) error {
  527. ctx.Data["PageIsCloudBrain"] = true
  528. var jobID = ctx.Params(":jobid")
  529. var versionName = ctx.Query("version_name")
  530. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  531. if err != nil {
  532. log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error())
  533. return err
  534. }
  535. t := time.Now()
  536. var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
  537. ctx.Data["job_name"] = task.JobName
  538. attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
  539. if err != nil {
  540. ctx.ServerError("GetAllUserAttachments failed:", err)
  541. return err
  542. }
  543. ctx.Data["attachments"] = attachs
  544. var resourcePools modelarts.ResourcePool
  545. if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
  546. ctx.ServerError("json.Unmarshal failed:", err)
  547. return err
  548. }
  549. ctx.Data["resource_pools"] = resourcePools.Info
  550. var engines modelarts.Engine
  551. if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
  552. ctx.ServerError("json.Unmarshal failed:", err)
  553. return err
  554. }
  555. ctx.Data["engines"] = engines.Info
  556. var versionInfos modelarts.VersionInfo
  557. if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
  558. ctx.ServerError("json.Unmarshal failed:", err)
  559. return err
  560. }
  561. ctx.Data["engine_versions"] = versionInfos.Version
  562. var flavorInfos modelarts.Flavor
  563. if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
  564. ctx.ServerError("json.Unmarshal failed:", err)
  565. return err
  566. }
  567. ctx.Data["flavor_infos"] = flavorInfos.Info
  568. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  569. ctx.Data["train_url"] = outputObsPath
  570. Branches, err := ctx.Repo.GitRepo.GetBranches()
  571. if err != nil {
  572. ctx.ServerError("GetBranches error:", err)
  573. return err
  574. }
  575. ctx.Data["branches"] = Branches
  576. ctx.Data["branch_name"] = task.BranchName
  577. ctx.Data["description"] = task.Description
  578. ctx.Data["boot_file"] = task.BootFile
  579. ctx.Data["dataset_name"] = task.DatasetName
  580. ctx.Data["params"] = task.Parameters
  581. ctx.Data["work_server_number"] = task.WorkServerNumber
  582. ctx.Data["flavor_name"] = task.FlavorName
  583. configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
  584. if err != nil {
  585. ctx.ServerError("getConfigList failed:", err)
  586. return err
  587. }
  588. ctx.Data["config_list"] = configList.ParaConfigs
  589. return nil
  590. }
  591. func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
  592. ctx.Data["PageIsTrainJob"] = true
  593. jobName := form.JobName
  594. uuid := form.Attachment
  595. description := form.Description
  596. workServerNumber := form.WorkServerNumber
  597. engineID := form.EngineID
  598. bootFile := form.BootFile
  599. flavorCode := form.Flavor
  600. params := form.Params
  601. poolID := form.PoolID
  602. isSaveParam := form.IsSaveParam
  603. repo := ctx.Repo.Repository
  604. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  605. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  606. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  607. logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
  608. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
  609. branch_name := form.BranchName
  610. isLatestVersion := modelarts.IsLatestVersion
  611. FlavorName := form.FlavorName
  612. if err := paramCheckCreateTrainJob(form); err != nil {
  613. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  614. trainJobNewDataPrepare(ctx)
  615. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  616. return
  617. }
  618. attach, err := models.GetAttachmentByUUID(uuid)
  619. if err != nil {
  620. log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error())
  621. return
  622. }
  623. //todo: del the codeLocalPath
  624. _, err = ioutil.ReadDir(codeLocalPath)
  625. if err == nil {
  626. os.RemoveAll(codeLocalPath)
  627. }
  628. gitRepo, _ := git.OpenRepository(repo.RepoPath())
  629. commitID, _ := gitRepo.GetBranchCommitID(branch_name)
  630. if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{
  631. Branch: branch_name,
  632. }); err != nil {
  633. log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err)
  634. trainJobNewDataPrepare(ctx)
  635. ctx.Data["bootFile"] = form.BootFile
  636. ctx.Data["uuid"] = form.Attachment
  637. ctx.Data["datasetName"] = attach.Name
  638. ctx.Data["params"] = form.Params
  639. ctx.Data["branch_name"] = branch_name
  640. trainJobNewDataPrepare(ctx)
  641. // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
  642. ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form)
  643. // ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form)
  644. return
  645. }
  646. //todo: upload code (send to file_server todo this work?)
  647. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
  648. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  649. trainJobNewDataPrepare(ctx)
  650. ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
  651. return
  652. }
  653. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
  654. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  655. trainJobNewDataPrepare(ctx)
  656. ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
  657. return
  658. }
  659. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  660. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  661. trainJobNewDataPrepare(ctx)
  662. ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
  663. return
  664. }
  665. //todo: del local code?
  666. var parameters models.Parameters
  667. param := make([]models.Parameter, 0)
  668. param = append(param, models.Parameter{
  669. Label: modelarts.TrainUrl,
  670. Value: outputObsPath,
  671. }, models.Parameter{
  672. Label: modelarts.DataUrl,
  673. Value: dataPath,
  674. })
  675. if len(params) != 0 {
  676. err := json.Unmarshal([]byte(params), &parameters)
  677. if err != nil {
  678. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  679. trainJobNewDataPrepare(ctx)
  680. ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form)
  681. return
  682. }
  683. for _, parameter := range parameters.Parameter {
  684. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  685. param = append(param, models.Parameter{
  686. Label: parameter.Label,
  687. Value: parameter.Value,
  688. })
  689. }
  690. }
  691. }
  692. //save param config
  693. if isSaveParam == "on" {
  694. if form.ParameterTemplateName == "" {
  695. log.Error("ParameterTemplateName is empty")
  696. trainJobNewDataPrepare(ctx)
  697. ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobNew, &form)
  698. return
  699. }
  700. _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
  701. ConfigName: form.ParameterTemplateName,
  702. Description: form.PrameterDescription,
  703. DataUrl: dataPath,
  704. AppUrl: codeObsPath,
  705. BootFileUrl: codeObsPath + bootFile,
  706. TrainUrl: outputObsPath,
  707. Flavor: models.Flavor{
  708. Code: flavorCode,
  709. },
  710. WorkServerNum: workServerNumber,
  711. EngineID: int64(engineID),
  712. LogUrl: logObsPath,
  713. PoolID: poolID,
  714. Parameter: param,
  715. })
  716. if err != nil {
  717. log.Error("Failed to CreateTrainJobConfig: %v", err)
  718. trainJobNewDataPrepare(ctx)
  719. ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form)
  720. return
  721. }
  722. }
  723. req := &modelarts.GenerateTrainJobReq{
  724. JobName: jobName,
  725. DataUrl: dataPath,
  726. Description: description,
  727. CodeObsPath: codeObsPath,
  728. BootFile: codeObsPath + bootFile,
  729. TrainUrl: outputObsPath,
  730. FlavorCode: flavorCode,
  731. WorkServerNumber: workServerNumber,
  732. EngineID: int64(engineID),
  733. LogUrl: logObsPath,
  734. PoolID: poolID,
  735. Uuid: uuid,
  736. Parameters: parameters.Parameter,
  737. CommitID: commitID,
  738. IsLatestVersion: isLatestVersion,
  739. BranchName: branch_name,
  740. Params: form.Params,
  741. FatherVersionName: modelarts.InitFatherVersionName,
  742. FlavorName: FlavorName,
  743. }
  744. err = modelarts.GenerateTrainJob(ctx, req)
  745. if err != nil {
  746. log.Error("GenerateTrainJob failed:%v", err.Error())
  747. trainJobNewDataPrepare(ctx)
  748. ctx.Data["bootFile"] = form.BootFile
  749. ctx.Data["uuid"] = form.Attachment
  750. ctx.Data["datasetName"] = attach.Name
  751. ctx.Data["params"] = form.Params
  752. ctx.Data["branch_name"] = branch_name
  753. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
  754. return
  755. }
  756. // // 保存openi创建训练任务界面的参数
  757. // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{
  758. // JobName: req.JobName,
  759. // JobID: strconv.FormatInt(jobResult.JobID, 10),
  760. // VersionName: jobResult.VersionName,
  761. // ResourcePools: form.PoolID,
  762. // EngineVersions: form.EngineID,
  763. // FlavorInfos: form.Flavor,
  764. // TrainUrl: outputObsPath,
  765. // BootFile: form.BootFile,
  766. // Uuid: form.Attachment,
  767. // DatasetName: attach.Name,
  768. // Params: form.Params,
  769. // BranchName: branch_name,
  770. // })
  771. // if err != nil {
  772. // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error())
  773. // trainJobNewVersionDataPrepare(ctx)
  774. // ctx.Data["bootFile"] = form.BootFile
  775. // ctx.Data["uuid"] = form.Attachment
  776. // ctx.Data["datasetName"] = attach.Name
  777. // ctx.Data["params"] = form.Params
  778. // ctx.Data["branch_name"] = branch_name
  779. // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
  780. // return
  781. // }
  782. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  783. }
  784. func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
  785. ctx.Data["PageIsTrainJob"] = true
  786. var jobID = ctx.Params(":jobid")
  787. // var fatherVersionName = ctx.Query("versionName")
  788. // jobID = "19373"
  789. // versionName = "V0009"
  790. jobName := form.JobName
  791. uuid := form.Attachment
  792. description := form.Description
  793. workServerNumber := form.WorkServerNumber
  794. engineID := form.EngineID
  795. bootFile := form.BootFile
  796. flavorCode := form.Flavor
  797. params := form.Params
  798. poolID := form.PoolID
  799. isSaveParam := form.IsSaveParam
  800. repo := ctx.Repo.Repository
  801. codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
  802. codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
  803. outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
  804. logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
  805. dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
  806. branch_name := form.BranchName
  807. fatherVersionName := form.VersionName
  808. FlavorName := form.FlavorName
  809. if err := paramCheckCreateTrainJob(form); err != nil {
  810. log.Error("paramCheckCreateTrainJob failed:(%v)", err)
  811. trainJobNewVersionDataPrepare(ctx)
  812. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
  813. return
  814. }
  815. attach, err := models.GetAttachmentByUUID(uuid)
  816. if err != nil {
  817. log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error())
  818. return
  819. }
  820. //todo: del the codeLocalPath
  821. _, err = ioutil.ReadDir(codeLocalPath)
  822. if err == nil {
  823. os.RemoveAll(codeLocalPath)
  824. }
  825. gitRepo, _ := git.OpenRepository(repo.RepoPath())
  826. commitID, _ := gitRepo.GetBranchCommitID(branch_name)
  827. if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{
  828. Branch: branch_name,
  829. }); err != nil {
  830. log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err)
  831. trainJobNewVersionDataPrepare(ctx)
  832. ctx.Data["bootFile"] = form.BootFile
  833. ctx.Data["uuid"] = form.Attachment
  834. ctx.Data["datasetName"] = attach.Name
  835. ctx.Data["params"] = form.Params
  836. ctx.Data["branch_name"] = branch_name
  837. // ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
  838. ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form)
  839. // ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form)
  840. return
  841. }
  842. //todo: upload code (send to file_server todo this work?)
  843. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
  844. log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
  845. trainJobNewVersionDataPrepare(ctx)
  846. ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form)
  847. return
  848. }
  849. if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
  850. log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
  851. trainJobNewVersionDataPrepare(ctx)
  852. ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form)
  853. return
  854. }
  855. if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
  856. log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
  857. trainJobNewVersionDataPrepare(ctx)
  858. ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form)
  859. return
  860. }
  861. //todo: del local code?
  862. var parameters models.Parameters
  863. param := make([]models.Parameter, 0)
  864. param = append(param, models.Parameter{
  865. Label: modelarts.TrainUrl,
  866. Value: outputObsPath,
  867. }, models.Parameter{
  868. Label: modelarts.DataUrl,
  869. Value: dataPath,
  870. })
  871. if len(params) != 0 {
  872. err := json.Unmarshal([]byte(params), &parameters)
  873. if err != nil {
  874. log.Error("Failed to Unmarshal params: %s (%v)", params, err)
  875. trainJobNewVersionDataPrepare(ctx)
  876. ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form)
  877. return
  878. }
  879. for _, parameter := range parameters.Parameter {
  880. if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl {
  881. param = append(param, models.Parameter{
  882. Label: parameter.Label,
  883. Value: parameter.Value,
  884. })
  885. }
  886. }
  887. }
  888. //save param config
  889. if isSaveParam == "on" {
  890. if form.ParameterTemplateName == "" {
  891. log.Error("ParameterTemplateName is empty")
  892. trainJobNewVersionDataPrepare(ctx)
  893. ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form)
  894. return
  895. }
  896. _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{
  897. ConfigName: form.ParameterTemplateName,
  898. Description: form.PrameterDescription,
  899. DataUrl: dataPath,
  900. AppUrl: codeObsPath,
  901. BootFileUrl: codeObsPath + bootFile,
  902. TrainUrl: outputObsPath,
  903. Flavor: models.Flavor{
  904. Code: flavorCode,
  905. },
  906. WorkServerNum: workServerNumber,
  907. EngineID: int64(engineID),
  908. LogUrl: logObsPath,
  909. PoolID: poolID,
  910. Parameter: parameters.Parameter,
  911. })
  912. if err != nil {
  913. log.Error("Failed to CreateTrainJobConfig: %v", err)
  914. trainJobNewVersionDataPrepare(ctx)
  915. ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form)
  916. return
  917. }
  918. }
  919. // JobVersionName := "V0001"
  920. // PreVersionId := int64(67646)
  921. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, fatherVersionName)
  922. if err != nil {
  923. log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error())
  924. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
  925. return
  926. }
  927. req := &modelarts.GenerateTrainJobVersionReq{
  928. JobName: task.JobName,
  929. DataUrl: dataPath,
  930. Description: description,
  931. CodeObsPath: codeObsPath,
  932. BootFile: codeObsPath + bootFile,
  933. TrainUrl: outputObsPath,
  934. FlavorCode: flavorCode,
  935. WorkServerNumber: workServerNumber,
  936. EngineID: int64(engineID),
  937. LogUrl: logObsPath,
  938. PoolID: poolID,
  939. Uuid: uuid,
  940. Params: form.Params,
  941. PreVersionId: task.VersionID,
  942. CommitID: commitID,
  943. BranchName: branch_name,
  944. FlavorName: FlavorName,
  945. }
  946. err = modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName)
  947. if err != nil {
  948. log.Error("GenerateTrainJob failed:%v", err.Error())
  949. trainJobNewVersionDataPrepare(ctx)
  950. ctx.Data["bootFile"] = form.BootFile
  951. ctx.Data["uuid"] = form.Attachment
  952. ctx.Data["datasetName"] = attach.Name
  953. ctx.Data["params"] = form.Params
  954. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
  955. return
  956. }
  957. // 保存openi创建训练任务界面的参数
  958. // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{
  959. // JobName: req.JobName,
  960. // JobID: strconv.FormatInt(jobResult.JobID, 10),
  961. // VersionName: jobResult.VersionName,
  962. // ResourcePools: form.PoolID,
  963. // EngineVersions: form.EngineID,
  964. // FlavorInfos: form.Flavor,
  965. // TrainUrl: outputObsPath,
  966. // BootFile: form.BootFile,
  967. // Uuid: form.Attachment,
  968. // DatasetName: attach.Name,
  969. // Params: form.Params,
  970. // BranchName: branch_name,
  971. // })
  972. // if err != nil {
  973. // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error())
  974. // trainJobNewVersionDataPrepare(ctx)
  975. // ctx.Data["bootFile"] = form.BootFile
  976. // ctx.Data["uuid"] = form.Attachment
  977. // ctx.Data["datasetName"] = attach.Name
  978. // ctx.Data["params"] = form.Params
  979. // ctx.Data["branch_name"] = branch_name
  980. // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
  981. // return
  982. // }
  983. // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  984. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  985. }
  986. // readDir reads the directory named by dirname and returns
  987. // a list of directory entries sorted by filename.
  988. func readDir(dirname string) ([]os.FileInfo, error) {
  989. f, err := os.Open(dirname)
  990. if err != nil {
  991. return nil, err
  992. }
  993. list, err := f.Readdir(100)
  994. f.Close()
  995. if err != nil {
  996. //todo: can not upload empty folder
  997. if err == io.EOF {
  998. return nil, nil
  999. }
  1000. return nil, err
  1001. }
  1002. //sort.Slice(list, func(i, j int) bool { return list[i].Name() < list[j].Name() })
  1003. return list, nil
  1004. }
  1005. func uploadCodeToObs(codePath, jobName, parentDir string) error {
  1006. files, err := readDir(codePath)
  1007. if err != nil {
  1008. log.Error("readDir(%s) failed: %s", codePath, err.Error())
  1009. return err
  1010. }
  1011. for _, file := range files {
  1012. if file.IsDir() {
  1013. input := &obs.PutObjectInput{}
  1014. input.Bucket = setting.Bucket
  1015. input.Key = parentDir + file.Name() + "/"
  1016. _, err = storage.ObsCli.PutObject(input)
  1017. if err != nil {
  1018. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  1019. return err
  1020. }
  1021. if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil {
  1022. log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error())
  1023. return err
  1024. }
  1025. } else {
  1026. input := &obs.PutFileInput{}
  1027. input.Bucket = setting.Bucket
  1028. input.Key = setting.CodePathPrefix + jobName + "/code/" + parentDir + file.Name()
  1029. input.SourceFile = codePath + file.Name()
  1030. _, err = storage.ObsCli.PutFile(input)
  1031. if err != nil {
  1032. log.Error("PutFile(%s) failed: %s", input.SourceFile, err.Error())
  1033. return err
  1034. }
  1035. }
  1036. }
  1037. return nil
  1038. }
  1039. func obsMkdir(dir string) error {
  1040. input := &obs.PutObjectInput{}
  1041. input.Bucket = setting.Bucket
  1042. input.Key = dir
  1043. _, err := storage.ObsCli.PutObject(input)
  1044. if err != nil {
  1045. log.Error("PutObject(%s) failed: %s", input.Key, err.Error())
  1046. return err
  1047. }
  1048. return nil
  1049. }
  1050. func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
  1051. if !strings.HasSuffix(form.BootFile, ".py") {
  1052. log.Error("the boot file(%s) must be a python file", form.BootFile)
  1053. return errors.New("启动文件必须是python文件")
  1054. }
  1055. if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 {
  1056. log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber)
  1057. return errors.New("计算节点数必须在1-25之间")
  1058. }
  1059. return nil
  1060. }
  1061. func TrainJobShow(ctx *context.Context) {
  1062. ctx.Data["PageIsCloudBrain"] = true
  1063. var jobID = ctx.Params(":jobid")
  1064. task, err := models.GetCloudbrainByJobID(jobID)
  1065. if err != nil {
  1066. ctx.ServerError("GetCloudbrainByJobID faild", err)
  1067. return
  1068. }
  1069. repo := ctx.Repo.Repository
  1070. page := ctx.QueryInt("page")
  1071. if page <= 0 {
  1072. page = 1
  1073. }
  1074. VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
  1075. ListOptions: models.ListOptions{
  1076. Page: page,
  1077. PageSize: setting.UI.IssuePagingNum,
  1078. },
  1079. RepoID: repo.ID,
  1080. Type: models.TypeCloudBrainTwo,
  1081. JobType: string(models.JobTypeTrain),
  1082. JobID: jobID,
  1083. })
  1084. if err != nil {
  1085. ctx.ServerError("Cloudbrain", err)
  1086. return
  1087. }
  1088. if err != nil {
  1089. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  1090. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1091. return
  1092. }
  1093. // attach, err := models.GetAttachmentByUUID(task.Uuid)
  1094. // if err != nil {
  1095. // log.Error("GetAttachmentByUUID(%s) failed:%v", jobID, err.Error())
  1096. // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1097. // return
  1098. // }
  1099. result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  1100. if err != nil {
  1101. log.Error("GetJob(%s) failed:%v", jobID, err.Error())
  1102. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1103. return
  1104. }
  1105. if result != nil {
  1106. result.CreateTime = time.Unix(int64(result.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05")
  1107. if result.Duration != 0 {
  1108. result.TrainJobDuration = addZero(result.Duration/3600000) + ":" + addZero(result.Duration%3600000/60000) + ":" + addZero(result.Duration%60000/1000)
  1109. } else {
  1110. result.TrainJobDuration = "00:00:00"
  1111. }
  1112. result.Status = modelarts.TransTrainJobStatus(result.IntStatus)
  1113. err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration))
  1114. if err != nil {
  1115. ctx.ServerError("UpdateJob failed", err)
  1116. return
  1117. }
  1118. result.DatasetName = task.DatasetName
  1119. }
  1120. resultLogFile, resultLog, err := trainJobGetLog(jobID)
  1121. if err != nil {
  1122. log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error())
  1123. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1124. return
  1125. }
  1126. ctx.Data["log_file_name"] = resultLogFile.LogFileList[0]
  1127. ctx.Data["log"] = resultLog
  1128. ctx.Data["task"] = task
  1129. ctx.Data["jobID"] = jobID
  1130. ctx.Data["result"] = result
  1131. ctx.Data["version_list_task"] = VersionListTasks
  1132. ctx.Data["version_list_count"] = VersionListCount
  1133. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  1134. }
  1135. func addZero(t int64) (m string) {
  1136. if t < 10 {
  1137. m = "0" + strconv.FormatInt(t, 10)
  1138. return m
  1139. } else {
  1140. return strconv.FormatInt(t, 10)
  1141. }
  1142. }
  1143. func TrainJobGetLog(ctx *context.Context) {
  1144. ctx.Data["PageIsTrainJob"] = true
  1145. var jobID = ctx.Params(":jobid")
  1146. var logFileName = ctx.Query("file_name")
  1147. var baseLine = ctx.Query("base_line")
  1148. var order = ctx.Query("order")
  1149. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  1150. log.Error("order(%s) check failed", order)
  1151. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  1152. return
  1153. }
  1154. task, err := models.GetCloudbrainByJobID(jobID)
  1155. if err != nil {
  1156. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  1157. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1158. return
  1159. }
  1160. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  1161. if err != nil {
  1162. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  1163. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1164. return
  1165. }
  1166. ctx.Data["log"] = result
  1167. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  1168. }
  1169. func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
  1170. task, err := models.GetCloudbrainByJobID(jobID)
  1171. if err != nil {
  1172. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  1173. return nil, nil, err
  1174. }
  1175. resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10))
  1176. if err != nil {
  1177. log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
  1178. return nil, nil, err
  1179. }
  1180. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
  1181. if err != nil {
  1182. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  1183. return nil, nil, err
  1184. }
  1185. return resultLogFile, result, err
  1186. }
  1187. func TrainJobDel(ctx *context.Context) {
  1188. var jobID = ctx.Params(":jobid")
  1189. task, err := models.GetCloudbrainByJobID(jobID)
  1190. if err != nil {
  1191. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  1192. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  1193. return
  1194. }
  1195. _, err = modelarts.DelTrainJob(jobID)
  1196. if err != nil {
  1197. log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
  1198. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  1199. return
  1200. }
  1201. err = models.DeleteJob(task)
  1202. if err != nil {
  1203. ctx.ServerError("DeleteJob failed", err)
  1204. return
  1205. }
  1206. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  1207. }
  1208. func TrainJobStop(ctx *context.Context) {
  1209. var jobID = ctx.Params(":jobid")
  1210. task, err := models.GetCloudbrainByJobID(jobID)
  1211. if err != nil {
  1212. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  1213. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  1214. return
  1215. }
  1216. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  1217. if err != nil {
  1218. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  1219. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  1220. return
  1221. }
  1222. ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  1223. }
  1224. func TrainJobVersionDel(ctx *context.Context) {
  1225. var jobID = ctx.Params(":jobid")
  1226. var versionName = ctx.Query(":versionName")
  1227. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  1228. if err != nil {
  1229. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  1230. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1231. return
  1232. }
  1233. _, err = modelarts.DelTrainJob(jobID)
  1234. if err != nil {
  1235. log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error())
  1236. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1237. return
  1238. }
  1239. err = models.DeleteJob(task)
  1240. if err != nil {
  1241. ctx.ServerError("DeleteJob failed", err)
  1242. return
  1243. }
  1244. // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  1245. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  1246. }
  1247. func TrainJobVersionStop(ctx *context.Context) {
  1248. var jobID = ctx.Params(":jobid")
  1249. var versionName = ctx.Query(":versionName")
  1250. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  1251. if err != nil {
  1252. log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
  1253. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  1254. return
  1255. }
  1256. _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
  1257. if err != nil {
  1258. log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
  1259. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
  1260. return
  1261. }
  1262. // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
  1263. ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  1264. }
  1265. func canUserCreateTrainJob(uid int64) (bool, error) {
  1266. org, err := models.GetOrgByName(setting.AllowedOrg)
  1267. if err != nil {
  1268. log.Error("get allowed org failed: ", setting.AllowedOrg)
  1269. return false, err
  1270. }
  1271. return org.IsOrgMember(uid)
  1272. }
  1273. func TrainJobGetConfigList(ctx *context.Context) {
  1274. ctx.Data["PageIsTrainJob"] = true
  1275. var jobID = ctx.Params(":jobid")
  1276. var logFileName = ctx.Query("file_name")
  1277. var baseLine = ctx.Query("base_line")
  1278. var order = ctx.Query("order")
  1279. if order != modelarts.OrderDesc && order != modelarts.OrderAsc {
  1280. log.Error("order(%s) check failed", order)
  1281. ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
  1282. return
  1283. }
  1284. task, err := models.GetCloudbrainByJobID(jobID)
  1285. if err != nil {
  1286. log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
  1287. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1288. return
  1289. }
  1290. result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines)
  1291. if err != nil {
  1292. log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
  1293. ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
  1294. return
  1295. }
  1296. ctx.Data["log"] = result
  1297. //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
  1298. }
  1299. func getConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) {
  1300. var result models.GetConfigListResult
  1301. list, err := modelarts.GetConfigList(perPage, page, sortBy, order, searchContent, configType)
  1302. if err != nil {
  1303. log.Error("GetConfigList failed:", err)
  1304. return &result, err
  1305. }
  1306. for _, config := range list.ParaConfigs {
  1307. paraConfig, err := modelarts.GetParaConfig(config.ConfigName, configType)
  1308. if err != nil {
  1309. log.Error("GetParaConfig failed:", err)
  1310. return &result, err
  1311. }
  1312. config.Result = paraConfig
  1313. }
  1314. return list, nil
  1315. }
  1316. func TrainJobShowModels(ctx *context.Context) {
  1317. ctx.Data["PageIsCloudBrain"] = true
  1318. jobID := ctx.Params(":jobid")
  1319. parentDir := ctx.Query("parentDir")
  1320. dirArray := strings.Split(parentDir, "/")
  1321. task, err := models.GetCloudbrainByJobID(jobID)
  1322. if err != nil {
  1323. log.Error("no such job!", ctx.Data["msgID"])
  1324. ctx.ServerError("no such job:", err)
  1325. return
  1326. }
  1327. models, err := storage.GetObsListObject(task.JobName, parentDir)
  1328. if err != nil {
  1329. log.Info("get TrainJobListModel failed:", err)
  1330. ctx.ServerError("GetObsListObject:", err)
  1331. return
  1332. }
  1333. ctx.Data["Path"] = dirArray
  1334. ctx.Data["Dirs"] = models
  1335. ctx.Data["task"] = task
  1336. ctx.Data["JobID"] = jobID
  1337. ctx.HTML(200, tplModelArtsTrainJobShowModels)
  1338. }
  1339. func TrainJobVersionShowModels(ctx *context.Context) {
  1340. ctx.Data["PageIsCloudBrain"] = true
  1341. jobID := ctx.Params(":jobid")
  1342. parentDir := ctx.Query("parentDir")
  1343. versionName := ctx.Query("version_name")
  1344. dirArray := strings.Split(parentDir, "/")
  1345. task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
  1346. if err != nil {
  1347. log.Error("no such job!", ctx.Data["msgID"])
  1348. ctx.ServerError("no such job:", err)
  1349. return
  1350. }
  1351. parentDir = versionName
  1352. models, err := storage.GetVersionObsListObject(task.JobName, parentDir)
  1353. if err != nil {
  1354. log.Info("get TrainJobListModel failed:", err)
  1355. ctx.ServerError("GetVersionObsListObject:", err)
  1356. return
  1357. }
  1358. ctx.Data["Path"] = dirArray
  1359. ctx.Data["Dirs"] = models
  1360. ctx.Data["task"] = task
  1361. ctx.Data["JobID"] = jobID
  1362. ctx.HTML(200, tplModelArtsTrainJobShowModels)
  1363. }
  1364. func TrainJobDownloadModel(ctx *context.Context) {
  1365. parentDir := ctx.Query("parentDir")
  1366. fileName := ctx.Query("fileName")
  1367. jobName := ctx.Query("jobName")
  1368. url, err := storage.GetObsCreateSignedUrl(jobName, parentDir, fileName)
  1369. if err != nil {
  1370. log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
  1371. ctx.ServerError("GetObsCreateSignedUrl", err)
  1372. return
  1373. }
  1374. http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently)
  1375. }