From a1cb52cef2b892bb3e148e9bd8cc55e817604835 Mon Sep 17 00:00:00 2001 From: liuzx Date: Thu, 8 Sep 2022 17:58:57 +0800 Subject: [PATCH 01/57] grampus version new --- routers/repo/grampus.go | 142 +++++++++++++++++- routers/routes/routes.go | 2 + .../repo/grampus/trainjob/gpu/versionnew.tmpl | 0 .../repo/grampus/trainjob/npu/versionnew.tmpl | 0 4 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 templates/repo/grampus/trainjob/gpu/versionnew.tmpl create mode 100644 templates/repo/grampus/trainjob/npu/versionnew.tmpl diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index d7e799427e..f4ab40f5ed 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1,7 +1,6 @@ package repo import ( - "code.gitea.io/gitea/services/cloudbrain/resource" "encoding/json" "errors" "fmt" @@ -13,6 +12,8 @@ import ( "strings" "time" + "code.gitea.io/gitea/services/cloudbrain/resource" + "code.gitea.io/gitea/modules/auth" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/grampus" @@ -34,10 +35,12 @@ const ( tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show" //GPU - tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new" + tplGrampusTrainJobGPUNew base.TplName = "repo/grampus/trainjob/gpu/new" + tplGrampusTrainJobGPUVersionNew base.TplName = "repo/grampus/trainjob/gpu/versionnew" //NPU - tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" + tplGrampusTrainJobNPUNew base.TplName = "repo/grampus/trainjob/npu/new" + tplGrampusTrainJobNPUVersionNew base.TplName = "repo/grampus/trainjob/npu/versionnew" ) func GrampusTrainJobGPUNew(ctx *context.Context) { @@ -137,6 +140,126 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err return nil } +func GrampusTrainJobVersionNew(ctx *context.Context) { + task := ctx.Cloudbrain + if task.ComputeResource == models.GPUResource { + err := grampusTrainJobVersionNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + if err != nil { + ctx.ServerError("get new train-job version info failed", err) + return + } + ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUVersionNew) + } else if task.ComputeResource == models.NPUResource { + err := grampusTrainJobVersionNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + if err != nil { + ctx.ServerError("get new train-job version info failed", err) + return + } + ctx.HTML(200, tplGrampusTrainJobNPUVersionNew) + } +} + +func grampusTrainJobVersionNewDataPrepare(ctx *context.Context, processType string) error { + ctx.Data["PageIsCloudBrain"] = true + + t := time.Now() + var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["display_job_name"] = displayJobName + + //get valid images + images, err := grampus.GetImages(processType) + if err != nil { + log.Error("GetImages failed:", err.Error()) + } else { + ctx.Data["images"] = images.Infos + } + + grampus.InitSpecialPool() + + ctx.Data["GPUEnabled"] = true + ctx.Data["NPUEnabled"] = true + includeCenters := make(map[string]struct{}) + excludeCenters := make(map[string]struct{}) + if grampus.SpecialPools != nil { + for _, pool := range grampus.SpecialPools.Pools { + if pool.IsExclusive { + if !IsUserInOrgPool(ctx.User.ID, pool) { + ctx.Data[pool.Type+"Enabled"] = false + } + } else { + if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) { + if IsUserInOrgPool(ctx.User.ID, pool) { + for _, center := range pool.Pool { + includeCenters[center.Queue] = struct{}{} + } + } else { + for _, center := range pool.Pool { + excludeCenters[center.Queue] = struct{}{} + } + + } + + } + + } + } + } + + //prepare available specs + if processType == grampus.ProcessorTypeNPU { + prepareGrampusTrainSpecs(ctx, models.NPU) + } else if processType == grampus.ProcessorTypeGPU { + prepareGrampusTrainSpecs(ctx, models.GPU) + } + + //get branches + branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) + if err != nil { + log.Error("GetBranches error:", err.Error()) + } else { + ctx.Data["branches"] = branches + } + + ctx.Data["BranchName"] = ctx.Cloudbrain.BranchName + ctx.Data["ImageName"] = ctx.Cloudbrain.Image + ctx.Data["BootFile"] = ctx.Cloudbrain.BootFile + ctx.Data["description"] = ctx.Cloudbrain.Description + spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) + if spec != nil { + log.Info("spec_id = %d", spec.ID) + ctx.Data["spec_id"] = spec.ID + } + var Parameters modelarts.Parameters + if err = json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + + _, _, datasetNames, _, err := getDatasUrlListByUUIDS(ctx.Cloudbrain.Uuid) + if err != nil { + log.Info("query dataset error," + err.Error()) + ctx.Data["dataset_name"] = "" + } else { + ctx.Data["dataset_name"] = datasetNames + } + ctx.Data["uuid"] = ctx.Cloudbrain.Uuid + ctx.Data["ComputeResource"] = ctx.Cloudbrain.ComputeResource + + if processType == grampus.ProcessorTypeGPU { + ctx.Data["datasetType"] = models.TypeCloudBrainOne + waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) + ctx.Data["WaitCount"] = waitCount + } else if processType == grampus.ProcessorTypeNPU { + ctx.Data["datasetType"] = models.TypeCloudBrainTwo + waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain) + ctx.Data["WaitCount"] = waitCount + ctx.Data["work_server_number"] = ctx.Cloudbrain.WorkServerNumber + } + + return nil +} + func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) { noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ JobType: models.JobTypeTrain, @@ -378,6 +501,19 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } +func GrampusTrainJobVersionCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) { + computeResource := ctx.Query("compute_resource") + if computeResource == models.GPUResource { + GrampusTrainJobGpuCreate(ctx, form) + } else if computeResource == models.NPUResource { + GrampusTrainJobNpuCreate(ctx, form) + } else { + ctx.ServerError("resource error", errors.New("compute resource is not support")) + return + } + +} + func checkSpecialPool(ctx *context.Context, resourceType string) string { grampus.InitSpecialPool() if grampus.SpecialPools != nil { diff --git a/routers/routes/routes.go b/routers/routes/routes.go index d8b1bff220..82105ffd15 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1159,6 +1159,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.GrampusStopJob) m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel) m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) + m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.GrampusTrainJobVersionNew) + m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobVersionCreate) }) m.Group("/gpu", func() { m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.GrampusTrainJobGPUNew) diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/repo/grampus/trainjob/npu/versionnew.tmpl b/templates/repo/grampus/trainjob/npu/versionnew.tmpl new file mode 100644 index 0000000000..e69de29bb2 -- 2.34.1 From 31959dea0b848866e2708a9c380cc184d8cceb72 Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 13 Sep 2022 11:10:44 +0800 Subject: [PATCH 02/57] fix-2817 --- routers/repo/cloudbrain.go | 81 +++++++++++++++++++++++++++++++++++++- routers/routes/routes.go | 4 +- 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 7020f0a616..ec3bf1f397 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2,7 +2,6 @@ package repo import ( "bufio" - "code.gitea.io/gitea/services/cloudbrain/resource" "encoding/json" "errors" "fmt" @@ -16,6 +15,8 @@ import ( "time" "unicode/utf8" + "code.gitea.io/gitea/services/cloudbrain/resource" + "code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/grampus" @@ -140,6 +141,71 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { return nil } +func cloudBrainVersionNewDataPrepare(ctx *context.Context) error { + ctx.Data["PageIsCloudBrain"] = true + t := time.Now() + var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] + ctx.Data["display_job_name"] = displayJobName + + ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand() + ctx.Data["code_path"] = cloudbrain.CodeMountPath + ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath + ctx.Data["model_path"] = cloudbrain.ModelMountPath + ctx.Data["benchmark_path"] = cloudbrain.BenchMarkMountPath + ctx.Data["is_benchmark_enabled"] = setting.IsBenchmarkEnabled + + if categories == nil { + json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) + } + ctx.Data["benchmark_categories"] = categories.Category + + ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType + queuesDetail, _ := cloudbrain.GetQueuesDetail() + if queuesDetail != nil { + ctx.Data["QueuesDetail"] = queuesDetail + } + + prepareCloudbrainOneSpecs(ctx) + + ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath + ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled + ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath + ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled + ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode") + + ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName + ctx.Data["image_name"] = ctx.Cloudbrain.Image + ctx.Data["bootfile"] = ctx.Cloudbrain.BootFile + ctx.Data["description"] = ctx.Cloudbrain.Description + spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) + if spec != nil { + log.Info("spec_id = %d", spec.ID) + ctx.Data["spec_id"] = spec.ID + } + var Parameters modelarts.Parameters + if err := json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil { + ctx.ServerError("json.Unmarshal failed:", err) + return err + } + ctx.Data["params"] = Parameters.Parameter + + _, _, datasetNames, _, err := getDatasUrlListByUUIDS(ctx.Cloudbrain.Uuid) + if err != nil { + log.Info("query dataset error," + err.Error()) + ctx.Data["dataset_name"] = "" + } else { + ctx.Data["dataset_name"] = datasetNames + } + ctx.Data["uuid"] = ctx.Cloudbrain.Uuid + ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource + + ctx.Data["dataset_type"] = models.TypeCloudBrainOne + waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) + ctx.Data["wait_count"] = waitCount + + return nil +} + func prepareCloudbrainOneSpecs(ctx *context.Context) { debugSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ JobType: models.JobTypeDebug, @@ -342,6 +408,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { } } +func CloudBrainTrainJobVersionCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { + CloudBrainCreate(ctx, form) +} + func loadCodeAndMakeModelPath(repo *models.Repository, codePath string, branchName string, jobName string, resultPath string) string { err := downloadCode(repo, codePath, branchName) if err != nil { @@ -2548,6 +2618,15 @@ func CloudBrainTrainJobNew(ctx *context.Context) { ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew) } +func CloudBrainTrainJobVersionNew(ctx *context.Context) { + err := cloudBrainVersionNewDataPrepare(ctx) + if err != nil { + ctx.ServerError("get new train-job info failed", err) + return + } + ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew) +} + func InferenceCloudBrainJobNew(ctx *context.Context) { err := cloudBrainNewDataPrepare(ctx) if err != nil { diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 82105ffd15..e97d4a0ca5 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1137,6 +1137,8 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/download_model", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.CloudBrainDownloadModel) //m.Get("/get_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo.GetLogFromModelDir) //m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) + m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.CloudBrainTrainJobVersionNew) + m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.CloudBrainTrainJobVersionCreate) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.CloudBrainTrainJobNew) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate) @@ -1160,7 +1162,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.GrampusTrainJobDel) m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.GrampusTrainJobVersionNew) - m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobVersionCreate) + m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.GrampusTrainJobVersionCreate) }) m.Group("/gpu", func() { m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.GrampusTrainJobGPUNew) -- 2.34.1 From e54de5eb15243ad2ff26a65c4e49ffcdb3ea48e4 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 13 Sep 2022 11:39:28 +0800 Subject: [PATCH 03/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 71 +++++++++----- modules/auth/cloudbrain.go | 5 + modules/auth/grampus.go | 5 + modules/auth/modelarts.go | 5 + modules/cloudbrain/cloudbrain.go | 14 +++ modules/grampus/grampus.go | 73 +++++++++------ routers/repo/cloudbrain.go | 13 +++ routers/repo/grampus.go | 155 ++++++++++++++++++++++++------- 8 files changed, 254 insertions(+), 87 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index dc56efef73..12c76ce576 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" "fmt" + "path" "strconv" "strings" "time" @@ -170,24 +171,25 @@ type Cloudbrain struct { ImageID string //grampus image_id AiCenter string //grampus ai center: center_id+center_name - TrainUrl string //输出模型的obs路径 - BranchName string //分支名称 - Parameters string //传给modelarts的param参数 - BootFile string //启动文件 - DataUrl string //数据集的obs路径 - LogUrl string //日志输出的obs路径 - PreVersionId int64 //父版本的版本id - FlavorCode string //modelarts上的规格id - Description string `xorm:"varchar(256)"` //描述 - WorkServerNumber int //节点数 - FlavorName string //规格名称 - EngineName string //引擎名称 - TotalVersionCount int //任务的所有版本数量,包括删除的 - LabelName string //标签名称 - ModelName string //模型名称 - ModelVersion string //模型版本 - CkptName string //权重文件名称 - ResultUrl string //推理结果的obs路径 + TrainUrl string //输出模型的obs路径 + BranchName string //分支名称 + Parameters string //传给modelarts的param参数 + BootFile string //启动文件 + DataUrl string //数据集的obs路径 + LogUrl string //日志输出的obs路径 + PreVersionId int64 //父版本的版本id + FlavorCode string //modelarts上的规格id + Description string `xorm:"varchar(256)"` //描述 + WorkServerNumber int //节点数 + FlavorName string //规格名称 + EngineName string //引擎名称 + TotalVersionCount int //任务的所有版本数量,包括删除的 + LabelName string //标签名称 + ModelName string //模型名称 + ModelVersion string //模型版本 + CkptName string //权重文件名称 + PreTrainingModelUrl string //预训练模型地址 + ResultUrl string //推理结果的obs路径 User *User `xorm:"-"` Repo *Repository `xorm:"-"` @@ -603,6 +605,16 @@ type ResourceSpec struct { ShareMemMiB int `json:"shareMemMiB"` } +type FlavorInfos struct { + FlavorInfo []*FlavorInfo `json:"flavor_info"` +} + +type FlavorInfo struct { + Id int `json:"id"` + Value string `json:"value"` + Desc string `json:"desc"` +} + type SpecialPools struct { Pools []*SpecialPool `json:"pools"` } @@ -2223,9 +2235,10 @@ func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, er type DatasetInfo struct { DataLocalPath string Name string + FullName string } -func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) { +func GetDatasetInfo(uuidStr string, grampusType ...string) (map[string]DatasetInfo, string, error) { var datasetNames string uuids := strings.Split(uuidStr, ";") if len(uuids) > setting.MaxDatasetNum { @@ -2258,16 +2271,26 @@ func GetDatasetInfo(uuidStr string) (map[string]DatasetInfo, string, error) { return nil, datasetNames, errors.New("the dataset name is same") } } + var dataLocalPath string + if len(grampusType) > 0 { + if grampusType[0] == GPU { + dataLocalPath = setting.Attachment.Minio.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + } else { + dataLocalPath = setting.BasePath + path.Join(attach.UUID[0:1], attach.UUID[1:2]) + "/" + attach.UUID + "/" + } - dataLocalPath := setting.Attachment.Minio.RealPath + - setting.Attachment.Minio.Bucket + "/" + - setting.Attachment.Minio.BasePath + - AttachmentRelativePath(attach.UUID) + - attach.UUID + } else { + dataLocalPath = setting.Attachment.Minio.RealPath + + setting.Attachment.Minio.Bucket + "/" + + setting.Attachment.Minio.BasePath + + AttachmentRelativePath(attach.UUID) + + attach.UUID + } datasetInfos[attach.UUID] = DatasetInfo{ DataLocalPath: dataLocalPath, Name: fileName, + FullName: attach.Name, } if i == 0 { datasetNames = attach.Name diff --git a/modules/auth/cloudbrain.go b/modules/auth/cloudbrain.go index 5bd294f2a7..48e23efacc 100755 --- a/modules/auth/cloudbrain.go +++ b/modules/auth/cloudbrain.go @@ -23,6 +23,11 @@ type CreateCloudBrainForm struct { BootFile string `form:"boot_file"` Params string `form:"run_para_list"` BranchName string `form:"branch_name"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` DatasetName string `form:"dataset_name"` SpecId int64 `form:"spec_id"` } diff --git a/modules/auth/grampus.go b/modules/auth/grampus.go index 21008ea09b..414a7c25dc 100755 --- a/modules/auth/grampus.go +++ b/modules/auth/grampus.go @@ -18,6 +18,11 @@ type CreateGrampusTrainJobForm struct { WorkServerNumber int `form:"work_server_number" binding:"Required"` Image string `form:"image"` DatasetName string `form:"dataset_name"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` SpecId int64 `form:"spec_id"` } diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 23e1f325af..ced5ea1e80 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -48,6 +48,11 @@ type CreateModelArtsTrainJobForm struct { FlavorName string `form:"flaver_names" binding:"Required"` EngineName string `form:"engine_names" binding:"Required"` SpecId int64 `form:"spec_id" binding:"Required"` + ModelName string `form:"model_name"` + ModelVersion string `form:"model_version"` + CkptName string `form:"ckpt_name"` + LabelName string `form:"label_names"` + PreTrainModelUrl string `form:"pre_train_model_url"` } type CreateModelArtsInferenceJobForm struct { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 748af4a29b..5a4d2fe058 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -24,6 +24,7 @@ const ( CodeMountPath = "/code" DataSetMountPath = "/dataset" ModelMountPath = "/model" + PretrainModelMountPath = "/pretrainmodel" LogFile = "log.txt" BenchMarkMountPath = "/benchmark" BenchMarkResourceID = 1 @@ -77,6 +78,8 @@ type GenerateCloudBrainTaskReq struct { ModelVersion string CkptName string LabelName string + PreTrainModelPath string + PreTrainingModelUrl string Spec *models.Specification } @@ -276,6 +279,16 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { }, } + if req.PreTrainingModelUrl != "" { //预训练 + volumes = append(volumes, models.Volume{ + HostPath: models.StHostPath{ + Path: req.PreTrainModelPath, + MountPath: PretrainModelMountPath, + ReadOnly: true, + }, + }) + } + if len(req.DatasetInfos) == 1 { volumes = append(volumes, models.Volume{ HostPath: models.StHostPath{ @@ -359,6 +372,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { CkptName: req.CkptName, ResultUrl: req.ResultPath, LabelName: req.LabelName, + PreTrainingModelUrl: req.PreTrainingModelUrl, CreatedUnix: createTime, UpdatedUnix: createTime, CommitID: req.CommitID, diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 687fb4959f..f434a484c9 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -62,9 +62,17 @@ type GenerateTrainJobReq struct { TotalVersionCount int ComputeResource string ProcessType string - DatasetName string - Params string - Spec *models.Specification + + DatasetNames string + DatasetInfos map[string]models.DatasetInfo + Params string + ModelName string + LabelName string + CkptName string + ModelVersion string + PreTrainModelPath string + PreTrainingModelUrl string + Spec *models.Specification } func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { @@ -94,33 +102,38 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error jobID := jobResult.JobInfo.JobID err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.JobInfo.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: jobID, - JobName: req.JobName, - DisplayJobName: req.DisplayJobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeC2Net, - Uuid: req.Uuid, - DatasetName: req.DatasetName, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: req.ComputeResource, - ImageID: req.ImageId, - TrainUrl: req.TrainUrl, - BranchName: req.BranchName, - Parameters: req.Params, - BootFile: req.BootFile, - DataUrl: req.DataUrl, - Description: req.Description, - WorkServerNumber: req.WorkServerNumber, - EngineName: req.EngineName, - VersionCount: req.VersionCount, - TotalVersionCount: req.TotalVersionCount, - CreatedUnix: createTime, - UpdatedUnix: createTime, - Spec: req.Spec, + Status: TransTrainJobStatus(jobResult.JobInfo.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobID, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeC2Net, + Uuid: req.Uuid, + DatasetName: req.DatasetNames, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: req.ComputeResource, + ImageID: req.ImageId, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + EngineName: req.EngineName, + VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainingModelUrl: req.PreTrainingModelUrl, + CkptName: req.CkptName, }) if err != nil { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 7020f0a616..56a485b66e 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -328,6 +328,16 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl + req.PreTrainingModelUrl = form.PreTrainModelUrl + + } + err = cloudbrain.GenerateTask(req) if err != nil { cloudBrainNewDataPrepare(ctx) @@ -2629,6 +2639,9 @@ func getTrainJobCommand(form auth.CreateCloudBrainForm) (string, error) { param += " --" + parameter.Label + "=" + parameter.Value } } + if form.CkptName != "" { + param += " --pretrainmodelname" + "=" + form.CkptName + } command += "python /code/" + bootFile + param + " > " + cloudbrain.ModelMountPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index d7e799427e..b32070a84f 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -210,7 +210,6 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" - dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid branchName := form.BranchName image := strings.TrimSpace(form.Image) @@ -290,11 +289,12 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } //check dataset - attachment, err := models.GetAttachmentByUUID(uuid) + + datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.GPU) if err != nil { - log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) return } @@ -336,8 +336,22 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain return } + var datasetRemotePath, allFileName string + for _, datasetInfo := range datasetInfos { + if datasetRemotePath == "" { + datasetRemotePath = datasetInfo.DataLocalPath + allFileName = datasetInfo.FullName + } else { + datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + allFileName = allFileName + ";" + datasetInfo.FullName + } + + } + //prepare command - command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) + preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) + + command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, form.CkptName) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) @@ -348,26 +362,37 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) req := &grampus.GenerateTrainJobReq{ - JobName: jobName, - DisplayJobName: displayJobName, - ComputeResource: models.GPUResource, - ProcessType: grampus.ProcessorTypeGPU, - Command: command, - ImageUrl: image, - Description: description, - BootFile: bootFile, - Uuid: uuid, - CommitID: commitID, - BranchName: branchName, - Params: form.Params, - EngineName: image, - DatasetName: attachment.Name, + JobName: jobName, + DisplayJobName: displayJobName, + ComputeResource: models.GPUResource, + ProcessType: grampus.ProcessorTypeGPU, + Command: command, + ImageUrl: image, + Description: description, + BootFile: bootFile, + Uuid: uuid, + CommitID: commitID, + BranchName: branchName, + Params: form.Params, + EngineName: image, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, + IsLatestVersion: modelarts.IsLatestVersion, VersionCount: modelarts.VersionCountOne, WorkServerNumber: 1, Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainingModelUrl = form.PreTrainModelUrl + + } + err = grampus.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) @@ -378,6 +403,17 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") } +func getPreTrainModelPath(pretrainModelDir string, fileName string) string { + index := strings.Index(pretrainModelDir, "/") + if index > 0 { + filterBucket := pretrainModelDir[index+1:] + return filterBucket + fileName + } else { + return "" + } + +} + func checkSpecialPool(ctx *context.Context, resourceType string) string { grampus.InitSpecialPool() if grampus.SpecialPools != nil { @@ -410,7 +446,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := grampus.JobPath + jobName + modelarts.CodePath - dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" + //dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" branchName := form.BranchName isLatestVersion := modelarts.IsLatestVersion versionCount := modelarts.VersionCountOne @@ -492,11 +528,11 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain } //check dataset - attachment, err := models.GetAttachmentByUUID(uuid) + datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, models.NPU) if err != nil { - log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"]) + log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) return } @@ -528,8 +564,21 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain return } + var datasetRemotePath, allFileName string + for _, datasetInfo := range datasetInfos { + if datasetRemotePath == "" { + datasetRemotePath = datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" + allFileName = datasetInfo.FullName + } else { + datasetRemotePath = datasetRemotePath + ";" + datasetInfo.DataLocalPath + "'" + datasetInfo.FullName + "'" + allFileName = allFileName + ";" + datasetInfo.FullName + } + + } + //prepare command - command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) + preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) + command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, allFileName, preTrainModelPath, form.CkptName) if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) @@ -546,7 +595,6 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain ProcessType: grampus.ProcessorTypeNPU, Command: command, ImageId: form.ImageID, - DataUrl: dataObsPath, Description: description, CodeObsPath: codeObsPath, BootFileUrl: codeObsPath + bootFile, @@ -560,9 +608,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain EngineName: engineName, VersionCount: versionCount, TotalVersionCount: modelarts.TotalVersionCount, - DatasetName: attachment.Name, + DatasetNames: datasetNames, + DatasetInfos: datasetInfos, Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainingModelUrl = form.PreTrainModelUrl + + } err = grampus.GenerateTrainJob(ctx, req) if err != nil { @@ -776,7 +833,7 @@ func GrampusGetLog(ctx *context.Context) { return } -func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName string) (string, error) { +func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName string) (string, error) { var command string workDir := grampus.NpuWorkDir @@ -788,18 +845,18 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo //download code & dataset if processorType == grampus.ProcessorTypeNPU { commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" + commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } else if processorType == grampus.ProcessorTypeGPU { - commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" + commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" + commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } //unzip code & dataset - toolUnzip := "unzip -q '" - if strings.HasSuffix(datasetName, ".tar.gz") { - toolUnzip = "tar -zxvf '" - } - commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + toolUnzip + datasetName + "';" + unZipDatasetCommand := generateDatasetUnzipCommand(datasetName) + + commandUnzip := "cd " + workDir + "code;unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand command += commandUnzip command += "echo \"unzip finished;start to exec code;\";" @@ -859,6 +916,38 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo return command, nil } +func processPretrainModelParameter(pretrainModelPath string, pretrainModelFileName string, commandDownload string) string { + commandDownloadTemp := commandDownload + if pretrainModelPath != "" { + commandDownloadTemp += " '" + pretrainModelPath + "' '" + pretrainModelFileName + "'" + } + commandDownloadTemp += ";" + return commandDownloadTemp +} + +func generateDatasetUnzipCommand(datasetName string) string { + var unZipDatasetCommand string + + datasetNameArray := strings.Split(datasetName, ";") + if len(datasetNameArray) == 1 { //单数据集 + unZipDatasetCommand = "unzip -q '" + datasetName + "';" + if strings.HasSuffix(datasetName, ".tar.gz") { + unZipDatasetCommand = "tar --strip-components=1 -zxvf '" + datasetName + "';" + } + + } else { //多数据集 + for _, datasetNameTemp := range datasetNameArray { + if strings.HasSuffix(datasetName, ".tar.gz") { + unZipDatasetCommand = unZipDatasetCommand + "tar -zxvf '" + datasetName + "';" + } else { + unZipDatasetCommand = unZipDatasetCommand + "unzip -q '" + datasetNameTemp + "' -d './" + strings.TrimSuffix(datasetNameTemp, ".zip") + "';" + } + } + + } + return unZipDatasetCommand +} + func downloadZipCode(ctx *context.Context, codePath, branchName string) error { archiveType := git.ZIP archivePath := codePath -- 2.34.1 From 9df8fa9f24032d1cae07f8f034f5dc233a2f126d Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 13 Sep 2022 14:48:34 +0800 Subject: [PATCH 04/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/modelarts/modelarts.go | 132 ++++++++++++++++++--------------- routers/repo/modelarts.go | 16 ++++ 2 files changed, 87 insertions(+), 61 deletions(-) diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 4539699ad4..ead824b60e 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -75,35 +75,40 @@ var ( ) type GenerateTrainJobReq struct { - JobName string - DisplayJobName string - Uuid string - Description string - CodeObsPath string - BootFile string - BootFileUrl string - DataUrl string - TrainUrl string - LogUrl string - PoolID string - WorkServerNumber int - EngineID int64 - Parameters []models.Parameter - CommitID string - IsLatestVersion string - Params string - BranchName string - PreVersionId int64 - PreVersionName string - FlavorCode string - FlavorName string - VersionCount int - EngineName string - TotalVersionCount int - UserImageUrl string - UserCommand string - DatasetName string - Spec *models.Specification + JobName string + DisplayJobName string + Uuid string + Description string + CodeObsPath string + BootFile string + BootFileUrl string + DataUrl string + TrainUrl string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + CommitID string + IsLatestVersion string + Params string + BranchName string + PreVersionId int64 + PreVersionName string + FlavorCode string + FlavorName string + VersionCount int + EngineName string + TotalVersionCount int + UserImageUrl string + UserCommand string + DatasetName string + Spec *models.Specification + ModelName string + LabelName string + CkptName string + ModelVersion string + PreTrainingModelUrl string } type GenerateInferenceJobReq struct { @@ -407,38 +412,43 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error } jobId := strconv.FormatInt(jobResult.JobID, 10) createErr = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: jobId, - JobName: req.JobName, - DisplayJobName: req.DisplayJobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: req.DatasetName, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: models.NPUResource, - EngineID: req.EngineID, - TrainUrl: req.TrainUrl, - BranchName: req.BranchName, - Parameters: req.Params, - BootFile: req.BootFile, - DataUrl: req.DataUrl, - LogUrl: req.LogUrl, - FlavorCode: req.Spec.SourceSpecId, - Description: req.Description, - WorkServerNumber: req.WorkServerNumber, - FlavorName: req.FlavorName, - EngineName: req.EngineName, - VersionCount: req.VersionCount, - TotalVersionCount: req.TotalVersionCount, - CreatedUnix: createTime, - UpdatedUnix: createTime, - Spec: req.Spec, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobId, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: req.DatasetName, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: models.NPUResource, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + FlavorCode: req.Spec.SourceSpecId, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, + EngineName: req.EngineName, + VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainingModelUrl: req.PreTrainingModelUrl, + CkptName: req.CkptName, }) if createErr != nil { diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index b4f6f000ef..121c4fd78c 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1290,6 +1290,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) Value: string(jsondatas), }) } + if form.ModelName != "" { //使用预训练模型训练 + ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName + param = append(param, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + } //save param config // if isSaveParam == "on" { @@ -1358,6 +1365,15 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) DatasetName: datasetNames, Spec: spec, } + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainingModelUrl = form.PreTrainModelUrl + + } + userCommand, userImageUrl := getUserCommand(engineID, req) req.UserCommand = userCommand req.UserImageUrl = userImageUrl -- 2.34.1 From e24b67cfa97bfca382801751eb4f3e9c38904482 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 13 Sep 2022 15:16:39 +0800 Subject: [PATCH 05/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/cloudbrain.go | 38 ++++----- modules/cloudbrain/cloudbrain.go | 6 +- modules/grampus/grampus.go | 84 +++++++++--------- modules/modelarts/modelarts.go | 142 +++++++++++++++---------------- routers/repo/cloudbrain.go | 2 +- routers/repo/grampus.go | 4 +- routers/repo/modelarts.go | 9 +- 7 files changed, 146 insertions(+), 139 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 12c76ce576..f93b653e1e 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -171,25 +171,25 @@ type Cloudbrain struct { ImageID string //grampus image_id AiCenter string //grampus ai center: center_id+center_name - TrainUrl string //输出模型的obs路径 - BranchName string //分支名称 - Parameters string //传给modelarts的param参数 - BootFile string //启动文件 - DataUrl string //数据集的obs路径 - LogUrl string //日志输出的obs路径 - PreVersionId int64 //父版本的版本id - FlavorCode string //modelarts上的规格id - Description string `xorm:"varchar(256)"` //描述 - WorkServerNumber int //节点数 - FlavorName string //规格名称 - EngineName string //引擎名称 - TotalVersionCount int //任务的所有版本数量,包括删除的 - LabelName string //标签名称 - ModelName string //模型名称 - ModelVersion string //模型版本 - CkptName string //权重文件名称 - PreTrainingModelUrl string //预训练模型地址 - ResultUrl string //推理结果的obs路径 + TrainUrl string //输出模型的obs路径 + BranchName string //分支名称 + Parameters string //传给modelarts的param参数 + BootFile string //启动文件 + DataUrl string //数据集的obs路径 + LogUrl string //日志输出的obs路径 + PreVersionId int64 //父版本的版本id + FlavorCode string //modelarts上的规格id + Description string `xorm:"varchar(256)"` //描述 + WorkServerNumber int //节点数 + FlavorName string //规格名称 + EngineName string //引擎名称 + TotalVersionCount int //任务的所有版本数量,包括删除的 + LabelName string //标签名称 + ModelName string //模型名称 + ModelVersion string //模型版本 + CkptName string //权重文件名称 + PreTrainModelUrl string //预训练模型地址 + ResultUrl string //推理结果的obs路径 User *User `xorm:"-"` Repo *Repository `xorm:"-"` diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 5a4d2fe058..4e527b6bfd 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -79,7 +79,7 @@ type GenerateCloudBrainTaskReq struct { CkptName string LabelName string PreTrainModelPath string - PreTrainingModelUrl string + PreTrainModelUrl string Spec *models.Specification } @@ -279,7 +279,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { }, } - if req.PreTrainingModelUrl != "" { //预训练 + if req.PreTrainModelUrl != "" { //预训练 volumes = append(volumes, models.Volume{ HostPath: models.StHostPath{ Path: req.PreTrainModelPath, @@ -372,7 +372,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { CkptName: req.CkptName, ResultUrl: req.ResultPath, LabelName: req.LabelName, - PreTrainingModelUrl: req.PreTrainingModelUrl, + PreTrainModelUrl: req.PreTrainModelUrl, CreatedUnix: createTime, UpdatedUnix: createTime, CommitID: req.CommitID, diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index f434a484c9..45c1271416 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -63,16 +63,16 @@ type GenerateTrainJobReq struct { ComputeResource string ProcessType string - DatasetNames string - DatasetInfos map[string]models.DatasetInfo - Params string - ModelName string - LabelName string - CkptName string - ModelVersion string - PreTrainModelPath string - PreTrainingModelUrl string - Spec *models.Specification + DatasetNames string + DatasetInfos map[string]models.DatasetInfo + Params string + ModelName string + LabelName string + CkptName string + ModelVersion string + PreTrainModelPath string + PreTrainModelUrl string + Spec *models.Specification } func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { @@ -102,38 +102,38 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error jobID := jobResult.JobInfo.JobID err = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.JobInfo.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: jobID, - JobName: req.JobName, - DisplayJobName: req.DisplayJobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeC2Net, - Uuid: req.Uuid, - DatasetName: req.DatasetNames, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: req.ComputeResource, - ImageID: req.ImageId, - TrainUrl: req.TrainUrl, - BranchName: req.BranchName, - Parameters: req.Params, - BootFile: req.BootFile, - DataUrl: req.DataUrl, - Description: req.Description, - WorkServerNumber: req.WorkServerNumber, - EngineName: req.EngineName, - VersionCount: req.VersionCount, - TotalVersionCount: req.TotalVersionCount, - CreatedUnix: createTime, - UpdatedUnix: createTime, - Spec: req.Spec, - ModelName: req.ModelName, - ModelVersion: req.ModelVersion, - LabelName: req.LabelName, - PreTrainingModelUrl: req.PreTrainingModelUrl, - CkptName: req.CkptName, + Status: TransTrainJobStatus(jobResult.JobInfo.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobID, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeC2Net, + Uuid: req.Uuid, + DatasetName: req.DatasetNames, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: req.ComputeResource, + ImageID: req.ImageId, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + EngineName: req.EngineName, + VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, + CkptName: req.CkptName, }) if err != nil { diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index ead824b60e..f35601191f 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -75,40 +75,40 @@ var ( ) type GenerateTrainJobReq struct { - JobName string - DisplayJobName string - Uuid string - Description string - CodeObsPath string - BootFile string - BootFileUrl string - DataUrl string - TrainUrl string - LogUrl string - PoolID string - WorkServerNumber int - EngineID int64 - Parameters []models.Parameter - CommitID string - IsLatestVersion string - Params string - BranchName string - PreVersionId int64 - PreVersionName string - FlavorCode string - FlavorName string - VersionCount int - EngineName string - TotalVersionCount int - UserImageUrl string - UserCommand string - DatasetName string - Spec *models.Specification - ModelName string - LabelName string - CkptName string - ModelVersion string - PreTrainingModelUrl string + JobName string + DisplayJobName string + Uuid string + Description string + CodeObsPath string + BootFile string + BootFileUrl string + DataUrl string + TrainUrl string + LogUrl string + PoolID string + WorkServerNumber int + EngineID int64 + Parameters []models.Parameter + CommitID string + IsLatestVersion string + Params string + BranchName string + PreVersionId int64 + PreVersionName string + FlavorCode string + FlavorName string + VersionCount int + EngineName string + TotalVersionCount int + UserImageUrl string + UserCommand string + DatasetName string + Spec *models.Specification + ModelName string + LabelName string + CkptName string + ModelVersion string + PreTrainModelUrl string } type GenerateInferenceJobReq struct { @@ -412,43 +412,43 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error } jobId := strconv.FormatInt(jobResult.JobID, 10) createErr = models.CreateCloudbrain(&models.Cloudbrain{ - Status: TransTrainJobStatus(jobResult.Status), - UserID: ctx.User.ID, - RepoID: ctx.Repo.Repository.ID, - JobID: jobId, - JobName: req.JobName, - DisplayJobName: req.DisplayJobName, - JobType: string(models.JobTypeTrain), - Type: models.TypeCloudBrainTwo, - VersionID: jobResult.VersionID, - VersionName: jobResult.VersionName, - Uuid: req.Uuid, - DatasetName: req.DatasetName, - CommitID: req.CommitID, - IsLatestVersion: req.IsLatestVersion, - ComputeResource: models.NPUResource, - EngineID: req.EngineID, - TrainUrl: req.TrainUrl, - BranchName: req.BranchName, - Parameters: req.Params, - BootFile: req.BootFile, - DataUrl: req.DataUrl, - LogUrl: req.LogUrl, - FlavorCode: req.Spec.SourceSpecId, - Description: req.Description, - WorkServerNumber: req.WorkServerNumber, - FlavorName: req.FlavorName, - EngineName: req.EngineName, - VersionCount: req.VersionCount, - TotalVersionCount: req.TotalVersionCount, - CreatedUnix: createTime, - UpdatedUnix: createTime, - Spec: req.Spec, - ModelName: req.ModelName, - ModelVersion: req.ModelVersion, - LabelName: req.LabelName, - PreTrainingModelUrl: req.PreTrainingModelUrl, - CkptName: req.CkptName, + Status: TransTrainJobStatus(jobResult.Status), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobId, + JobName: req.JobName, + DisplayJobName: req.DisplayJobName, + JobType: string(models.JobTypeTrain), + Type: models.TypeCloudBrainTwo, + VersionID: jobResult.VersionID, + VersionName: jobResult.VersionName, + Uuid: req.Uuid, + DatasetName: req.DatasetName, + CommitID: req.CommitID, + IsLatestVersion: req.IsLatestVersion, + ComputeResource: models.NPUResource, + EngineID: req.EngineID, + TrainUrl: req.TrainUrl, + BranchName: req.BranchName, + Parameters: req.Params, + BootFile: req.BootFile, + DataUrl: req.DataUrl, + LogUrl: req.LogUrl, + FlavorCode: req.Spec.SourceSpecId, + Description: req.Description, + WorkServerNumber: req.WorkServerNumber, + FlavorName: req.FlavorName, + EngineName: req.EngineName, + VersionCount: req.VersionCount, + TotalVersionCount: req.TotalVersionCount, + CreatedUnix: createTime, + UpdatedUnix: createTime, + Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, + CkptName: req.CkptName, }) if createErr != nil { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 56a485b66e..8cd45e06fa 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -334,7 +334,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { req.CkptName = form.CkptName req.ModelVersion = form.ModelVersion req.PreTrainModelPath = setting.Attachment.Minio.RealPath + form.PreTrainModelUrl - req.PreTrainingModelUrl = form.PreTrainModelUrl + req.PreTrainModelUrl = form.PreTrainModelUrl } diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index b32070a84f..ed869e76e7 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -389,7 +389,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain req.LabelName = form.LabelName req.CkptName = form.CkptName req.ModelVersion = form.ModelVersion - req.PreTrainingModelUrl = form.PreTrainModelUrl + req.PreTrainModelUrl = form.PreTrainModelUrl } @@ -617,7 +617,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain req.LabelName = form.LabelName req.CkptName = form.CkptName req.ModelVersion = form.ModelVersion - req.PreTrainingModelUrl = form.PreTrainModelUrl + req.PreTrainModelUrl = form.PreTrainModelUrl } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 121c4fd78c..cb4b2c1cc5 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1010,6 +1010,13 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["engine_id"] = task.EngineID ctx.Data["datasetType"] = models.TypeCloudBrainTwo + //pretrain model + ctx.Data["model_name"] = task.ModelName + ctx.Data["model_version"] = task.ModelVersion + ctx.Data["ckpt_name"] = task.CkptName + ctx.Data["label_names"] = task.LabelName + ctx.Data["pre_train_model_url"] = task.PreTrainModelUrl + configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) if err != nil { ctx.ServerError("getConfigList failed:", err) @@ -1370,7 +1377,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) req.LabelName = form.LabelName req.CkptName = form.CkptName req.ModelVersion = form.ModelVersion - req.PreTrainingModelUrl = form.PreTrainModelUrl + req.PreTrainModelUrl = form.PreTrainModelUrl } -- 2.34.1 From 9ce1b017b7bab8858f1ff45b2390b492ecfd56c4 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Tue, 13 Sep 2022 16:32:30 +0800 Subject: [PATCH 06/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 6 +++--- modules/setting/setting.go | 15 +++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 45c1271416..3cdd59c5cb 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -22,9 +22,6 @@ const ( GpuWorkDir = "/tmp/" NpuWorkDir = "/cache/" - CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;" + - "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" - CodeArchiveName = "master.zip" ) @@ -34,6 +31,9 @@ var ( ImageInfos *setting.StImageInfosModelArts SpecialPools *models.SpecialPools + + CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/" + setting.Grampus.SyncScriptProject + "/archive/master.zip;" + + "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" ) type GenerateTrainJobReq struct { diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 1992baf545..7d726a773e 100755 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -583,12 +583,13 @@ var ( //grampus config Grampus = struct { - Env string - Host string - UserName string - Password string - SpecialPools string - C2NetSequence string + Env string + Host string + UserName string + Password string + SpecialPools string + C2NetSequence string + SyncScriptProject string }{} C2NetInfos *C2NetSqInfos @@ -1558,6 +1559,8 @@ func getGrampusConfig() { log.Error("Unmarshal(C2NetSequence) failed:%v", err) } } + Grampus.SyncScriptProject = sec.Key("SYNC_SCRIPT_PROJECT").MustString("script_for_grampus") + } func SetRadarMapConfig() { -- 2.34.1 From 041cdec7304512b9416e0de77a44129f36ee6751 Mon Sep 17 00:00:00 2001 From: liuzx Date: Tue, 13 Sep 2022 16:56:49 +0800 Subject: [PATCH 07/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/cloudbrain.go | 38 +++---------------- routers/repo/grampus.go | 17 +++++---- .../repo/cloudbrain/trainjob/versionnew.tmpl | 0 3 files changed, 15 insertions(+), 40 deletions(-) create mode 100644 templates/repo/cloudbrain/trainjob/versionnew.tmpl diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ec3bf1f397..d870d575c5 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -49,8 +49,9 @@ const ( tplCloudBrainImageSubmit base.TplName = "repo/cloudbrain/image/submit" tplCloudBrainImageEdit base.TplName = "repo/cloudbrain/image/edit" - tplCloudBrainTrainJobNew base.TplName = "repo/cloudbrain/trainjob/new" - tplCloudBrainTrainJobShow base.TplName = "repo/cloudbrain/trainjob/show" + tplCloudBrainTrainJobNew base.TplName = "repo/cloudbrain/trainjob/new" + tplCloudBrainTrainJobVersionNew base.TplName = "repo/cloudbrain/trainjob/versionnew" + tplCloudBrainTrainJobShow base.TplName = "repo/cloudbrain/trainjob/show" tplCloudBrainInferenceJobNew base.TplName = "repo/cloudbrain/inference/new" tplCloudBrainInferenceJobShow base.TplName = "repo/cloudbrain/inference/show" @@ -146,33 +147,6 @@ func cloudBrainVersionNewDataPrepare(ctx *context.Context) error { t := time.Now() var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] ctx.Data["display_job_name"] = displayJobName - - ctx.Data["command"] = cloudbrain.GetCloudbrainDebugCommand() - ctx.Data["code_path"] = cloudbrain.CodeMountPath - ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath - ctx.Data["model_path"] = cloudbrain.ModelMountPath - ctx.Data["benchmark_path"] = cloudbrain.BenchMarkMountPath - ctx.Data["is_benchmark_enabled"] = setting.IsBenchmarkEnabled - - if categories == nil { - json.Unmarshal([]byte(setting.BenchmarkCategory), &categories) - } - ctx.Data["benchmark_categories"] = categories.Category - - ctx.Data["benchmark_types"] = GetBenchmarkTypes(ctx).BenchmarkType - queuesDetail, _ := cloudbrain.GetQueuesDetail() - if queuesDetail != nil { - ctx.Data["QueuesDetail"] = queuesDetail - } - - prepareCloudbrainOneSpecs(ctx) - - ctx.Data["snn4imagenet_path"] = cloudbrain.Snn4imagenetMountPath - ctx.Data["is_snn4imagenet_enabled"] = setting.IsSnn4imagenetEnabled - ctx.Data["brainscore_path"] = cloudbrain.BrainScoreMountPath - ctx.Data["is_brainscore_enabled"] = setting.IsBrainScoreEnabled - ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode") - ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName ctx.Data["image_name"] = ctx.Cloudbrain.Image ctx.Data["bootfile"] = ctx.Cloudbrain.BootFile @@ -197,10 +171,10 @@ func cloudBrainVersionNewDataPrepare(ctx *context.Context) error { ctx.Data["dataset_name"] = datasetNames } ctx.Data["uuid"] = ctx.Cloudbrain.Uuid + ctx.Data["cluster_type"] = models.OpenICluster ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource - ctx.Data["dataset_type"] = models.TypeCloudBrainOne - waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) + waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, models.GPUResource, models.JobTypeTrain) ctx.Data["wait_count"] = waitCount return nil @@ -2624,7 +2598,7 @@ func CloudBrainTrainJobVersionNew(ctx *context.Context) { ctx.ServerError("get new train-job info failed", err) return } - ctx.HTML(http.StatusOK, tplCloudBrainTrainJobNew) + ctx.HTML(http.StatusOK, tplCloudBrainTrainJobVersionNew) } func InferenceCloudBrainJobNew(ctx *context.Context) { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index f4ab40f5ed..0c55067da9 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -220,9 +220,9 @@ func grampusTrainJobVersionNewDataPrepare(ctx *context.Context, processType stri ctx.Data["branches"] = branches } - ctx.Data["BranchName"] = ctx.Cloudbrain.BranchName - ctx.Data["ImageName"] = ctx.Cloudbrain.Image - ctx.Data["BootFile"] = ctx.Cloudbrain.BootFile + ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName + ctx.Data["image_name"] = ctx.Cloudbrain.Image + ctx.Data["bootfile"] = ctx.Cloudbrain.BootFile ctx.Data["description"] = ctx.Cloudbrain.Description spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) if spec != nil { @@ -244,16 +244,17 @@ func grampusTrainJobVersionNewDataPrepare(ctx *context.Context, processType stri ctx.Data["dataset_name"] = datasetNames } ctx.Data["uuid"] = ctx.Cloudbrain.Uuid - ctx.Data["ComputeResource"] = ctx.Cloudbrain.ComputeResource + ctx.Data["cloudbrain_type"] = models.C2NetCluster + ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource if processType == grampus.ProcessorTypeGPU { - ctx.Data["datasetType"] = models.TypeCloudBrainOne + ctx.Data["dataset_type"] = models.TypeCloudBrainOne waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) - ctx.Data["WaitCount"] = waitCount + ctx.Data["wait_count"] = waitCount } else if processType == grampus.ProcessorTypeNPU { - ctx.Data["datasetType"] = models.TypeCloudBrainTwo + ctx.Data["dataset_type"] = models.TypeCloudBrainTwo waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain) - ctx.Data["WaitCount"] = waitCount + ctx.Data["wait_count"] = waitCount ctx.Data["work_server_number"] = ctx.Cloudbrain.WorkServerNumber } diff --git a/templates/repo/cloudbrain/trainjob/versionnew.tmpl b/templates/repo/cloudbrain/trainjob/versionnew.tmpl new file mode 100644 index 0000000000..e69de29bb2 -- 2.34.1 From 601b7873b9f01bf41579a31ccec8a89b74092b58 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Wed, 14 Sep 2022 12:19:55 +0800 Subject: [PATCH 08/57] fix issue --- templates/custom/select_model.tmpl | 37 +++++ templates/repo/cloudbrain/trainjob/new.tmpl | 4 +- templates/repo/grampus/trainjob/gpu/new.tmpl | 3 +- templates/repo/grampus/trainjob/npu/new.tmpl | 2 + templates/repo/modelarts/trainjob/new.tmpl | 6 +- .../repo/modelarts/trainjob/version_new.tmpl | 4 +- web_src/js/features/cloudbrainShow.js | 130 ++++++++++++++++++ 7 files changed, 178 insertions(+), 8 deletions(-) create mode 100644 templates/custom/select_model.tmpl diff --git a/templates/custom/select_model.tmpl b/templates/custom/select_model.tmpl new file mode 100644 index 0000000000..81332b8732 --- /dev/null +++ b/templates/custom/select_model.tmpl @@ -0,0 +1,37 @@ + +
+   +
+ +
+
+ +
+
+ + +
+ + + + +
\ No newline at end of file diff --git a/templates/repo/cloudbrain/trainjob/new.tmpl b/templates/repo/cloudbrain/trainjob/new.tmpl index 709490ac10..b2cff22cc6 100755 --- a/templates/repo/cloudbrain/trainjob/new.tmpl +++ b/templates/repo/cloudbrain/trainjob/new.tmpl @@ -70,7 +70,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -168,7 +168,7 @@ {{end}}

- + {{template "custom/select_model" .}} - + {{template "custom/select_model" .}}
diff --git a/templates/repo/grampus/trainjob/npu/new.tmpl b/templates/repo/grampus/trainjob/npu/new.tmpl index 88a41779e2..a11d84bb32 100755 --- a/templates/repo/grampus/trainjob/npu/new.tmpl +++ b/templates/repo/grampus/trainjob/npu/new.tmpl @@ -57,6 +57,7 @@
{{template "repo/header" .}}
+ {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -149,6 +150,7 @@ {{end}}

+ {{template "custom/select_model" .}}
- - + + {{template "custom/select_model" .}}
diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index c95f5699a7..ffc1045e8d 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -55,7 +55,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -154,7 +154,7 @@

- + {{template "custom/select_model" .}}
diff --git a/web_src/js/features/cloudbrainShow.js b/web_src/js/features/cloudbrainShow.js index 73a8ed7b60..ac1263b9d0 100644 --- a/web_src/js/features/cloudbrainShow.js +++ b/web_src/js/features/cloudbrainShow.js @@ -443,4 +443,134 @@ export default async function initCloudrainSow() { html += "
"; $(`#dir_list${version_name}`).append(html); } + + let nameMap, nameList; + let RepoLink = $(".cloudbrain-type").data("repo-link"); + let type = $(".cloudbrain-type").data("cloudbrain-type"); + let flagModel = $(".cloudbrain-type").data("flag-model"); + // 获取模型列表和模型名称对应的模型版本 + $(document).ready(function () { + if (!flagModel) return; + else { + $.get( + `${RepoLink}/modelmanage/query_model_for_predict?type=${type}`, + (data) => { + nameMap = data.nameMap; + nameList = data.nameList; + let html = ""; + nameList.forEach((element) => { + html += `
${element}
`; + }); + if (nameList.length !== 0) { + $("#model_name").append(html); + } + let faildModelName = $('input[name="model_name"]').val(); + let faildModelVersion = $('input[name="model_version"]').val(); + let faildTrainUrl = $('input[name="pre_train_model_url"]').val(); + let faildCkptName = $('input[name="ckpt_name"]').val(); + // 新建错误的表单返回初始化 + if (faildModelName) { + $("#select_model").dropdown("set text", faildModelName); + $("#select_model").dropdown("set value", faildModelName); + $("#select_model_version").dropdown("set text", faildModelVersion); + $("#select_model_version").dropdown("set value", faildTrainUrl); + $("#select_model_checkpoint").dropdown("set text", faildCkptName); + $("#select_model_checkpoint").dropdown("set value", faildCkptName); + } + } + ); + } + $("#select_model").dropdown({ + onChange: function (value, text, $selectedItem) { + $("#model_name_version").empty(); + let html = ""; + nameMap[value].forEach((element) => { + let { TrainTaskInfo } = element; + TrainTaskInfo = JSON.parse(TrainTaskInfo); + html += `
${element.Version}
`; + }); + $("#model_name_version").append(html); + const initVersionText = $( + "#model_name_version div.item:first-child" + ).text(); + const initVersionValue = $( + "#model_name_version div.item:first-child" + ).data("value"); + + $("#select_model_version").dropdown("set text", initVersionText); + $("#select_model_version").dropdown( + "set value", + initVersionValue, + initVersionText, + $("#model_name_version div.item:first-child") + ); + }, + }); + + $("#select_model_version").dropdown({ + onChange: function (value, text, $selectedItem) { + const dataID = + $selectedItem && $selectedItem[0].getAttribute("data-id"); + $("input#ai_model_version").val(text); + $("#select_model_checkpoint").addClass("loading"); + $("#model_checkpoint").empty(); + let html = ""; + loadCheckpointList(dataID).then((res) => { + res.forEach((element) => { + const ckptSuffix = element.FileName.split("."); + const loadCheckpointFile = [ + "ckpt", + "pb", + "h5", + "json", + "pkl", + "pth", + "t7", + "pdparams", + "onnx", + "pbtxt", + "keras", + "mlmodel", + "cfg", + "pt", + ]; + if ( + !element.IsDir && + loadCheckpointFile.includes(ckptSuffix[ckptSuffix.length - 1]) + ) { + html += `
${element.FileName}
`; + } + }); + $("#model_checkpoint").append(html); + $("#select_model_checkpoint").removeClass("loading"); + const initVersionText = $( + "#model_checkpoint div.item:first-child" + ).text(); + const initVersionValue = $( + "#model_checkpoint div.item:first-child" + ).data("value"); + + $("#select_model_checkpoint").dropdown("set text", initVersionText); + $("#select_model_checkpoint").dropdown( + "set value", + initVersionValue, + initVersionText, + $("#model_name_version div.item:first-child") + ); + }); + }, + }); + }); + + function loadCheckpointList(value) { + return new Promise((resolve, reject) => { + $.get( + `${RepoLink}/modelmanage/query_modelfile_for_predict`, + { ID: value }, + (data) => { + resolve(data); + } + ); + }); + } } -- 2.34.1 From 4752033910ed48ad3c6a9fe3cccce61574468f92 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 15:07:53 +0800 Subject: [PATCH 09/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index ed869e76e7..dee8fbdd19 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -887,6 +887,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo } } + if pretrainModelFileName != "" { + paramCode += " --pretrainmodelname" + "=" + pretrainModelFileName + } + var commandCode string if processorType == grampus.ProcessorTypeNPU { commandCode = "/bin/bash /home/work/run_train_for_openi.sh " + workDir + "code/" + strings.ToLower(repoName) + "/" + bootFile + " /tmp/log/train.log" + paramCode + ";" -- 2.34.1 From cc8b76eacaab2f3e9a6281c9c0d69ea1a591d5c1 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 15:25:41 +0800 Subject: [PATCH 10/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/modelarts/modelarts.go | 5 +++++ routers/repo/modelarts.go | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index f35601191f..5f318a5462 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -598,6 +598,11 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job CreatedUnix: createTime, UpdatedUnix: createTime, Spec: req.Spec, + ModelName: req.ModelName, + ModelVersion: req.ModelVersion, + LabelName: req.LabelName, + PreTrainModelUrl: req.PreTrainModelUrl, + CkptName: req.CkptName, }) if createErr != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error()) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index cb4b2c1cc5..13ae93dcf3 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -1656,6 +1656,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ }) } + if form.ModelName != "" { //使用预训练模型训练 + ckptUrl := "/" + form.PreTrainModelUrl + form.CkptName + param = append(param, models.Parameter{ + Label: modelarts.CkptUrl, + Value: "s3:/" + ckptUrl, + }) + } + // //save param config // if isSaveParam == "on" { // saveparams := append(param, models.Parameter{ @@ -1730,6 +1738,15 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ DatasetName: datasetNames, Spec: spec, } + + if form.ModelName != "" { //使用预训练模型训练 + req.ModelName = form.ModelName + req.LabelName = form.LabelName + req.CkptName = form.CkptName + req.ModelVersion = form.ModelVersion + req.PreTrainModelUrl = form.PreTrainModelUrl + + } userCommand, userImageUrl := getUserCommand(engineID, req) req.UserCommand = userCommand req.UserImageUrl = userImageUrl -- 2.34.1 From 8187c87e3df47528dc93faf0e80f06efc24eecaa Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 15:35:48 +0800 Subject: [PATCH 11/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 3cdd59c5cb..76f33bf81f 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -32,7 +32,7 @@ var ( SpecialPools *models.SpecialPools - CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/" + setting.Grampus.SyncScriptProject + "/archive/master.zip;" + + CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/" + setting.Grampus.SyncScriptProject + "/archive/master.zip;" + "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" ) -- 2.34.1 From fee45b7b0044951764da40fc36b7e290cefb50ce Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 15:51:08 +0800 Subject: [PATCH 12/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 +- routers/repo/grampus.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 76f33bf81f..41bceddd76 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -32,7 +32,7 @@ var ( SpecialPools *models.SpecialPools - CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/" + setting.Grampus.SyncScriptProject + "/archive/master.zip;" + + CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" + "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" ) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index dee8fbdd19..831551841d 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -841,7 +841,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo workDir = grampus.GpuWorkDir } - command += "pwd;cd " + workDir + grampus.CommandPrepareScript + command += "pwd;cd " + workDir + fmt.Sprint(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject) //download code & dataset if processorType == grampus.ProcessorTypeNPU { commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" -- 2.34.1 From c94dd366d1b3edea897c7548414ce1c9ab18a85f Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 16:00:25 +0800 Subject: [PATCH 13/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 831551841d..81b1cf743c 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -841,7 +841,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo workDir = grampus.GpuWorkDir } - command += "pwd;cd " + workDir + fmt.Sprint(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject) + command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject) //download code & dataset if processorType == grampus.ProcessorTypeNPU { commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" -- 2.34.1 From f7677b1cb5e4224a602d3586f475531776e3e9cb Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 16:09:47 +0800 Subject: [PATCH 14/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 +- routers/repo/grampus.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 41bceddd76..9ff2ed2123 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -33,7 +33,7 @@ var ( SpecialPools *models.SpecialPools CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" + - "echo \"finish loading script\";unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" + "echo \"finish loading script\";unzip -q master.zip;cd %s;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" ) type GenerateTrainJobReq struct { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 81b1cf743c..cae7be6222 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -841,7 +841,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo workDir = grampus.GpuWorkDir } - command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject) + command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject) //download code & dataset if processorType == grampus.ProcessorTypeNPU { commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" -- 2.34.1 From de0b416509de28d148df5a07ed5a8e4b605e70e3 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 16:27:53 +0800 Subject: [PATCH 15/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index cae7be6222..7c3472304c 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -844,7 +844,7 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject) //download code & dataset if processorType == grampus.ProcessorTypeNPU { - commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "';" + commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } else if processorType == grampus.ProcessorTypeGPU { -- 2.34.1 From 820016b1e2e1081f8699ce0ad1d05947aa03bbc3 Mon Sep 17 00:00:00 2001 From: liuzx Date: Wed, 14 Sep 2022 16:40:44 +0800 Subject: [PATCH 16/57] add ckpt --- routers/repo/cloudbrain.go | 6 ++++++ routers/repo/grampus.go | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index bfa7cff355..7dd0ac1563 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -177,6 +177,12 @@ func cloudBrainVersionNewDataPrepare(ctx *context.Context) error { waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, models.GPUResource, models.JobTypeTrain) ctx.Data["wait_count"] = waitCount + ctx.Data["model_name"] = ctx.Cloudbrain.ModelName + ctx.Data["label_name"] = ctx.Cloudbrain.LabelName + ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName + ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion + ctx.Data["pre_train_model_url"] = ctx.Cloudbrain.PreTrainModelUrl + return nil } diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index e55e492e3e..f0521db7da 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -258,6 +258,12 @@ func grampusTrainJobVersionNewDataPrepare(ctx *context.Context, processType stri ctx.Data["work_server_number"] = ctx.Cloudbrain.WorkServerNumber } + ctx.Data["model_name"] = ctx.Cloudbrain.ModelName + ctx.Data["label_name"] = ctx.Cloudbrain.LabelName + ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName + ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion + ctx.Data["pre_train_model_url"] = ctx.Cloudbrain.PreTrainModelUrl + return nil } -- 2.34.1 From 6d33ab7f5d959c37db6af36f2d1c22aa245ab38d Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Wed, 14 Sep 2022 17:27:15 +0800 Subject: [PATCH 17/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 7c3472304c..3c4cd2dc5c 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -906,10 +906,10 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo //upload models if processorType == grampus.ProcessorTypeNPU { - commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_npu " + setting.Bucket + " " + outputRemotePath + " " + workDir + "output/;" + commandUpload := "cd " + workDir + setting.Grampus.SyncScriptProject + "/;./uploader_for_npu " + setting.Bucket + " " + outputRemotePath + " " + workDir + "output/;" command += commandUpload } else if processorType == grampus.ProcessorTypeGPU { - commandUpload := "cd " + workDir + "script_for_grampus/;./uploader_for_gpu " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;" + commandUpload := "cd " + workDir + setting.Grampus.SyncScriptProject + "/;./uploader_for_gpu " + setting.Grampus.Env + " " + outputRemotePath + " " + workDir + "output/;" command += commandUpload } -- 2.34.1 From 78a8b817007f9ff8c1c12edd3b8f0f63df7e6583 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Wed, 14 Sep 2022 17:50:48 +0800 Subject: [PATCH 18/57] fix issue --- .../repo/cloudbrain/trainjob/versionnew.tmpl | 454 ++++++++++++++++++ .../repo/grampus/trainjob/npu/versionnew.tmpl | 426 ++++++++++++++++ templates/repo/modelarts/trainjob/index.tmpl | 17 +- 3 files changed, 896 insertions(+), 1 deletion(-) diff --git a/templates/repo/cloudbrain/trainjob/versionnew.tmpl b/templates/repo/cloudbrain/trainjob/versionnew.tmpl index e69de29bb2..b2cff22cc6 100644 --- a/templates/repo/cloudbrain/trainjob/versionnew.tmpl +++ b/templates/repo/cloudbrain/trainjob/versionnew.tmpl @@ -0,0 +1,454 @@ +{{template "base/head" .}} + +{{template "custom/global_mask" .}} +
+ {{template "repo/header" .}} +
+ + {{template "base/alert" .}} +

+ {{.i18n.Tr "repo.modelarts.train_job.new"}} +

+
+ +
+ {{.CsrfTokenHtml}} + + +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+ +
+ + +
+
+ + {{template "custom/task_wait_count" .}} +
+ + {{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/code" "/dataset" "/model" | Safe}} +
+
+
+ + + {{.i18n.Tr "repo.cloudbrain_jobname_err"}} +
+ +
+ + {{if .description}} + + {{else}} + + {{end}} +
+
+ +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

+ + +
+ + +
+ {{template "custom/select_model" .}} + + +
+ +
+ +
+ + {{if .boot_file}} + + {{else}} + + {{end}} + + + + {{.i18n.Tr "cloudbrain.view_sample"}} +
+
+ +
+ +
+ + {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} + +
+ +
+
+ + +
+ + +
+ +
+ + + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+ + + +
+
+
+
+{{template "base/footer" .}} + + diff --git a/templates/repo/grampus/trainjob/npu/versionnew.tmpl b/templates/repo/grampus/trainjob/npu/versionnew.tmpl index e69de29bb2..8ba1d07cab 100644 --- a/templates/repo/grampus/trainjob/npu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/npu/versionnew.tmpl @@ -0,0 +1,426 @@ +{{template "base/head" .}} + +{{template "custom/global_mask" .}} +
+ {{template "repo/header" .}} +
+ + {{template "base/alert" .}} +

+ {{.i18n.Tr "repo.modelarts.train_job.new"}} +

+
+ +
+ {{.CsrfTokenHtml}} + + + + + +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

+ +
+ + +
+
+ + {{template "custom/task_wait_count" .}} +
+ + {{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/cache/code" "/cache/dataset" "/cache/output" | Safe}} +
+
+
+ + + {{.i18n.Tr "repo.cloudbrain_jobname_err"}} +
+ +
+ + {{if .description}} + + {{else}} + + {{end}} +
+
+ +

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

+ +
+ + +
+ {{template "custom/select_model" .}} +
+ + +
+
+ + {{if .boot_file}} + + {{else}} + + {{end}} + + + + {{.i18n.Tr "cloudbrain.view_sample"}} +
+ + {{template "custom/select_dataset_train" .}} + +
+ + {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} + +
+ +
+
+ + +
+ + +
+
+ + +
+ + +
+ +
+ +
+
+ +
+ + + {{.i18n.Tr "repo.cloudbrain.cancel"}} +
+ + + +
+
+
+
+{{template "base/footer" .}} + + + diff --git a/templates/repo/modelarts/trainjob/index.tmpl b/templates/repo/modelarts/trainjob/index.tmpl index 42c59ba4bf..0ac12982bb 100755 --- a/templates/repo/modelarts/trainjob/index.tmpl +++ b/templates/repo/modelarts/trainjob/index.tmpl @@ -109,7 +109,6 @@ {{range .Tasks}}
- + + +
+ {{$.CsrfTokenHtml}} + {{if .CanDel}} + + {{$.i18n.Tr "repo.modelarts.modify"}} + + {{else}} + + {{$.i18n.Tr "repo.modelarts.modify"}} + + {{end}} +
@@ -239,6 +253,7 @@ + + -- 2.34.1 From 1793aab66b2d7037e03719f202693ab846041d84 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 15 Sep 2022 18:20:56 +0800 Subject: [PATCH 30/57] fix issue --- templates/repo/grampus/trainjob/gpu/new.tmpl | 2 +- .../repo/grampus/trainjob/gpu/versionnew.tmpl | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/templates/repo/grampus/trainjob/gpu/new.tmpl b/templates/repo/grampus/trainjob/gpu/new.tmpl index 530be6b768..d71ed07362 100755 --- a/templates/repo/grampus/trainjob/gpu/new.tmpl +++ b/templates/repo/grampus/trainjob/gpu/new.tmpl @@ -62,7 +62,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl index 530be6b768..56823f5579 100644 --- a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl @@ -62,7 +62,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -183,7 +183,22 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}
+ {{if ne 0 (len .params)}} + {{range $k ,$v := .params}} +
+
+ +
+
+ +
+ + + +
+ {{end}} + {{end}}

- + {{.CsrfTokenHtml}} diff --git a/templates/repo/grampus/trainjob/npu/new.tmpl b/templates/repo/grampus/trainjob/npu/new.tmpl index 1105e76387..69f28f47a3 100755 --- a/templates/repo/grampus/trainjob/npu/new.tmpl +++ b/templates/repo/grampus/trainjob/npu/new.tmpl @@ -57,7 +57,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} -- 2.34.1 From 19d2fc8c1ac7dcf8fe7b4aa4850b4b043d109e47 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 15 Sep 2022 19:08:11 +0800 Subject: [PATCH 32/57] fix issue --- templates/repo/grampus/trainjob/gpu/versionnew.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl index c3b531c463..2344318e56 100644 --- a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl @@ -74,7 +74,7 @@ - +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

-- 2.34.1 From 6ced72f8892f2a40b1f4a1e0023c6cb30ceb1fc6 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Thu, 15 Sep 2022 19:45:37 +0800 Subject: [PATCH 33/57] fix issue --- templates/repo/grampus/trainjob/gpu/new.tmpl | 5 ++--- templates/repo/grampus/trainjob/gpu/versionnew.tmpl | 2 +- templates/repo/grampus/trainjob/npu/new.tmpl | 4 +--- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/templates/repo/grampus/trainjob/gpu/new.tmpl b/templates/repo/grampus/trainjob/gpu/new.tmpl index b0b28e33dd..3de1d06275 100755 --- a/templates/repo/grampus/trainjob/gpu/new.tmpl +++ b/templates/repo/grampus/trainjob/gpu/new.tmpl @@ -62,7 +62,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -75,8 +75,7 @@ - - +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl index 2344318e56..c3b531c463 100644 --- a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl @@ -74,7 +74,7 @@ - +

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

diff --git a/templates/repo/grampus/trainjob/npu/new.tmpl b/templates/repo/grampus/trainjob/npu/new.tmpl index 69f28f47a3..064c0c4231 100755 --- a/templates/repo/grampus/trainjob/npu/new.tmpl +++ b/templates/repo/grampus/trainjob/npu/new.tmpl @@ -57,7 +57,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -69,8 +69,6 @@ - -

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

-- 2.34.1 From 7055faf201eb4812d9e1bc96dbddb8db7608da8c Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 16 Sep 2022 09:34:33 +0800 Subject: [PATCH 34/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index ef7e55c654..83fc3b1d47 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -80,6 +80,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error centerID, centerName := getCentersParamter(ctx, req) + log.Info("grampus Command:" + req.Command) + jobResult, err := createJob(models.CreateGrampusJobRequest{ Name: req.JobName, Tasks: []models.GrampusTasks{ -- 2.34.1 From 3d7ff1b6063ffc59ab87860fff6a68c1ace7d750 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 16 Sep 2022 10:11:25 +0800 Subject: [PATCH 35/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 97beb638fc..a770406d30 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -988,11 +988,11 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo command += "pwd;cd " + workDir + fmt.Sprintf(grampus.CommandPrepareScript, setting.Grampus.SyncScriptProject, setting.Grampus.SyncScriptProject) //download code & dataset if processorType == grampus.ProcessorTypeNPU { - commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" + commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " '" + dataRemotePath + "' '" + datasetName + "'" commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } else if processorType == grampus.ProcessorTypeGPU { - commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " '" + datasetName + "'" + commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " '" + dataRemotePath + "' '" + datasetName + "'" commandDownload = processPretrainModelParameter(pretrainModelPath, pretrainModelFileName, commandDownload) command += commandDownload } -- 2.34.1 From 531ca27fe2e31169544e1c91aaaf53576b374ddb Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 16 Sep 2022 11:43:33 +0800 Subject: [PATCH 36/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 83fc3b1d47..a07aa49f80 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -32,7 +32,7 @@ var ( SpecialPools *models.SpecialPools - CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget -q https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" + + CommandPrepareScript = ";mkdir -p output;mkdir -p code;mkdir -p dataset;mkdir -p pretrainmodel;echo \"start loading script\";wget https://git.openi.org.cn/OpenIOSSG/%s/archive/master.zip;" + "echo \"finish loading script\";unzip -q master.zip;cd %s;chmod 777 downloader_for_obs uploader_for_npu downloader_for_minio uploader_for_gpu;" ) -- 2.34.1 From 2bebc01193058ff330318b27518683c8dcb519e2 Mon Sep 17 00:00:00 2001 From: liuzx Date: Fri, 16 Sep 2022 15:51:25 +0800 Subject: [PATCH 37/57] fix-bug --- routers/repo/cloudbrain.go | 7 ++++ routers/repo/grampus.go | 73 ++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index b8d421ac6f..ad4cc8130c 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -157,6 +157,7 @@ func cloudBrainVersionNewDataPrepare(ctx *context.Context) error { log.Info("spec_id = %d", spec.ID) ctx.Data["spec_id"] = spec.ID } + prepareCloudbrainOneSpecs(ctx) var Parameters modelarts.Parameters if err := json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil { ctx.ServerError("json.Unmarshal failed:", err) @@ -248,6 +249,12 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { tpl = tplCloudBrainTrainJobNew } + var jobID = ctx.Params(":jobid") + log.Info("jobIDTest= %s", jobID) + // if jobID != "" { + // tpl = tplCloudBrainTrainJobVersionNew + // } + tasks, err := models.GetCloudbrainsByDisplayJobName(repo.ID, jobType, displayJobName) if err == nil { if len(tasks) != 0 { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 97beb638fc..0e8b457bdc 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -343,10 +343,16 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" branchName := form.BranchName image := strings.TrimSpace(form.Image) + tpl := tplGrampusTrainJobGPUNew + + var jobID = ctx.Params(":jobid") + if jobID != "" { + tpl = tplGrampusTrainJobGPUVersionNew + } if !jobNamePattern.MatchString(displayJobName) { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } @@ -354,14 +360,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil || !bootFileExist { log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) return } errStr := checkSpecialPool(ctx, "GPU") if errStr != "" { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(errStr, tpl, &form) return } @@ -370,13 +376,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) return } } @@ -385,7 +391,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := grampusParamCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -395,14 +401,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr("system error", tpl, &form) return } } @@ -415,7 +421,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain }) if err != nil || spec == nil { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr("Resource specification not available", tpl, &form) return } @@ -425,7 +431,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -438,7 +444,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } @@ -447,7 +453,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } @@ -455,7 +461,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := mkModelPath(modelPath); err != nil { log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } @@ -463,7 +469,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } @@ -486,7 +492,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr("Create task failed, internal error", tpl, &form) return } @@ -528,7 +534,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) - ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form) + ctx.RenderWithErr(err.Error(), tpl, &form) return } ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") @@ -595,10 +601,17 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain isLatestVersion := modelarts.IsLatestVersion versionCount := modelarts.VersionCountOne engineName := form.EngineName + tpl := tplGrampusTrainJobNPUNew + + //判断路由是否存在jobID,若存在,则说明是创建版本 + var jobID = ctx.Params(":jobid") + if jobID != "" { + tpl = tplGrampusTrainJobNPUVersionNew + } if !jobNamePattern.MatchString(displayJobName) { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tpl, &form) return } @@ -606,7 +619,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil || !bootFileExist { log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tpl, &form) return } @@ -622,13 +635,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr("system error", tpl, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) return } } @@ -637,7 +650,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := grampusParamCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(err.Error(), tpl, &form) return } @@ -647,14 +660,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr("the job name did already exist", tpl, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr("system error", tpl, &form) return } } @@ -667,7 +680,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain }) if err != nil || spec == nil { grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("Resource specification not available", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr("Resource specification not available", tpl, &form) return } @@ -676,7 +689,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } @@ -689,7 +702,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } @@ -697,14 +710,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) return } @@ -726,7 +739,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr("Create task failed, internal error", tpl, &form) return } @@ -769,7 +782,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) - ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form) + ctx.RenderWithErr(err.Error(), tpl, &form) return } ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") -- 2.34.1 From dc941b6b206abb9c61564784b4be263c639554e0 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 19 Sep 2022 09:36:06 +0800 Subject: [PATCH 38/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/cloudbrain.go | 69 +++++++------------ routers/repo/grampus.go | 138 ++++++++----------------------------- 2 files changed, 53 insertions(+), 154 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ad4cc8130c..6fe9040bc9 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -139,51 +139,34 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { ctx.Data["benchmarkMode"] = ctx.Query("benchmarkMode") - return nil -} + if ctx.Cloudbrain != nil { + ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName + ctx.Data["image_name"] = ctx.Cloudbrain.Image + ctx.Data["image_id"] = ctx.Cloudbrain.ImageID + ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile + ctx.Data["description"] = ctx.Cloudbrain.Description + spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) + if spec != nil { + ctx.Data["spec_id"] = spec.ID + } + ctx.Data["run_para_list"] = ctx.Cloudbrain.Parameters + ctx.Data["model_name"] = ctx.Cloudbrain.ModelName + ctx.Data["label_name"] = ctx.Cloudbrain.LabelName + ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName + ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion + ctx.Data["pre_train_model_url"] = ctx.Cloudbrain.PreTrainModelUrl + ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource + ctx.Data["attachment"] = ctx.Cloudbrain.Uuid + ctx.Data["cluster_type"] = models.OpenICluster + _, _, datasetNames, _, err := getDatasUrlListByUUIDS(ctx.Cloudbrain.Uuid) + if err != nil { + log.Info("query dataset error," + err.Error()) + ctx.Data["dataset_name"] = "" + } else { + ctx.Data["dataset_name"] = datasetNames + } -func cloudBrainVersionNewDataPrepare(ctx *context.Context) error { - ctx.Data["PageIsCloudBrain"] = true - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] - ctx.Data["display_job_name"] = displayJobName - ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName - ctx.Data["image_name"] = ctx.Cloudbrain.Image - ctx.Data["image_id"] = ctx.Cloudbrain.ImageID - ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile - ctx.Data["description"] = ctx.Cloudbrain.Description - spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) - if spec != nil { - log.Info("spec_id = %d", spec.ID) - ctx.Data["spec_id"] = spec.ID - } - prepareCloudbrainOneSpecs(ctx) - var Parameters modelarts.Parameters - if err := json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err } - ctx.Data["params"] = Parameters.Parameter - - _, _, datasetNames, _, err := getDatasUrlListByUUIDS(ctx.Cloudbrain.Uuid) - if err != nil { - log.Info("query dataset error," + err.Error()) - ctx.Data["dataset_name"] = "" - } else { - ctx.Data["dataset_name"] = datasetNames - } - ctx.Data["uuid"] = ctx.Cloudbrain.Uuid - ctx.Data["cluster_type"] = models.OpenICluster - ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource - ctx.Data["dataset_type"] = models.TypeCloudBrainOne - waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainOne, models.GPUResource, models.JobTypeTrain) - ctx.Data["wait_count"] = waitCount - - ctx.Data["model_name"] = ctx.Cloudbrain.ModelName - ctx.Data["label_name"] = ctx.Cloudbrain.LabelName - ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName - ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion - ctx.Data["pre_train_model_url"] = ctx.Cloudbrain.PreTrainModelUrl return nil } diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index a64a629bdc..fc4cca5160 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -137,20 +137,45 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["WaitCount"] = waitCount } + if ctx.Cloudbrain != nil { + ctx.Data["attachment"] = ctx.Cloudbrain.Uuid + ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile + ctx.Data["image_id"] = ctx.Cloudbrain.ImageID + ctx.Data["run_para_list"] = ctx.Cloudbrain.Parameters + ctx.Data["description"] = ctx.Cloudbrain.Description + ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName + ctx.Data["engine_name"] = ctx.Cloudbrain.EngineName + ctx.Data["WorkServerNumber"] = ctx.Cloudbrain.WorkServerNumber + ctx.Data["image"] = ctx.Cloudbrain.Image + ctx.Data["dataset_name"] = ctx.Cloudbrain.DatasetName + ctx.Data["model_name"] = ctx.Cloudbrain.ModelName + + ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion + ctx.Data["dataset_name"] = ctx.Cloudbrain.DatasetName + ctx.Data["model_name"] = ctx.Cloudbrain.ModelName + ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName + ctx.Data["label_names"] = ctx.Cloudbrain.LabelName + ctx.Data["PreTrainModelUrl"] = ctx.Cloudbrain.PreTrainModelUrl + spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) + if spec != nil { + ctx.Data["spec_id"] = spec.ID + } + + } return nil } func GrampusTrainJobVersionNew(ctx *context.Context) { task := ctx.Cloudbrain if task.ComputeResource == models.GPUResource { - err := grampusTrainJobVersionNewDataPrepare(ctx, grampus.ProcessorTypeGPU) + err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) if err != nil { ctx.ServerError("get new train-job version info failed", err) return } ctx.HTML(http.StatusOK, tplGrampusTrainJobGPUVersionNew) } else if task.ComputeResource == models.NPUResource { - err := grampusTrainJobVersionNewDataPrepare(ctx, grampus.ProcessorTypeNPU) + err := grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) if err != nil { ctx.ServerError("get new train-job version info failed", err) return @@ -159,115 +184,6 @@ func GrampusTrainJobVersionNew(ctx *context.Context) { } } -func grampusTrainJobVersionNewDataPrepare(ctx *context.Context, processType string) error { - ctx.Data["PageIsCloudBrain"] = true - - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] - ctx.Data["display_job_name"] = displayJobName - - //get valid images - images, err := grampus.GetImages(processType) - if err != nil { - log.Error("GetImages failed:", err.Error()) - } else { - ctx.Data["images"] = images.Infos - } - - grampus.InitSpecialPool() - - ctx.Data["GPUEnabled"] = true - ctx.Data["NPUEnabled"] = true - includeCenters := make(map[string]struct{}) - excludeCenters := make(map[string]struct{}) - if grampus.SpecialPools != nil { - for _, pool := range grampus.SpecialPools.Pools { - if pool.IsExclusive { - if !IsUserInOrgPool(ctx.User.ID, pool) { - ctx.Data[pool.Type+"Enabled"] = false - } - } else { - if strings.Contains(strings.ToLower(processType), strings.ToLower(pool.Type)) { - if IsUserInOrgPool(ctx.User.ID, pool) { - for _, center := range pool.Pool { - includeCenters[center.Queue] = struct{}{} - } - } else { - for _, center := range pool.Pool { - excludeCenters[center.Queue] = struct{}{} - } - - } - - } - - } - } - } - - //prepare available specs - if processType == grampus.ProcessorTypeNPU { - prepareGrampusTrainSpecs(ctx, models.NPU) - } else if processType == grampus.ProcessorTypeGPU { - prepareGrampusTrainSpecs(ctx, models.GPU) - } - - //get branches - branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) - if err != nil { - log.Error("GetBranches error:", err.Error()) - } else { - ctx.Data["branches"] = branches - } - - ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName - ctx.Data["image_name"] = ctx.Cloudbrain.Image - ctx.Data["image_id"] = ctx.Cloudbrain.ImageID - ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile - ctx.Data["description"] = ctx.Cloudbrain.Description - spec, _ := resource.GetCloudbrainSpec(ctx.Cloudbrain.ID) - if spec != nil { - log.Info("spec_id = %d", spec.ID) - ctx.Data["spec_id"] = spec.ID - } - var Parameters modelarts.Parameters - if err = json.Unmarshal([]byte(ctx.Cloudbrain.Parameters), &Parameters); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["params"] = Parameters.Parameter - - _, _, datasetNames, _, err := getDatasUrlListByUUIDS(ctx.Cloudbrain.Uuid) - if err != nil { - log.Info("query dataset error," + err.Error()) - ctx.Data["dataset_name"] = "" - } else { - ctx.Data["dataset_name"] = datasetNames - } - ctx.Data["uuid"] = ctx.Cloudbrain.Uuid - ctx.Data["cloudbrain_type"] = models.C2NetCluster - ctx.Data["compute_resource"] = ctx.Cloudbrain.ComputeResource - - if processType == grampus.ProcessorTypeGPU { - ctx.Data["dataset_type"] = models.TypeCloudBrainOne - waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.GPUResource, models.JobTypeTrain) - ctx.Data["wait_count"] = waitCount - } else if processType == grampus.ProcessorTypeNPU { - ctx.Data["dataset_type"] = models.TypeCloudBrainTwo - waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeC2Net, models.NPUResource, models.JobTypeTrain) - ctx.Data["wait_count"] = waitCount - ctx.Data["work_server_number"] = ctx.Cloudbrain.WorkServerNumber - } - - ctx.Data["model_name"] = ctx.Cloudbrain.ModelName - ctx.Data["label_name"] = ctx.Cloudbrain.LabelName - ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName - ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion - ctx.Data["pre_train_model_url"] = ctx.Cloudbrain.PreTrainModelUrl - - return nil -} - func prepareGrampusTrainSpecs(ctx *context.Context, computeResource string) { noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ JobType: models.JobTypeTrain, -- 2.34.1 From e2144b0c909f592bc4cc12f0c6ba2f225ba208e5 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 19 Sep 2022 09:48:55 +0800 Subject: [PATCH 39/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/cloudbrain.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 6fe9040bc9..ba4258352b 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2600,7 +2600,7 @@ func CloudBrainTrainJobNew(ctx *context.Context) { } func CloudBrainTrainJobVersionNew(ctx *context.Context) { - err := cloudBrainVersionNewDataPrepare(ctx) + err := cloudBrainNewDataPrepare(ctx) if err != nil { ctx.ServerError("get new train-job info failed", err) return -- 2.34.1 From 32eb6af78a520d886c2fe2371bc48e23c1775838 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 19 Sep 2022 10:14:17 +0800 Subject: [PATCH 40/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/modelarts.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 13ae93dcf3..d1a54f45a5 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -977,12 +977,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["spec_id"] = spec.ID } - var Parameters modelarts.Parameters - if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["params"] = Parameters.Parameter + ctx.Data["run_para_list"] = task.Parameters branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) if err != nil { @@ -1005,7 +1000,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { ctx.Data["work_server_number"] = task.WorkServerNumber ctx.Data["flavor_name"] = task.FlavorName ctx.Data["engine_name"] = task.EngineName - ctx.Data["uuid"] = task.Uuid + ctx.Data["attachment"] = task.Uuid ctx.Data["flavor_code"] = task.FlavorCode ctx.Data["engine_id"] = task.EngineID ctx.Data["datasetType"] = models.TypeCloudBrainTwo -- 2.34.1 From e24f5830187a50431cc46770fd1c237b88bd67ed Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Mon, 19 Sep 2022 10:20:14 +0800 Subject: [PATCH 41/57] fix issue --- templates/repo/grampus/trainjob/gpu/new.tmpl | 24 +--------- .../repo/grampus/trainjob/gpu/versionnew.tmpl | 46 ++----------------- templates/repo/grampus/trainjob/npu/new.tmpl | 5 +- .../repo/grampus/trainjob/npu/versionnew.tmpl | 42 +---------------- .../js/components/dataset/selectDataset.vue | 7 +++ 5 files changed, 17 insertions(+), 107 deletions(-) diff --git a/templates/repo/grampus/trainjob/gpu/new.tmpl b/templates/repo/grampus/trainjob/gpu/new.tmpl index 3de1d06275..670bedd367 100755 --- a/templates/repo/grampus/trainjob/gpu/new.tmpl +++ b/templates/repo/grampus/trainjob/gpu/new.tmpl @@ -62,7 +62,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -185,27 +185,7 @@

- +
diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl index c3b531c463..53dbf51feb 100644 --- a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl @@ -62,7 +62,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -75,8 +75,6 @@ - -

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

@@ -183,45 +181,10 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}
- {{if ne 0 (len .params)}} - {{range $k ,$v := .params}} -
-
- -
-
- -
- - - - -
- {{end}} - {{end}} +
- +
@@ -258,12 +221,11 @@ .tab(); - $(document).ready(function(){ + $(document).ready(function(){ let params = $('.dynamic.field').data('params') params&¶ms.parameter.forEach((item,index)=>{ Add_parameter(index,flag=true,item) }) - }) // 参数增加、删除、修改、保存 function Add_parameter(i,flag=false,paramsObject={}) { diff --git a/templates/repo/grampus/trainjob/npu/new.tmpl b/templates/repo/grampus/trainjob/npu/new.tmpl index 064c0c4231..34f56bb49d 100755 --- a/templates/repo/grampus/trainjob/npu/new.tmpl +++ b/templates/repo/grampus/trainjob/npu/new.tmpl @@ -57,7 +57,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -268,12 +268,11 @@ .tab(); - $(document).ready(function(){ + $(document).ready(function(){ let params = $('.dynamic.field').data('params') params&¶ms.parameter.forEach((item,index)=>{ Add_parameter(index,flag=true,item) }) - }) // 参数增加、删除、修改、保存 function Add_parameter(i,flag=false,paramsObject={}) { diff --git a/templates/repo/grampus/trainjob/npu/versionnew.tmpl b/templates/repo/grampus/trainjob/npu/versionnew.tmpl index f7126c3c30..8675fa7da8 100644 --- a/templates/repo/grampus/trainjob/npu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/npu/versionnew.tmpl @@ -57,7 +57,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -69,8 +69,6 @@ - -

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

@@ -193,46 +191,10 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}
- {{if ne 0 (len .params)}} - {{range $k ,$v := .params}} -
-
- -
-
- -
- - - - -
- {{end}} - {{end}} +
-
diff --git a/web_src/js/components/dataset/selectDataset.vue b/web_src/js/components/dataset/selectDataset.vue index fc573dc7f5..77d54033f5 100755 --- a/web_src/js/components/dataset/selectDataset.vue +++ b/web_src/js/components/dataset/selectDataset.vue @@ -21,6 +21,11 @@ v-if="confirmDatasetList && confirmFlag" > +
Date: Mon, 19 Sep 2022 10:34:05 +0800 Subject: [PATCH 42/57] fix issue --- templates/repo/grampus/trainjob/npu/new.tmpl | 21 --- templates/repo/modelarts/trainjob/new.tmpl | 151 ++++-------------- .../repo/modelarts/trainjob/version_new.tmpl | 136 +++++----------- 3 files changed, 67 insertions(+), 241 deletions(-) diff --git a/templates/repo/grampus/trainjob/npu/new.tmpl b/templates/repo/grampus/trainjob/npu/new.tmpl index 34f56bb49d..a4108056d0 100755 --- a/templates/repo/grampus/trainjob/npu/new.tmpl +++ b/templates/repo/grampus/trainjob/npu/new.tmpl @@ -197,27 +197,6 @@
-
diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index 8d429f4962..4acbdfd27f 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -62,7 +62,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -216,22 +216,6 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}}
- {{if ne 0 (len .params)}} - {{range $k ,$v := .params}} -
-
- -
-
- -
- - - - -
- {{end}} - {{end}}

@@ -260,27 +244,6 @@
-
@@ -353,25 +316,38 @@ case 13:return false; } }); + $(document).ready(function(){ + let params = $('.dynamic.field').data('params') + params&¶ms.parameter.forEach((item,index)=>{ + Add_parameter(index,flag=true,item) + }) + }) // 参数增加、删除、修改、保存 - function Add_parameter(i) { + function Add_parameter(i,flag=false,paramsObject={}) { + let value = '' + value += `
` + value += '
' let placeholder_value='{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}' let placeholder_name='{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}' - value = '
' + - '
' + - '' + - '
' + - '
' + - '' + - '
' + - '' + - '' + - '' + - '' + - '
' + if(flag){ + value +=`` + }else{ + value +='' + } + value += '
' + value += '
' + if(flag){ + value +=`` + }else{ + value +='' + } + value += '
' + value += '' + value += '
' $(".dynamic.field").append(value) - } + } + $('#add_run_para').click(function () { var len = $(".dynamic.field .two.fields").length Add_parameter(len) @@ -387,80 +363,13 @@ }) }); - $('.ui.parameter.green.button').click(function () { - var parameters = []; - $('table tr').each(function () { - $(this).find('td:eq(1)').each(function () { - parameters.push($(this).text()); - }) - $(this).find('input').each(function () { - parameters.push($(this).text()) - }) - }); - $('.ui.parameter.modal') - .modal('hide'); - for (var i = 2; i < parameters.length; i++) { - switch (i) { - // 数据集uuid待完成 - // case (2): - // console.log(1) - // break; - // $("#trainjob_datasets").val(parameters[i]); - // console.log($("#trainjob_datasets").val()) - case (3): - $("input[name='boot_file']").val(parameters[i]); - break; - case (4): - var para = parameters[i].split(" ") - for (var j = 0; j < para.length; j++) { - var para_name = para[j].split('=')[0] - var para_value = para[j].split('=')[1] - var len = $(".dynamic.field .two.fields").length - Add_parameter(len) - var pid = 'para' + len - $(".dynamic.field" + " #" + pid + "").find("input[name=shipping_first-name]").val(para_name) - $(".dynamic.field" + " #" + pid + "").find("input[name=shipping_last-name]").val(para_value) - } - break; - // 数据集pool_id待完成 - // case (5): - // $("select[name='pool_id']").val(parameters[i]); - // break; - case (6): - // $("input[name='work_server_number']").val(parameters[i]); - break; - } - } - }) - - $('.ui.save.checkbox').click(function () { - $(this).checkbox({ - onChange: function () { - if ($('.ui.save.checkbox').checkbox('is checked')) { - $('#save_para').removeClass("disabled") - - } else { - $('#save_para').addClass("disabled") - } - } - }); - }) $('.question.circle.icon').hover(function () { $(this).popup('show') }); - $(".item.active.parameter_config").click(function () { - $('.ui.parameter.modal') - .modal('setting', 'closable', false) - .modal('show'); - }) - $('.ui.deny.button').click(function () { - $('.ui.parameter.modal') - .modal('hide'); - }) $('select.dropdown') .dropdown(); @@ -528,8 +437,8 @@ var run_parameters = [] var msg = {} $(".dynamic.field .two.fields").each(function () { - var para_name = $(this).find('input[name=shipping_first-name]').val() - var para_value = $(this).find('input[name=shipping_last-name]').val() + var para_name = $(this).find('input.shipping_first-name').val() + var para_value = $(this).find('input.shipping_last-name').val() run_parameters.push({ "label": para_name, "value": para_value }) }) msg["parameter"] = run_parameters diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index ffc1045e8d..6d23831140 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -202,23 +202,8 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} -
- {{if ne 0 (len .params)}} - {{range $k ,$v := .params}} -
-
- -
-
- -
- - - - -
- {{end}} - {{end}} +
+
@@ -320,100 +305,53 @@ } }) + $(document).ready(function(){ + let params = $('.dynamic.field').data('params') + params&¶ms.parameter.forEach((item,index)=>{ + Add_parameter(index,flag=true,item) + }) + }) // 参数增加、删除、修改、保存 - function Add_parameter(i){ + function Add_parameter(i,flag=false,paramsObject={}) { + let value = '' + value += `
` + value += '
' let placeholder_value='{{.i18n.Tr "repo.modelarts.train_job.parameter_value"}}' let placeholder_name='{{.i18n.Tr "repo.modelarts.train_job.parameter_name"}}' - value = '
' + - '
' + - '' + - '
' + - '
' + - '' + - '
'+ - '' + - '' + - '' + - '' + - '
' + if(flag){ + value +=`` + }else{ + value +='' + } + value += '
' + value += '
' + if(flag){ + value +=`` + }else{ + value +='' + } + value += '
' + value += '' + value += '
' $(".dynamic.field").append(value) - } - $('#add_run_para').click(function(){ + } + + $('#add_run_para').click(function () { var len = $(".dynamic.field .two.fields").length Add_parameter(len) }); - $(".dynamic.field").on("click",".trash.icon", function() { + $(".dynamic.field").on("click", ".trash.icon", function () { var index = $(this).parent().parent().index() $(this).parent().parent().remove() var len = $(".dynamic.field .two.fields").length - $(".dynamic.field .two.fields").each(function(){ + $(".dynamic.field .two.fields").each(function () { var cur_index = $(this).index() $(this).attr('id', 'para' + cur_index) }) }); - $('.ui.parameter.green.button').click(function(){ - var parameters = []; - $('table tr').each(function() { - $(this).find('td:eq(1)').each(function(){ - parameters.push($(this).text()); - }) - $(this).find('input').each(function(){ - parameters.push($(this).text()) - }) - - }); - $('.ui.parameter.modal') - .modal('hide'); - for(var i = 2; i < parameters.length; i++){ - switch(i) { - // 数据集uuid待完成 - // case (2): - // console.log(1) - // break; - // $("#trainjob_datasets").val(parameters[i]); - // console.log($("#trainjob_datasets").val()) - case (3): - $("input[name='boot_file']").val(parameters[i]); - break; - case (4): - var para = parameters[i].split(" ") - for(var j = 0; j < para.length; j++){ - var para_name = para[j].split('=')[0] - var para_value = para[j].split('=')[1] - var len = $(".dynamic.field .two.fields").length - Add_parameter(len) - var pid = 'para' + len - $(".dynamic.field"+ " #" + pid + "").find("input[name=shipping_first-name]").val(para_name) - $(".dynamic.field"+ " #" + pid + "").find("input[name=shipping_last-name]").val(para_value) - } - break; - // 数据集pool_id待完成 - // case (5): - // $("select[name='pool_id']").val(parameters[i]); - // break; - case (6): - $("input[name='work_server_number']").val(parameters[i]); - break; - } - } - }) - - $('.ui.save.checkbox').click(function(){ - $(this).checkbox({ - onChange: function(){ - if ($('.ui.save.checkbox').checkbox('is checked')){ - $('#save_para').removeClass("disabled") - - }else{ - $('#save_para').addClass("disabled") - } - } - }); - }) - $('.question.circle.icon').hover(function(){ $(this).popup('show') }); @@ -497,13 +435,13 @@ document.getElementById("mask").style.display = "none" } } - function send_run_para(){ + function send_run_para() { var run_parameters = [] var msg = {} - $(".dynamic.field .two.fields").each(function(){ - var para_name = $(this).find('input[name=shipping_first-name]').val() - var para_value = $(this).find('input[name=shipping_last-name]').val() - run_parameters.push({"label": para_name, "value": para_value}) + $(".dynamic.field .two.fields").each(function () { + var para_name = $(this).find('input.shipping_first-name').val() + var para_value = $(this).find('input.shipping_last-name').val() + run_parameters.push({ "label": para_name, "value": para_value }) }) msg["parameter"] = run_parameters msg = JSON.stringify(msg) -- 2.34.1 From 26cbbbaf384845ce7fd47b23fb1962cf09acc180 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Mon, 19 Sep 2022 10:57:25 +0800 Subject: [PATCH 43/57] fix issue --- templates/repo/cloudbrain/trainjob/new.tmpl | 47 +------------------ .../repo/grampus/trainjob/gpu/versionnew.tmpl | 2 - templates/repo/modelarts/trainjob/new.tmpl | 2 +- .../repo/modelarts/trainjob/version_new.tmpl | 2 +- 4 files changed, 3 insertions(+), 50 deletions(-) diff --git a/templates/repo/cloudbrain/trainjob/new.tmpl b/templates/repo/cloudbrain/trainjob/new.tmpl index 427d32995c..6835eb5f2e 100755 --- a/templates/repo/cloudbrain/trainjob/new.tmpl +++ b/templates/repo/cloudbrain/trainjob/new.tmpl @@ -70,7 +70,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} @@ -176,28 +176,6 @@

-
@@ -232,29 +210,6 @@
-
diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl index 53dbf51feb..b9803d77f9 100644 --- a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl +++ b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl @@ -72,8 +72,6 @@ {{.CsrfTokenHtml}} - -

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

diff --git a/templates/repo/modelarts/trainjob/new.tmpl b/templates/repo/modelarts/trainjob/new.tmpl index 4acbdfd27f..e7bb8bf579 100755 --- a/templates/repo/modelarts/trainjob/new.tmpl +++ b/templates/repo/modelarts/trainjob/new.tmpl @@ -215,7 +215,7 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} -
+
diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index 6d23831140..8b4e510805 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -55,7 +55,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} -- 2.34.1 From c793a94b0a8d7495cdb7efc53779181e83539643 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 19 Sep 2022 11:08:41 +0800 Subject: [PATCH 44/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/cloudbrain.go | 2 +- routers/repo/modelarts.go | 227 +++++-------------------------------- routers/routes/routes.go | 2 +- 3 files changed, 33 insertions(+), 198 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ba4258352b..1cfe533d10 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -141,7 +141,7 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { if ctx.Cloudbrain != nil { ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName - ctx.Data["image_name"] = ctx.Cloudbrain.Image + ctx.Data["image"] = ctx.Cloudbrain.Image ctx.Data["image_id"] = ctx.Cloudbrain.ImageID ctx.Data["boot_file"] = ctx.Cloudbrain.BootFile ctx.Data["description"] = ctx.Cloudbrain.Description diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index d1a54f45a5..66260cfcc4 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -834,84 +834,6 @@ func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) { } } -func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { - ctx.Data["PageIsCloudBrain"] = true - - //can, err := canUserCreateTrainJob(ctx.User.ID) - //if err != nil { - // ctx.ServerError("canUserCreateTrainJob", err) - // return - //} - // - //if !can { - // log.Error("the user can not create train-job") - // ctx.ServerError("the user can not create train-job", fmt.Errorf("the user can not create train-job")) - // return - //} - - t := time.Now() - var displayJobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] - ctx.Data["display_job_name"] = displayJobName - - attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) - if err != nil { - ctx.ServerError("GetAllUserAttachments failed:", err) - return err - } - ctx.Data["attachments"] = attachs - - var resourcePools modelarts.ResourcePool - if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["resource_pools"] = resourcePools.Info - - var engines modelarts.Engine - if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["engines"] = engines.Info - - var versionInfos modelarts.VersionInfo - if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["engine_versions"] = versionInfos.Version - - prepareCloudbrainTwoTrainSpecs(ctx) - - configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) - if err != nil { - ctx.ServerError("getConfigList failed:", err) - return err - } - var Parameters modelarts.Parameters - if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["params"] = Parameters.Parameter - ctx.Data["config_list"] = configList.ParaConfigs - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - _, datasetNames, err := models.GetDatasetInfo(form.Attachment) - if err != nil { - log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) - return nil - } - ctx.Data["dataset_name"] = datasetNames - ctx.Data["branch_name"] = form.BranchName - ctx.Data["datasetType"] = models.TypeCloudBrainTwo - waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") - ctx.Data["WaitCount"] = waitCount - setMultiNodeIfConfigureMatch(ctx) - - return nil -} - func TrainJobNewVersion(ctx *context.Context) { err := trainJobNewVersionDataPrepare(ctx) @@ -1024,93 +946,6 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { return nil } -func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { - ctx.Data["PageIsCloudBrain"] = true - var jobID = ctx.Params(":jobid") - // var versionName = ctx.Params(":version-name") - var versionName = ctx.Query("version_name") - - task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) - if err != nil { - log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) - return err - } - - t := time.Now() - var jobName = jobNamePrefixValid(cutString(ctx.User.Name, 5)) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] - ctx.Data["job_name"] = task.JobName - - attachs, err := models.GetModelArtsTrainAttachments(ctx.User.ID) - if err != nil { - ctx.ServerError("GetAllUserAttachments failed:", err) - return err - } - ctx.Data["attachments"] = attachs - - var resourcePools modelarts.ResourcePool - if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["resource_pools"] = resourcePools.Info - - var engines modelarts.Engine - if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["engines"] = engines.Info - - var versionInfos modelarts.VersionInfo - if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["engine_versions"] = versionInfos.Version - - prepareCloudbrainTwoTrainSpecs(ctx) - - var Parameters modelarts.Parameters - if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["params"] = Parameters.Parameter - - outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath - ctx.Data["train_url"] = outputObsPath - - branches, _, err := ctx.Repo.GitRepo.GetBranches(0, 0) - if err != nil { - ctx.ServerError("GetBranches error:", err) - return err - } - ctx.Data["branches"] = branches - ctx.Data["description"] = form.Description - ctx.Data["dataset_name"] = task.DatasetName - ctx.Data["work_server_number"] = form.WorkServerNumber - ctx.Data["flavor_name"] = form.FlavorName - ctx.Data["engine_name"] = form.EngineName - ctx.Data["flavor_code"] = task.FlavorCode - ctx.Data["engine_id"] = task.EngineID - ctx.Data["version_name"] = form.VersionName - - ctx.Data["bootFile"] = form.BootFile - ctx.Data["uuid"] = form.Attachment - ctx.Data["branch_name"] = form.BranchName - configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) - if err != nil { - ctx.ServerError("getConfigList failed:", err) - return err - } - ctx.Data["config_list"] = configList.ParaConfigs - ctx.Data["datasetType"] = models.TypeCloudBrainTwo - waitCount := cloudbrain.GetWaitingCloudbrainCount(models.TypeCloudBrainTwo, "") - ctx.Data["WaitCount"] = waitCount - - return nil -} - func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { ctx.Data["PageIsTrainJob"] = true VersionOutputPath := modelarts.GetOutputPathByCount(modelarts.TotalVersionCount) @@ -1138,7 +973,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) if errStr != "" { - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form) return } @@ -1146,13 +981,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) if err != nil { log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsTrainJobNew, &form) return } @@ -1160,7 +995,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } @@ -1168,7 +1003,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) if err != nil || !bootFileExist { log.Error("Get bootfile error:", err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobNew, &form) return } @@ -1179,7 +1014,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) Cluster: models.OpenICluster, AiCenterCode: models.AICenterOfCloudBrainTwo}) if err != nil || spec == nil { - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobNew, &form) return } @@ -1188,14 +1023,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err == nil { if len(tasks) != 0 { log.Error("the job name did already exist", ctx.Data["MsgID"]) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("the job name did already exist", tplModelArtsTrainJobNew, &form) return } } else { if !models.IsErrJobNotExist(err) { log.Error("system error, %v", err, ctx.Data["MsgID"]) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form) return } @@ -1212,7 +1047,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err := downloadCode(repo, codeLocalPath, branchName); err != nil { log.Error("downloadCode failed, server timed out: %s (%v)", repo.FullName(), err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form) return } @@ -1220,14 +1055,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) //todo: upload code (send to file_server todo this work?) if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) return } if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) return } @@ -1236,7 +1071,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { // if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobNew, &form) return } @@ -1248,7 +1083,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) err := json.Unmarshal([]byte(params), ¶meters) if err != nil { log.Error("Failed to Unmarshal params: %s (%v)", params, err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobNew, &form) return } @@ -1274,7 +1109,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) if err != nil { log.Error("Failed to getDatasUrlListByUUIDS: %v", err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("Failed to getDatasUrlListByUUIDS:"+err.Error(), tplModelArtsTrainJobNew, &form) return } @@ -1282,7 +1117,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) jsondatas, err := json.Marshal(datasUrlList) if err != nil { log.Error("Failed to Marshal: %v", err) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr("json error:"+err.Error(), tplModelArtsTrainJobNew, &form) return } @@ -1390,7 +1225,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) err = modelarts.GenerateTrainJob(ctx, req) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) - trainJobErrorNewDataPrepare(ctx, form) + trainJobNewDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) return } @@ -1475,7 +1310,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber) if errStr != "" { - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form) return } @@ -1483,13 +1318,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) if err != nil { log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("system error", tplModelArtsTrainJobVersionNew, &form) return } else { if count >= 1 { log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsTrainJobVersionNew, &form) return } @@ -1526,14 +1361,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ canNewJob, _ := canUserCreateTrainJobVersion(ctx, latestTask.UserID) if !canNewJob { - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("user cann't new trainjob", tplModelArtsTrainJobVersionNew, &form) return } if err := paramCheckCreateTrainJob(form); err != nil { log.Error("paramCheckCreateTrainJob failed:(%v)", err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) return } @@ -1541,7 +1376,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) if err != nil || !bootFileExist { log.Error("Get bootfile error:", err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_bootfile_err"), tplModelArtsTrainJobVersionNew, &form) return } @@ -1552,7 +1387,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ Cluster: models.OpenICluster, AiCenterCode: models.AICenterOfCloudBrainTwo}) if err != nil || spec == nil { - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobVersionNew, &form) return } @@ -1567,7 +1402,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ commitID, _ := gitRepo.GetBranchCommitID(branchName) if err := downloadCode(repo, codeLocalPath, branchName); err != nil { log.Error("Failed git clone repo to local(!: %s (%v)", repo.FullName(), err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form) return } @@ -1575,14 +1410,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ //todo: upload code (send to file_server todo this work?) if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) return } if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) return } @@ -1592,7 +1427,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ // if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tplModelArtsTrainJobVersionNew, &form) return } @@ -1606,7 +1441,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ err := json.Unmarshal([]byte(params), ¶meters) if err != nil { log.Error("Failed to Unmarshal params: %s (%v)", params, err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) return } @@ -1632,7 +1467,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) if err != nil { log.Error("Failed to getDatasUrlListByUUIDS: %v", err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("Failed to getDatasUrlListByUUIDS:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) return } @@ -1640,7 +1475,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ jsondatas, err := json.Marshal(datasUrlList) if err != nil { log.Error("Failed to Marshal: %v", err) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr("json error:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) return } @@ -1749,7 +1584,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) if err != nil { log.Error("GenerateTrainJob failed:%v", err.Error()) - versionErrorDataPrepare(ctx, form) + trainJobNewVersionDataPrepare(ctx) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) return } diff --git a/routers/routes/routes.go b/routers/routes/routes.go index e97d4a0ca5..3b0ec71a0f 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1138,7 +1138,7 @@ func RegisterRoutes(m *macaron.Macaron) { //m.Get("/get_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo.GetLogFromModelDir) //m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.CloudBrainTrainJobVersionNew) - m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateGrampusTrainJobForm{}), repo.CloudBrainTrainJobVersionCreate) + m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainTrainJobVersionCreate) }) m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.CloudBrainTrainJobNew) m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateCloudBrainForm{}), repo.CloudBrainCreate) -- 2.34.1 From 2a3b7b222dd2a56e12935c198956a144df1617ca Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 19 Sep 2022 12:24:36 +0800 Subject: [PATCH 45/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- templates/repo/cloudbrain/trainjob/versionnew.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/cloudbrain/trainjob/versionnew.tmpl b/templates/repo/cloudbrain/trainjob/versionnew.tmpl index a622db8fbf..6a625c35b7 100644 --- a/templates/repo/cloudbrain/trainjob/versionnew.tmpl +++ b/templates/repo/cloudbrain/trainjob/versionnew.tmpl @@ -70,7 +70,7 @@
{{template "repo/header" .}}
- + {{template "base/alert" .}}

{{.i18n.Tr "repo.modelarts.train_job.new"}} -- 2.34.1 From fc86884ed72e076695c50eed77eca54089a1cfce Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Mon, 19 Sep 2022 15:04:00 +0800 Subject: [PATCH 46/57] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routers/repo/grampus.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index fc4cca5160..023ba9a6ad 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -146,13 +146,15 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err ctx.Data["branch_name"] = ctx.Cloudbrain.BranchName ctx.Data["engine_name"] = ctx.Cloudbrain.EngineName ctx.Data["WorkServerNumber"] = ctx.Cloudbrain.WorkServerNumber - ctx.Data["image"] = ctx.Cloudbrain.Image + if ctx.Cloudbrain.Image != "" { + ctx.Data["image"] = ctx.Cloudbrain.Image + } else { + ctx.Data["image"] = ctx.Cloudbrain.EngineName + } ctx.Data["dataset_name"] = ctx.Cloudbrain.DatasetName ctx.Data["model_name"] = ctx.Cloudbrain.ModelName ctx.Data["model_version"] = ctx.Cloudbrain.ModelVersion - ctx.Data["dataset_name"] = ctx.Cloudbrain.DatasetName - ctx.Data["model_name"] = ctx.Cloudbrain.ModelName ctx.Data["ckpt_name"] = ctx.Cloudbrain.CkptName ctx.Data["label_names"] = ctx.Cloudbrain.LabelName ctx.Data["PreTrainModelUrl"] = ctx.Cloudbrain.PreTrainModelUrl -- 2.34.1 From c6f8397751c1999501a155b9455ff274a7987792 Mon Sep 17 00:00:00 2001 From: zhoupzh Date: Mon, 19 Sep 2022 15:07:16 +0800 Subject: [PATCH 47/57] fix issue --- .../repo/modelarts/trainjob/version_new.tmpl | 354 +++++++++--------- web_src/js/standalone/cloudbrainNew.js | 150 ++++++++ 2 files changed, 327 insertions(+), 177 deletions(-) create mode 100644 web_src/js/standalone/cloudbrainNew.js diff --git a/templates/repo/modelarts/trainjob/version_new.tmpl b/templates/repo/modelarts/trainjob/version_new.tmpl index 8b4e510805..26334864cc 100644 --- a/templates/repo/modelarts/trainjob/version_new.tmpl +++ b/templates/repo/modelarts/trainjob/version_new.tmpl @@ -202,7 +202,7 @@ {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} -
+
@@ -276,7 +276,7 @@

{{template "base/footer" .}} - + + + - + + - + - + - + - - diff --git a/templates/repo/grampus/trainjob/gpu/new.tmpl b/templates/repo/grampus/trainjob/gpu/new.tmpl index 5a925d3c36..5bbe0b536e 100755 --- a/templates/repo/grampus/trainjob/gpu/new.tmpl +++ b/templates/repo/grampus/trainjob/gpu/new.tmpl @@ -69,7 +69,7 @@

- + {{.CsrfTokenHtml}} diff --git a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl b/templates/repo/grampus/trainjob/gpu/versionnew.tmpl deleted file mode 100644 index 8ac4b583cd..0000000000 --- a/templates/repo/grampus/trainjob/gpu/versionnew.tmpl +++ /dev/null @@ -1,220 +0,0 @@ -{{template "base/head" .}} - -{{template "custom/global_mask" .}} -
- {{template "repo/header" .}} -
- - {{template "base/alert" .}} -

- {{.i18n.Tr "repo.modelarts.train_job.new"}} -

-
- - - {{.CsrfTokenHtml}} - - -

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

- -
- - -
-
- - {{template "custom/task_wait_count" .}} -
- - {{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/tmp/code" "/tmp/dataset" "/tmp/output" | Safe}} -
-
-
- - - -
- {{.i18n.Tr "repo.cloudbrain_jobname_err"}} -
- - {{if .description}} - - {{else}} - - {{end}} -
-
- -

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

- - -
- - -
- {{template "custom/select_model" .}} -
-
- -
- - {{if .boot_file}} - - {{else}} - - {{end}} - - - - {{.i18n.Tr "cloudbrain.view_sample"}} -
- - -
- -
- -
- - {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} - -
- -
-
- -
- - -
- -
- - - {{.i18n.Tr "repo.cloudbrain.cancel"}} -
- - - - -
-
-
-{{template "base/footer" .}} - - - diff --git a/templates/repo/grampus/trainjob/npu/new.tmpl b/templates/repo/grampus/trainjob/npu/new.tmpl index 2f732cc643..ff58e4f4b2 100755 --- a/templates/repo/grampus/trainjob/npu/new.tmpl +++ b/templates/repo/grampus/trainjob/npu/new.tmpl @@ -64,7 +64,7 @@

-
+ {{.CsrfTokenHtml}} diff --git a/templates/repo/grampus/trainjob/npu/versionnew.tmpl b/templates/repo/grampus/trainjob/npu/versionnew.tmpl deleted file mode 100644 index ae1d368510..0000000000 --- a/templates/repo/grampus/trainjob/npu/versionnew.tmpl +++ /dev/null @@ -1,246 +0,0 @@ -{{template "base/head" .}} - -{{template "custom/global_mask" .}} -
- {{template "repo/header" .}} -
- - {{template "base/alert" .}} -

- {{.i18n.Tr "repo.modelarts.train_job.new"}} -

-
- - - {{.CsrfTokenHtml}} - - - -

{{.i18n.Tr "repo.modelarts.train_job.basic_info"}}:

- -
- - -
-
- - {{template "custom/task_wait_count" .}} -
- - {{.i18n.Tr "cloudbrain.new_train_gpu_tooltips" "/cache/code" "/cache/dataset" "/cache/output" | Safe}} -
-
-
- - - {{.i18n.Tr "repo.cloudbrain_jobname_err"}} -
- -
- - {{if .description}} - - {{else}} - - {{end}} -
-
- -

{{.i18n.Tr "repo.modelarts.train_job.parameter_setting"}}:

- -
- - -
- {{template "custom/select_model" .}} -
- - -
-
- - {{if .boot_file}} - - {{else}} - - {{end}} - - - - {{.i18n.Tr "cloudbrain.view_sample"}} -
- -
- -
- -
- - {{.i18n.Tr "repo.modelarts.train_job.add_run_parameter"}} - -
- -
-
- -
- - -
-
- - -
- - -
- -
- -
-
- -
- - - {{.i18n.Tr "repo.cloudbrain.cancel"}} -
- - - - -
-
-
-{{template "base/footer" .}} - - - diff --git a/web_src/js/features/cloudbrainShow.js b/web_src/js/features/cloudbrainShow.js index 0b82b807f4..dc265b5e4b 100644 --- a/web_src/js/features/cloudbrainShow.js +++ b/web_src/js/features/cloudbrainShow.js @@ -472,7 +472,7 @@ export default async function initCloudrainSow() { let faildTrainUrl = $('input[name="pre_train_model_url"]').val(); let faildCkptName = $('input[name="ckpt_name"]').val(); // 新建错误的表单返回初始化 - if (faildModelName) { + if (faildModelName && nameList.includes(faildModelName)) { $("#select_model").dropdown("set text", faildModelName); $("#select_model").dropdown("set value", faildModelName); $("#select_model_version").dropdown("set text", faildModelVersion); @@ -486,7 +486,7 @@ export default async function initCloudrainSow() { $("#select_model").dropdown({ onChange: function (value, text, $selectedItem) { $("#model_name_version").empty(); - if(value){ + if (value) { let html = ""; nameMap[value].forEach((element) => { let { TrainTaskInfo } = element; @@ -508,18 +508,18 @@ export default async function initCloudrainSow() { initVersionText, $("#model_name_version div.item:first-child") ); - }else{ - $("#select_model_version").dropdown("set text", ''); - $("#select_model_version").dropdown("set value", ''); - $("#select_model_checkpoint").dropdown("set text", ''); - $("#select_model_checkpoint").dropdown("set value", ''); + } else { + $("#select_model_version").dropdown("set text", ""); + $("#select_model_version").dropdown("set value", ""); + $("#select_model_checkpoint").dropdown("set text", ""); + $("#select_model_checkpoint").dropdown("set value", ""); } }, }); $("#select_model_version").dropdown({ onChange: function (value, text, $selectedItem) { - if(!value) return + if (!value) return; const dataID = $selectedItem && $selectedItem[0].getAttribute("data-id"); $("input#ai_model_version").val(text); -- 2.34.1