From 6366003ab1cd3443b42b4c3826f2b5783d7f082d Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 19 Aug 2022 10:23:45 +0800 Subject: [PATCH 01/34] #2701 find available specs --- models/cloudbrain.go | 4 ++ models/resource_specification.go | 60 +++++++++++++++++++ routers/repo/cloudbrain.go | 8 +++ .../resource/resource_specification.go | 25 ++++++++ 4 files changed, 97 insertions(+) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 61b7abd474..67d6f42440 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -119,6 +119,10 @@ const ( //AI center AICenterOfCloudBrainOne = "OpenIOne" AICenterOfCloudBrainTwo = "OpenITwo" + + //ComputeResource + GPU = "GPU" + NPU = "NPU" ) type Cloudbrain struct { diff --git a/models/resource_specification.go b/models/resource_specification.go index dca6647ab9..60e59b253c 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -141,6 +141,37 @@ func (r ResourceSpecAndQueue) ConvertToRes() *ResourceSpecAndQueueRes { } } +type FindSpecsOptions struct { + JobType JobType + ComputeResource string + Cluster string + AiCenterCode string +} + +type Specification struct { + ID int64 + SourceSpecId string + AccCardsNum int + AccCardType string + CpuCores int + MemGiB float32 + GPUMemGiB float32 + ShareMemGiB float32 + ComputeResource string + UnitPrice int + QueueId int64 + QueueCode string + Cluster string + AiCenterCode string + AiCenterName string + IsExclusive bool + ExclusiveOrg string +} + +func (Specification) TableName() string { + return "resource_specification" +} + func InsertResourceSpecification(r ResourceSpecification) (int64, error) { return x.Insert(&r) } @@ -283,3 +314,32 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS return sess.Commit() } + +func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { + var cond = builder.NewCond() + if opts.JobType != "" { + cond = cond.And(builder.Eq{"resource_scene.job_type": 
opts.JobType}) + } + if opts.ComputeResource != "" { + cond = cond.And(builder.Eq{"resource_queue.compute_resource": opts.ComputeResource}) + } + if opts.ComputeResource != "" { + cond = cond.And(builder.Eq{"resource_queue.cluster": opts.Cluster}) + } + if opts.AiCenterCode != "" { + cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) + } + cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) + + r := make([]Specification, 0) + err := x.Where(cond). + Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). + Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). + Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id"). + OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc"). + Unscoped().Find(&r) + if err != nil { + return nil, err + } + return r, nil +} diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 76bf9b0764..97ba9bde43 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -2,6 +2,7 @@ package repo import ( "bufio" + "code.gitea.io/gitea/services/cloudbrain/resource" "encoding/json" "errors" "fmt" @@ -166,6 +167,13 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { if cloudbrain.InferenceResourceSpecs != nil { ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec } + specs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeDebug, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + }) + ctx.Data["Specs"] = specs if cloudbrain.SpecialPools != nil { var debugGpuTypes []*models.GpuInfo diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go 
index 680b989336..31c8b3b25b 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -184,3 +184,28 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod Comment: comment, }) } + +func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Specification, error) { + r, err := models.FindAvailableSpecs(opts) + if err != nil { + log.Error("FindAvailableSpecs error.%v", err) + return nil, err + } + specs := make([]models.Specification, 0, len(r)) + //filter exclusive spec + for i := 0; i < len(r); i++ { + spec := r[i] + if !spec.IsExclusive { + specs = append(specs, spec) + continue + } + orgs := strings.Split(spec.ExclusiveOrg, ";") + for _, org := range orgs { + isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) + if isMember { + specs = append(specs, spec) + } + } + } + return specs, err +} -- 2.34.1 From cbd18153ba7b394f5c3dccbefe3a6b02a65cb5d8 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 19 Aug 2022 11:29:16 +0800 Subject: [PATCH 02/34] #2701 update --- routers/repo/cloudbrain.go | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 97ba9bde43..6423960aba 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -167,13 +167,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { if cloudbrain.InferenceResourceSpecs != nil { ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec } - specs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ - JobType: models.JobTypeDebug, - ComputeResource: models.GPU, - Cluster: models.OpenICluster, - AiCenterCode: models.AICenterOfCloudBrainOne, - }) - ctx.Data["Specs"] = specs + + prepareCloudbrainOneSpecs(ctx) if cloudbrain.SpecialPools != nil { var debugGpuTypes []*models.GpuInfo @@ -226,6 
+221,34 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { return nil } +func prepareCloudbrainOneSpecs(ctx *context.Context) { + debugSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeDebug, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + }) + ctx.Data["debug_specs"] = debugSpecs + b, _ := json.Marshal(debugSpecs) + log.Info("%s", string(b)) + + trainSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeTrain, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + }) + ctx.Data["train_specs"] = trainSpecs + + inferenceSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeInference, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + }) + ctx.Data["inference_specs"] = inferenceSpecs +} + func CloudBrainNew(ctx *context.Context) { err := cloudBrainNewDataPrepare(ctx) if err != nil { -- 2.34.1 From 170cbacbf48e2b28b424060eba4ddb4bdc7d7de6 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 19 Aug 2022 18:02:51 +0800 Subject: [PATCH 03/34] #2701 update --- models/resource_specification.go | 8 ++- modules/auth/cloudbrain.go | 2 + modules/cloudbrain/cloudbrain.go | 52 +++---------------- routers/repo/cloudbrain.go | 48 ++++++++++++++++- .../resource/resource_specification.go | 25 ++++++++- 5 files changed, 83 insertions(+), 52 deletions(-) diff --git a/models/resource_specification.go b/models/resource_specification.go index 60e59b253c..8ef95a8da8 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -146,6 +146,7 @@ type FindSpecsOptions struct { ComputeResource string Cluster string AiCenterCode string + SpecId int64 } type Specification struct { @@ -315,7 +316,7 @@ func 
SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS return sess.Commit() } -func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { +func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) { var cond = builder.NewCond() if opts.JobType != "" { cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) @@ -329,9 +330,12 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { if opts.AiCenterCode != "" { cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) } + if opts.SpecId > 0 { + cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId}) + } cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) - r := make([]Specification, 0) + r := make([]*Specification, 0) err := x.Where(cond). Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). 
diff --git a/modules/auth/cloudbrain.go b/modules/auth/cloudbrain.go index 39685990d1..5bd294f2a7 100755 --- a/modules/auth/cloudbrain.go +++ b/modules/auth/cloudbrain.go @@ -24,6 +24,7 @@ type CreateCloudBrainForm struct { Params string `form:"run_para_list"` BranchName string `form:"branch_name"` DatasetName string `form:"dataset_name"` + SpecId int64 `form:"spec_id"` } type CommitImageCloudBrainForm struct { @@ -72,6 +73,7 @@ type CreateCloudBrainInferencForm struct { CkptName string `form:"ckpt_name" binding:"Required"` LabelName string `form:"label_names" binding:"Required"` DatasetName string `form:"dataset_name"` + SpecId int64 `form:"spec_id"` } func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 1872375da6..03b73e5594 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -79,6 +79,7 @@ type GenerateCloudBrainTaskReq struct { ModelVersion string CkptName string LabelName string + Spec *models.Specification } func GetCloudbrainDebugCommand() string { @@ -227,50 +228,9 @@ func AdminOrImageCreaterRight(ctx *context.Context) { } func GenerateTask(req GenerateCloudBrainTaskReq) error { - var resourceSpec *models.ResourceSpec var versionCount int if req.JobType == string(models.JobTypeTrain) { versionCount = 1 - if TrainResourceSpecs == nil { - json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) - } - for _, spec := range TrainResourceSpecs.ResourceSpec { - if req.ResourceSpecId == spec.Id { - resourceSpec = spec - break - } - } - } else if req.JobType == string(models.JobTypeInference) { - if InferenceResourceSpecs == nil { - json.Unmarshal([]byte(setting.InferenceResourceSpecs), &InferenceResourceSpecs) - } - for _, spec := range InferenceResourceSpecs.ResourceSpec { - if req.ResourceSpecId == spec.Id { - resourceSpec = spec - break - } - } - - } else { - if ResourceSpecs == 
nil { - json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) - } - for _, spec := range ResourceSpecs.ResourceSpec { - if req.ResourceSpecId == spec.Id { - resourceSpec = spec - break - } - } - - } - //如果没有匹配到spec信息,尝试从专属资源池获取 - if resourceSpec == nil && SpecialPools != nil { - resourceSpec = geMatchResourceSpec(req.JobType, req.GpuQueue, req.ResourceSpecId) - } - - if resourceSpec == nil { - log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) - return errors.New("no such resourceSpec") } volumes := []models.Volume{ @@ -342,7 +302,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ JobName: req.JobName, RetryCount: 1, - GpuType: req.GpuQueue, + GpuType: req.Spec.QueueCode, Image: req.Image, TaskRoles: []models.TaskRole{ { @@ -350,10 +310,10 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { TaskNumber: 1, MinSucceededTaskCount: 1, MinFailedTaskCount: 1, - CPUNumber: resourceSpec.CpuNum, - GPUNumber: resourceSpec.GpuNum, - MemoryMB: resourceSpec.MemMiB, - ShmMB: resourceSpec.ShareMemMiB, + CPUNumber: req.Spec.CpuCores, + GPUNumber: req.Spec.AccCardsNum, + MemoryMB: int(req.Spec.MemGiB * 1024), + ShmMB: int(req.Spec.ShareMemGiB * 1024), Command: req.Command, NeedIBDevice: false, IsMainRole: false, diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 6423960aba..ec7ab858c4 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -368,6 +368,17 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobType(jobType), + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + 
ctx.RenderWithErr("Illegal resource specification", tpl, &form) + return + } + req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -393,6 +404,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { BenchmarkChildTypeID: 0, ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, } err = cloudbrain.GenerateTask(req) @@ -515,7 +527,16 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } - + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeInference, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal resource specification", tpl, &form) + return + } req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -544,6 +565,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra CkptName: form.CkptName, TrainUrl: form.TrainUrl, LabelName: labelName, + Spec: spec, } err = cloudbrain.GenerateTask(req) @@ -2453,6 +2475,17 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo return } + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeBenchmark, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form) + return + } + req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -2478,6 +2511,7 @@ func BenchMarkAlgorithmCreate(ctx 
*context.Context, form auth.CreateCloudBrainFo BenchmarkChildTypeID: benchmarkChildTypeID, ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, } err = cloudbrain.GenerateTask(req) @@ -2581,7 +2615,16 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } - + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeBenchmark, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal resource specification", tpl, &form) + return + } req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -2607,6 +2650,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) BenchmarkChildTypeID: benchmarkChildTypeID, ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, } err = cloudbrain.GenerateTask(req) diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go index 31c8b3b25b..db104a9ac9 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -185,18 +185,23 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod }) } -func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Specification, error) { +func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) { r, err := models.FindAvailableSpecs(opts) if err != nil { log.Error("FindAvailableSpecs error.%v", err) return nil, err } - specs := make([]models.Specification, 0, len(r)) + specs := 
make([]*models.Specification, 0, len(r)) + specMap := make(map[int64]string, 0) //filter exclusive spec for i := 0; i < len(r); i++ { spec := r[i] + if _, has := specMap[spec.ID]; has { + continue + } if !spec.IsExclusive { specs = append(specs, spec) + specMap[spec.ID] = "" continue } orgs := strings.Split(spec.ExclusiveOrg, ";") @@ -204,8 +209,24 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Sp isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) if isMember { specs = append(specs, spec) + specMap[spec.ID] = "" } } } return specs, err } + +func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) { + if specId == 0 { + return nil, nil + } + opts.SpecId = specId + r, err := FindAvailableSpecs(userId, opts) + if err != nil { + return nil, err + } + if r == nil || len(r) == 0 { + return nil, nil + } + return r[0], nil +} -- 2.34.1 From 3bb312dd4c0b87a22dedd61130192356c18155bf Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Mon, 22 Aug 2022 11:27:56 +0800 Subject: [PATCH 04/34] #2701 update --- models/cloudbrain.go | 16 +- models/cloudbrain_spec.go | 85 ++++++++ models/models.go | 1 + modules/cloudbrain/cloudbrain.go | 2 + routers/repo/cloudbrain.go | 201 ++++++++---------- .../resource/resource_specification.go | 40 ++++ 6 files changed, 226 insertions(+), 119 deletions(-) create mode 100644 models/cloudbrain_spec.go diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 67d6f42440..7e986be52a 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -193,6 +193,7 @@ type Cloudbrain struct { BenchmarkTypeRankLink string `xorm:"-"` StartTime timeutil.TimeStamp EndTime timeutil.TimeStamp + Spec *Specification `xorm:"-"` } func (task *Cloudbrain) ComputeAndSetDuration() { @@ -1656,11 +1657,24 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e } func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) { + session := 
x.NewSession() + defer session.Close() + + err = session.Begin() cloudbrain.TrainJobDuration = DURATION_STR_ZERO - if _, err = x.NoAutoTime().Insert(cloudbrain); err != nil { + if _, err = session.NoAutoTime().InsertOne(cloudbrain); err != nil { + session.Rollback() return err } + if cloudbrain.Spec != nil { + if _, err = session.Insert(NewCloudBrainSpec(cloudbrain.ID, *cloudbrain.Spec)); err != nil { + session.Rollback() + return err + } + } + session.Commit() + go IncreaseDatasetUseCount(cloudbrain.Uuid) return nil } diff --git a/models/cloudbrain_spec.go b/models/cloudbrain_spec.go new file mode 100644 index 0000000000..32f1840376 --- /dev/null +++ b/models/cloudbrain_spec.go @@ -0,0 +1,85 @@ +package models + +import ( + "code.gitea.io/gitea/modules/timeutil" +) + +type CloudbrainSpec struct { + CloudbrainID int64 `xorm:"pk"` + SpecId int64 `xorm:"index"` + SourceSpecId string + AccCardsNum int + AccCardType string + CpuCores int + MemGiB float32 + GPUMemGiB float32 + ShareMemGiB float32 + ComputeResource string + UnitPrice int + QueueId int64 + QueueCode string + Cluster string + AiCenterCode string + AiCenterName string + IsExclusive bool + ExclusiveOrg string + CreatedTime timeutil.TimeStamp `xorm:"created"` + UpdatedTime timeutil.TimeStamp `xorm:"updated"` +} + +func (s CloudbrainSpec) ConvertToSpecification() *Specification { + return &Specification{ + ID: s.SpecId, + SourceSpecId: s.SourceSpecId, + AccCardsNum: s.AccCardsNum, + AccCardType: s.AccCardType, + CpuCores: s.CpuCores, + MemGiB: s.MemGiB, + GPUMemGiB: s.GPUMemGiB, + ShareMemGiB: s.ShareMemGiB, + ComputeResource: s.ComputeResource, + UnitPrice: s.UnitPrice, + QueueId: s.QueueId, + QueueCode: s.QueueCode, + Cluster: s.Cluster, + AiCenterCode: s.AiCenterCode, + AiCenterName: s.AiCenterName, + IsExclusive: s.IsExclusive, + ExclusiveOrg: s.ExclusiveOrg, + } +} + +func NewCloudBrainSpec(cloudbrainId int64, s Specification) CloudbrainSpec { + return CloudbrainSpec{ + CloudbrainID: cloudbrainId, + 
SpecId: s.ID, + SourceSpecId: s.SourceSpecId, + AccCardsNum: s.AccCardsNum, + AccCardType: s.AccCardType, + CpuCores: s.CpuCores, + MemGiB: s.MemGiB, + GPUMemGiB: s.GPUMemGiB, + ShareMemGiB: s.ShareMemGiB, + ComputeResource: s.ComputeResource, + UnitPrice: s.UnitPrice, + QueueId: s.QueueId, + QueueCode: s.QueueCode, + Cluster: s.Cluster, + AiCenterCode: s.AiCenterCode, + AiCenterName: s.AiCenterName, + IsExclusive: s.IsExclusive, + ExclusiveOrg: s.ExclusiveOrg, + } +} + +func InsertCloudbrainSpec(c CloudbrainSpec) (int64, error) { + return x.Insert(&c) +} + +func GetCloudbrainSpecByID(cloudbrainId int64) (*CloudbrainSpec, error) { + r := &CloudbrainSpec{} + if _, err := x.Where("cloudbrain_id = ?", cloudbrainId).Get(r); err != nil { + return nil, err + } + return r, nil +} diff --git a/models/models.go b/models/models.go index 906acc58b1..192d2c1ab1 100755 --- a/models/models.go +++ b/models/models.go @@ -150,6 +150,7 @@ func init() { new(ResourceScene), new(ResourceSceneSpec), new(AdminOperateLog), + new(CloudbrainSpec), ) tablesStatistic = append(tablesStatistic, diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 03b73e5594..f18b61e97f 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -365,6 +365,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { CreatedUnix: createTime, UpdatedUnix: createTime, CommitID: req.CommitID, + Spec: req.Spec, }) if err != nil { @@ -376,6 +377,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { log.Error("GetCloudbrainByJobID failed: %v", err.Error()) return err } + stringId := strconv.FormatInt(task.ID, 10) if IsBenchmarkJob(req.JobType) { diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index ec7ab858c4..4a1df232a5 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -743,128 +743,13 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo 
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) return } - hasSpec := false - if task.JobType == string(models.JobTypeTrain) { - if cloudbrain.TrainResourceSpecs == nil { - json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) - } - - for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec { - if tmp.Id == task.ResourceSpecId { - hasSpec = true - ctx.Data["GpuNum"] = tmp.GpuNum - ctx.Data["CpuNum"] = tmp.CpuNum - ctx.Data["MemMiB"] = tmp.MemMiB - ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB - break - } - } - - } else if task.JobType == string(models.JobTypeInference) { - if cloudbrain.InferenceResourceSpecs == nil { - json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) - } - for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec { - if tmp.Id == task.ResourceSpecId { - hasSpec = true - ctx.Data["GpuNum"] = tmp.GpuNum - ctx.Data["CpuNum"] = tmp.CpuNum - ctx.Data["MemMiB"] = tmp.MemMiB - ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB - break - } - } - } else { - if cloudbrain.ResourceSpecs == nil { - json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) - } - for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec { - if tmp.Id == task.ResourceSpecId { - hasSpec = true - ctx.Data["GpuNum"] = tmp.GpuNum - ctx.Data["CpuNum"] = tmp.CpuNum - ctx.Data["MemMiB"] = tmp.MemMiB - ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB - break - - } - } - } - - if !hasSpec && cloudbrain.SpecialPools != nil { - - for _, specialPool := range cloudbrain.SpecialPools.Pools { - - if specialPool.ResourceSpec != nil { - - for _, spec := range specialPool.ResourceSpec { - if task.ResourceSpecId == spec.Id { - ctx.Data["GpuNum"] = spec.GpuNum - ctx.Data["CpuNum"] = spec.CpuNum - ctx.Data["MemMiB"] = spec.MemMiB - ctx.Data["ShareMemMiB"] = spec.ShareMemMiB - break - } - } - } - } + prepareSpec4Show(ctx, task) + if ctx.Written() { + return } if result != nil { jobRes, _ := 
models.ConvertToJobResultPayload(result.Payload) - jobRes.Resource.Memory = strings.ReplaceAll(jobRes.Resource.Memory, "Mi", "MB") - spec := "GPU数:" + strconv.Itoa(jobRes.Resource.NvidiaComGpu) + ",CPU数:" + strconv.Itoa(jobRes.Resource.CPU) + ",内存(MB):" + jobRes.Resource.Memory - ctx.Data["resource_spec"] = spec - if task.JobType == string(models.JobTypeTrain) { - if trainGpuInfos == nil { - json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) - } - for _, resourceType := range trainGpuInfos.GpuInfo { - if resourceType.Queue == jobRes.Config.GpuType { - ctx.Data["resource_type"] = resourceType.Value - } - } - - } else if task.JobType == string(models.JobTypeInference) { - if inferenceGpuInfos == nil { - json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos) - } - for _, resourceType := range inferenceGpuInfos.GpuInfo { - if resourceType.Queue == jobRes.Config.GpuType { - ctx.Data["resource_type"] = resourceType.Value - } - } - } else if cloudbrain.IsBenchmarkJob(task.JobType) { - if benchmarkGpuInfos == nil { - json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) - } - - for _, resourceType := range benchmarkGpuInfos.GpuInfo { - if resourceType.Queue == jobRes.Config.GpuType { - ctx.Data["resource_type"] = resourceType.Value - } - } - - } else { - if gpuInfos == nil { - json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) - } - for _, resourceType := range gpuInfos.GpuInfo { - if resourceType.Queue == jobRes.Config.GpuType { - ctx.Data["resource_type"] = resourceType.Value - } - } - } - - if cloudbrain.SpecialPools != nil { - for _, specialPool := range cloudbrain.SpecialPools.Pools { - for _, resourceType := range specialPool.Pool { - if resourceType.Queue == jobRes.Config.GpuType { - ctx.Data["resource_type"] = resourceType.Value - } - } - } - } taskRoles := jobRes.TaskRoles taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) ctx.Data["taskRes"] = taskRes @@ -988,6 +873,86 @@ 
func CloudBrainDebug(ctx *context.Context) { ctx.Redirect(debugUrl) } +func prepareSpec4Show(ctx *context.Context, task *models.Cloudbrain) { + s, err := resource.GetCloudbrainSpec(task.ID) + if err != nil { + log.Info("error:" + err.Error()) + ctx.NotFound(ctx.Req.URL.RequestURI(), nil) + return + } + + ctx.Data["Spec"] = s +} + +func oldPrepareSpec4Show(ctx *context.Context, task *models.Cloudbrain) { + hasSpec := false + if task.JobType == string(models.JobTypeTrain) { + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + + for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec { + if tmp.Id == task.ResourceSpecId { + hasSpec = true + ctx.Data["GpuNum"] = tmp.GpuNum + ctx.Data["CpuNum"] = tmp.CpuNum + ctx.Data["MemMiB"] = tmp.MemMiB + ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB + break + } + } + + } else if task.JobType == string(models.JobTypeInference) { + if cloudbrain.InferenceResourceSpecs == nil { + json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) + } + for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec { + if tmp.Id == task.ResourceSpecId { + hasSpec = true + ctx.Data["GpuNum"] = tmp.GpuNum + ctx.Data["CpuNum"] = tmp.CpuNum + ctx.Data["MemMiB"] = tmp.MemMiB + ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB + break + } + } + } else { + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec { + if tmp.Id == task.ResourceSpecId { + hasSpec = true + ctx.Data["GpuNum"] = tmp.GpuNum + ctx.Data["CpuNum"] = tmp.CpuNum + ctx.Data["MemMiB"] = tmp.MemMiB + ctx.Data["ShareMemMiB"] = tmp.ShareMemMiB + break + + } + } + } + + if !hasSpec && cloudbrain.SpecialPools != nil { + + for _, specialPool := range cloudbrain.SpecialPools.Pools { + + if specialPool.ResourceSpec != nil { + + for _, spec := range 
specialPool.ResourceSpec { + if task.ResourceSpecId == spec.Id { + ctx.Data["GpuNum"] = spec.GpuNum + ctx.Data["CpuNum"] = spec.CpuNum + ctx.Data["MemMiB"] = spec.MemMiB + ctx.Data["ShareMemMiB"] = spec.ShareMemMiB + break + } + } + } + } + } +} + func CloudBrainCommitImageShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true ctx.Data["Type"] = ctx.Cloudbrain.Type diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go index db104a9ac9..e27d88ef16 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -230,3 +230,43 @@ func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) ( } return r[0], nil } + +func InsertCloudbrainSpec(cloudbrainId int64, s *models.Specification) error { + c := models.CloudbrainSpec{ + CloudbrainID: cloudbrainId, + SpecId: s.ID, + SourceSpecId: s.SourceSpecId, + AccCardsNum: s.AccCardsNum, + AccCardType: s.AccCardType, + CpuCores: s.CpuCores, + MemGiB: s.MemGiB, + GPUMemGiB: s.GPUMemGiB, + ShareMemGiB: s.ShareMemGiB, + ComputeResource: s.ComputeResource, + UnitPrice: s.UnitPrice, + QueueId: s.QueueId, + QueueCode: s.QueueCode, + Cluster: s.Cluster, + AiCenterCode: s.AiCenterCode, + AiCenterName: s.AiCenterName, + IsExclusive: s.IsExclusive, + ExclusiveOrg: s.ExclusiveOrg, + } + _, err := models.InsertCloudbrainSpec(c) + if err != nil { + log.Error("InsertCloudbrainSpec error.CloudbrainSpec=%v. 
err=%v", c, err) + return err + } + return nil +} + +func GetCloudbrainSpec(cloudbrainId int64) (*models.Specification, error) { + c, err := models.GetCloudbrainSpecByID(cloudbrainId) + if err != nil { + return nil, err + } + if c == nil { + return nil, nil + } + return c.ConvertToSpecification(), nil +} -- 2.34.1 From 2898c40fcc469e78bc3db0ee47320497b2edfb67 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 23 Aug 2022 16:39:45 +0800 Subject: [PATCH 05/34] #2701 update --- models/cloudbrain.go | 6 + models/cloudbrain_spec.go | 23 ++ models/resource_specification.go | 125 ++++++++- modules/auth/modelarts.go | 3 + modules/modelarts/modelarts.go | 31 +- routers/admin/resources.go | 36 +++ routers/private/internal.go | 2 + routers/repo/cloudbrain.go | 109 +------- routers/repo/modelarts.go | 124 ++++---- .../resource/resource_specification.go | 264 +++++++++++++++++- 10 files changed, 547 insertions(+), 176 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 7e986be52a..3c4a3ae98e 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -2313,3 +2313,9 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) { In("id", ids). Find(&cloudbrains) } + +func GetCloudbrainWithDeletedByIDs(ids []int64) ([]*Cloudbrain, error) { + cloudbrains := make([]*Cloudbrain, 0) + return cloudbrains, x. + In("id", ids).Unscoped().Find(&cloudbrains) +} diff --git a/models/cloudbrain_spec.go b/models/cloudbrain_spec.go index 32f1840376..c891c2fad0 100644 --- a/models/cloudbrain_spec.go +++ b/models/cloudbrain_spec.go @@ -83,3 +83,26 @@ func GetCloudbrainSpecByID(cloudbrainId int64) (*CloudbrainSpec, error) { } return r, nil } + +func FindNoSpecHistoricTask(page, pageSize int) ([]*Cloudbrain, error) { + r := make([]*Cloudbrain, 0) + err := x.Unscoped(). + Where(" 1=1 and not exists (select 1 from cloudbrain_spec where cloudbrain.id = cloudbrain_spec.cloudbrain_id)"). + Limit(pageSize, (page-1)*pageSize). + OrderBy("cloudbrain.id"). 
+ Find(&r) + if err != nil { + return nil, err + } + return r, nil +} + +func CountNoSpecHistoricTask() (int64, error) { + n, err := x.Unscoped(). + Where(" 1=1 and not exists (select 1 from cloudbrain_spec where cloudbrain.id = cloudbrain_spec.cloudbrain_id)"). + Count(&Cloudbrain{}) + if err != nil { + return 0, err + } + return n, nil +} diff --git a/models/resource_specification.go b/models/resource_specification.go index 8ef95a8da8..b2eadf2a12 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -147,6 +147,21 @@ type FindSpecsOptions struct { Cluster string AiCenterCode string SpecId int64 + QueueCode string + SourceSpecId string + AccCardsNum int + UseAccCardsNum bool + AccCardType string + CpuCores int + UseCpuCores bool + MemGiB float32 + UseMemGiB bool + GPUMemGiB float32 + UseGPUMemGiB bool + ShareMemGiB float32 + UseShareMemGiB bool + //if true,find specs no matter used or not used in scene. if false,only find specs used in scene + RequestAll bool } type Specification struct { @@ -316,9 +331,10 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS return sess.Commit() } -func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) { +//FindSpecs +func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) { var cond = builder.NewCond() - if opts.JobType != "" { + if !opts.RequestAll && opts.JobType != "" { cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) } if opts.ComputeResource != "" { @@ -333,17 +349,108 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) { if opts.SpecId > 0 { cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId}) } - cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) - + if opts.QueueCode != "" { + cond = cond.And(builder.Eq{"resource_queue.queue_code": opts.QueueCode}) + } + if opts.SourceSpecId != "" { + cond = 
cond.And(builder.Eq{"resource_specification.source_spec_id": opts.SourceSpecId}) + } + if opts.UseAccCardsNum { + cond = cond.And(builder.Eq{"resource_specification.acc_cards_num": opts.AccCardsNum}) + } + if opts.AccCardType != "" { + cond = cond.And(builder.Eq{"resource_queue.acc_card_type": opts.AccCardType}) + } + if opts.UseCpuCores { + cond = cond.And(builder.Eq{"resource_specification.cpu_cores": opts.CpuCores}) + } + if opts.UseMemGiB { + cond = cond.And(builder.Eq{"resource_specification.mem_gi_b": opts.MemGiB}) + } + if opts.UseGPUMemGiB { + cond = cond.And(builder.Eq{"resource_specification.gpu_mem_gi_b": opts.GPUMemGiB}) + } + if opts.UseShareMemGiB { + cond = cond.And(builder.Eq{"resource_specification.share_mem_gi_b": opts.ShareMemGiB}) + } r := make([]*Specification, 0) - err := x.Where(cond). - Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). - Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). - Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id"). - OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc"). + s := x.Where(cond). + Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id") + + if !opts.RequestAll { + s = s.Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). + Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id") + } + err := s.OrderBy("resource_queue.compute_resource asc,resource_queue.acc_card_type asc,resource_specification.acc_cards_num asc"). 
Unscoped().Find(&r) if err != nil { return nil, err } return r, nil } + +func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specification, error) { + sess := x.NewSession() + defer sess.Close() + + sess.Begin() + param := ResourceQueue{ + QueueCode: queue.QueueCode, + Cluster: queue.Cluster, + AiCenterCode: queue.AiCenterCode, + ComputeResource: queue.ComputeResource, + AccCardType: queue.AccCardType, + } + _, err := sess.Get(&param) + if err != nil { + sess.Rollback() + return nil, err + } + if param.ID == 0 { + _, err = sess.InsertOne(&queue) + if err != nil { + sess.Rollback() + return nil, err + } + } else { + queue = param + } + + spec.QueueId = queue.ID + _, err = sess.InsertOne(&spec) + if err != nil { + sess.Rollback() + return nil, err + } + sess.Commit() + return &Specification{ + ID: spec.ID, + SourceSpecId: spec.SourceSpecId, + AccCardsNum: spec.AccCardsNum, + AccCardType: queue.AccCardType, + CpuCores: spec.CpuCores, + MemGiB: spec.MemGiB, + GPUMemGiB: spec.GPUMemGiB, + ShareMemGiB: spec.ShareMemGiB, + ComputeResource: queue.ComputeResource, + UnitPrice: spec.UnitPrice, + QueueId: queue.ID, + QueueCode: queue.QueueCode, + Cluster: queue.Cluster, + AiCenterCode: queue.AiCenterCode, + AiCenterName: queue.AiCenterName, + }, nil +} + +func GetCloudbrainOneAccCardType(queueCode string) string { + switch queueCode { + case "a100": + return "A100" + case "openidebug": + return "T4" + case "openidgx": + return "V100" + + } + return "" +} diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index ce41f5d1e4..23e1f325af 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -22,6 +22,7 @@ type CreateModelArtsNotebookForm struct { Description string `form:"description"` Flavor string `form:"flavor" binding:"Required"` ImageId string `form:"image_id" binding:"Required"` + SpecId int64 `form:"spec_id" binding:"Required"` } func (f *CreateModelArtsNotebookForm) Validate(ctx *macaron.Context, errs binding.Errors)
binding.Errors { @@ -46,6 +47,7 @@ type CreateModelArtsTrainJobForm struct { VersionName string `form:"version_name" binding:"Required"` FlavorName string `form:"flaver_names" binding:"Required"` EngineName string `form:"engine_names" binding:"Required"` + SpecId int64 `form:"spec_id" binding:"Required"` } type CreateModelArtsInferenceJobForm struct { @@ -71,6 +73,7 @@ type CreateModelArtsInferenceJobForm struct { ModelName string `form:"model_name" binding:"Required"` ModelVersion string `form:"model_version" binding:"Required"` CkptName string `form:"ckpt_name" binding:"Required"` + SpecId int64 `form:"spec_id" binding:"Required"` } func (f *CreateModelArtsTrainJobForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 8dcf1b1a92..637f7a3b51 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -84,7 +84,6 @@ type GenerateTrainJobReq struct { BootFileUrl string DataUrl string TrainUrl string - FlavorCode string LogUrl string PoolID string WorkServerNumber int @@ -96,6 +95,7 @@ type GenerateTrainJobReq struct { BranchName string PreVersionId int64 PreVersionName string + FlavorCode string FlavorName string VersionCount int EngineName string @@ -103,6 +103,7 @@ type GenerateTrainJobReq struct { UserImageUrl string UserCommand string DatasetName string + Spec *models.Specification } type GenerateInferenceJobReq struct { @@ -115,7 +116,6 @@ type GenerateInferenceJobReq struct { BootFileUrl string DataUrl string TrainUrl string - FlavorCode string LogUrl string PoolID string WorkServerNumber int @@ -134,6 +134,7 @@ type GenerateInferenceJobReq struct { ModelVersion string CkptName string ResultUrl string + Spec *models.Specification } type VersionInfo struct { @@ -256,7 +257,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin return nil } -func GenerateNotebook2(ctx *context.Context, displayJobName, 
jobName, uuid, description, flavor, imageId string) error { +func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, description, imageId string, spec *models.Specification) error { if poolInfos == nil { json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) } @@ -270,7 +271,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc jobResult, err := createNotebook2(models.CreateNotebook2Params{ JobName: jobName, Description: description, - Flavor: flavor, + Flavor: spec.SourceSpecId, Duration: autoStopDurationMs, ImageID: imageId, PoolID: poolInfos.PoolInfo[0].PoolId, @@ -292,7 +293,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc RepoID: ctx.Repo.Repository.ID, JobID: jobResult.ID, JobName: jobName, - FlavorCode: flavor, + FlavorCode: spec.SourceSpecId, DisplayJobName: displayJobName, JobType: string(models.JobTypeDebug), Type: models.TypeCloudBrainTwo, @@ -302,6 +303,7 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc Description: description, CreatedUnix: createTime, UpdatedUnix: createTime, + Spec: spec, }) if err != nil { @@ -335,7 +337,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error PoolID: req.PoolID, CreateVersion: true, Flavor: models.Flavor{ - Code: req.FlavorCode, + Code: req.Spec.SourceSpecId, }, Parameter: req.Parameters, UserImageUrl: req.UserImageUrl, @@ -357,7 +359,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error PoolID: req.PoolID, CreateVersion: true, Flavor: models.Flavor{ - Code: req.FlavorCode, + Code: req.Spec.SourceSpecId, }, Parameter: req.Parameters, }, @@ -391,7 +393,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error BootFile: req.BootFile, DataUrl: req.DataUrl, LogUrl: req.LogUrl, - FlavorCode: req.FlavorCode, + FlavorCode: req.Spec.SourceSpecId, Description: req.Description, WorkServerNumber: req.WorkServerNumber, 
FlavorName: req.FlavorName, @@ -400,6 +402,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error TotalVersionCount: req.TotalVersionCount, CreatedUnix: createTime, UpdatedUnix: createTime, + Spec: req.Spec, }) if createErr != nil { @@ -451,7 +454,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job LogUrl: req.LogUrl, PoolID: req.PoolID, Flavor: models.Flavor{ - Code: req.FlavorCode, + Code: req.Spec.SourceSpecId, }, Parameter: req.Parameters, PreVersionId: req.PreVersionId, @@ -472,7 +475,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job LogUrl: req.LogUrl, PoolID: req.PoolID, Flavor: models.Flavor{ - Code: req.FlavorCode, + Code: req.Spec.SourceSpecId, }, Parameter: req.Parameters, PreVersionId: req.PreVersionId, @@ -524,7 +527,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job DataUrl: req.DataUrl, LogUrl: req.LogUrl, PreVersionId: req.PreVersionId, - FlavorCode: req.FlavorCode, + FlavorCode: req.Spec.SourceSpecId, Description: req.Description, WorkServerNumber: req.WorkServerNumber, FlavorName: req.FlavorName, @@ -533,6 +536,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job VersionCount: VersionListCount + 1, CreatedUnix: createTime, UpdatedUnix: createTime, + Spec: req.Spec, }) if createErr != nil { log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error()) @@ -716,7 +720,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e PoolID: req.PoolID, CreateVersion: true, Flavor: models.Flavor{ - Code: req.FlavorCode, + Code: req.Spec.SourceSpecId, }, Parameter: req.Parameters, }, @@ -753,7 +757,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e BootFile: req.BootFile, DataUrl: req.DataUrl, LogUrl: req.LogUrl, - FlavorCode: req.FlavorCode, + FlavorCode: req.Spec.SourceSpecId, Description: req.Description, 
WorkServerNumber: req.WorkServerNumber, FlavorName: req.FlavorName, @@ -769,6 +773,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e ResultUrl: req.ResultUrl, CreatedUnix: createTime, UpdatedUnix: createTime, + Spec: req.Spec, }) if err != nil { diff --git a/routers/admin/resources.go b/routers/admin/resources.go index 7d267c19c2..808c44f3fd 100644 --- a/routers/admin/resources.go +++ b/routers/admin/resources.go @@ -8,6 +8,8 @@ import ( "code.gitea.io/gitea/routers/response" "code.gitea.io/gitea/services/cloudbrain/resource" "net/http" + "strconv" + "strings" ) const ( @@ -246,3 +248,37 @@ func UpdateResourceScene(ctx *context.Context, req models.ResourceSceneReq) { } ctx.JSON(http.StatusOK, response.Success()) } + +func RefreshHistorySpec(ctx *context.Context) { + scope := ctx.Query("scope") + list := ctx.Query("list") + + var scopeAll = false + if scope == "all" { + scopeAll = true + } + var ids = make([]int64, 0) + if list != "" { + strs := strings.Split(list, "|") + for _, s := range strs { + i, err := strconv.ParseInt(s, 10, 64) + if err != nil { + ctx.JSON(http.StatusOK, response.ServerError(err.Error())) + return + } + ids = append(ids, i) + } + + } + + total, success, err := resource.RefreshHistorySpec(scopeAll, ids) + if err != nil { + log.Error("RefreshHistorySpec error. 
%v", err) + ctx.JSON(http.StatusOK, response.ServerError(err.Error())) + return + } + r := make(map[string]interface{}, 0) + r["success"] = success + r["total"] = total + ctx.JSON(http.StatusOK, response.SuccessWithData(r)) +} diff --git a/routers/private/internal.go b/routers/private/internal.go index 4731463b1b..3e2eeab31a 100755 --- a/routers/private/internal.go +++ b/routers/private/internal.go @@ -6,6 +6,7 @@ package private import ( + "code.gitea.io/gitea/routers/admin" "strings" "code.gitea.io/gitea/routers/repo" @@ -51,6 +52,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Get("/tool/org_stat", OrgStatisticManually) m.Post("/tool/update_repo_visit/:date", UpdateRepoVisit) m.Post("/task/history_handle/duration", repo.HandleTaskWithNoDuration) + m.Post("/resources/specification/handle_historical_task", admin.RefreshHistorySpec) }, CheckInternalToken) } diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 4a1df232a5..a297ed133e 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -122,89 +122,8 @@ func cloudBrainNewDataPrepare(ctx *context.Context) error { ctx.Data["QueuesDetail"] = queuesDetail } - cloudbrain.InitSpecialPool() - - if gpuInfos == nil { - json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) - } - ctx.Data["gpu_types"] = gpuInfos.GpuInfo - - if trainGpuInfos == nil { - json.Unmarshal([]byte(setting.TrainGpuTypes), &trainGpuInfos) - } - ctx.Data["train_gpu_types"] = trainGpuInfos.GpuInfo - - if inferenceGpuInfos == nil && setting.InferenceGpuTypes != "" { - json.Unmarshal([]byte(setting.InferenceGpuTypes), &inferenceGpuInfos) - } - if inferenceGpuInfos != nil { - ctx.Data["inference_gpu_types"] = inferenceGpuInfos.GpuInfo - } - - if benchmarkGpuInfos == nil { - json.Unmarshal([]byte(setting.BenchmarkGpuTypes), &benchmarkGpuInfos) - } - ctx.Data["benchmark_gpu_types"] = benchmarkGpuInfos.GpuInfo - - if benchmarkResourceSpecs == nil { - json.Unmarshal([]byte(setting.BenchmarkResourceSpecs), 
&benchmarkResourceSpecs) - } - ctx.Data["benchmark_resource_specs"] = benchmarkResourceSpecs.ResourceSpec - - if cloudbrain.ResourceSpecs == nil { - json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) - } - ctx.Data["resource_specs"] = cloudbrain.ResourceSpecs.ResourceSpec - - if cloudbrain.TrainResourceSpecs == nil { - json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) - } - ctx.Data["train_resource_specs"] = cloudbrain.TrainResourceSpecs.ResourceSpec - - if cloudbrain.InferenceResourceSpecs == nil && setting.InferenceResourceSpecs != "" { - json.Unmarshal([]byte(setting.InferenceResourceSpecs), &cloudbrain.InferenceResourceSpecs) - } - if cloudbrain.InferenceResourceSpecs != nil { - ctx.Data["inference_resource_specs"] = cloudbrain.InferenceResourceSpecs.ResourceSpec - } - prepareCloudbrainOneSpecs(ctx) - if cloudbrain.SpecialPools != nil { - var debugGpuTypes []*models.GpuInfo - var trainGpuTypes []*models.GpuInfo - - for _, pool := range cloudbrain.SpecialPools.Pools { - isOrgMember, _ := models.IsOrganizationMemberByOrgName(pool.Org, ctx.User.ID) - if isOrgMember { - for _, jobType := range pool.JobType { - if jobType == string(models.JobTypeDebug) { - debugGpuTypes = append(debugGpuTypes, pool.Pool...) - if pool.ResourceSpec != nil { - ctx.Data["resource_specs"] = pool.ResourceSpec - } - } else if jobType == string(models.JobTypeTrain) { - trainGpuTypes = append(trainGpuTypes, pool.Pool...) 
- if pool.ResourceSpec != nil { - ctx.Data["train_resource_specs"] = pool.ResourceSpec - } - } - } - break - } - - } - - if len(debugGpuTypes) > 0 { - ctx.Data["gpu_types"] = debugGpuTypes - } - - if len(trainGpuTypes) > 0 { - ctx.Data["train_gpu_types"] = trainGpuTypes - } - - } - ctx.Data["params"] = "" ctx.Data["branchName"] = ctx.Repo.BranchName @@ -229,8 +148,6 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) { AiCenterCode: models.AICenterOfCloudBrainOne, }) ctx.Data["debug_specs"] = debugSpecs - b, _ := json.Marshal(debugSpecs) - log.Info("%s", string(b)) trainSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ JobType: models.JobTypeTrain, @@ -247,6 +164,14 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) { AiCenterCode: models.AICenterOfCloudBrainOne, }) ctx.Data["inference_specs"] = inferenceSpecs + + benchmarkSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeBenchmark, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + }) + ctx.Data["benchmark_specs"] = benchmarkSpecs } func CloudBrainNew(ctx *context.Context) { @@ -348,18 +273,10 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { command = commandTrain } - errStr := checkCloudBrainSpecialPool(ctx, jobType, gpuQueue, resourceSpecId) - - if errStr != "" { - cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr(errStr, tpl, &form) - return - } - if branchName == "" { branchName = cloudbrain.DefaultBranchName } - errStr = loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath) + errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ModelMountPath) if errStr != "" { cloudBrainNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr(errStr), tpl, &form) @@ -375,7 +292,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { AiCenterCode: 
models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("Illegal resource specification", tpl, &form) + ctx.RenderWithErr("Resource specification not available", tpl, &form) return } @@ -534,7 +451,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("Illegal resource specification", tpl, &form) + ctx.RenderWithErr("Resource specification not available", tpl, &form) return } req := cloudbrain.GenerateCloudBrainTaskReq{ @@ -2447,7 +2364,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form) + ctx.RenderWithErr("Resource specification not available", tplCloudBrainBenchmarkNew, &form) return } @@ -2587,7 +2504,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) AiCenterCode: models.AICenterOfCloudBrainOne}) if err != nil || spec == nil { cloudBrainNewDataPrepare(ctx) - ctx.RenderWithErr("Illegal resource specification", tpl, &form) + ctx.RenderWithErr("Resource specification not available", tpl, &form) return } req := cloudbrain.GenerateCloudBrainTaskReq{ diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 948a0e7511..cbe2589b26 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -2,6 +2,7 @@ package repo import ( "archive/zip" + "code.gitea.io/gitea/services/cloudbrain/resource" "encoding/json" "errors" "fmt" @@ -141,11 +142,7 @@ func notebookNewDataPrepare(ctx *context.Context) error { } ctx.Data["images"] = modelarts.ImageInfos.ImageInfo - if modelarts.FlavorInfos == nil { - json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos) - } - 
ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo - setSpecBySpecialPoolConfig(ctx, string(models.JobTypeDebug)) + prepareCloudbrainTwoDebugSpecs(ctx) ctx.Data["datasetType"] = models.TypeCloudBrainTwo @@ -155,6 +152,16 @@ func notebookNewDataPrepare(ctx *context.Context) error { return nil } +func prepareCloudbrainTwoDebugSpecs(ctx *context.Context) { + noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeDebug, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo, + }) + ctx.Data["Specs"] = noteBookSpecs +} + func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { ctx.Data["PageIsNotebook"] = true jobName := form.JobName @@ -205,7 +212,6 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm jobName := util.ConvertDisplayJobNameToJobName(displayJobName) uuid := form.Attachment description := form.Description - flavor := form.Flavor imageId := form.ImageId repo := ctx.Repo.Repository @@ -241,14 +247,17 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm } } - errStr := checkModelArtsSpecialPool(ctx, flavor, string(models.JobTypeDebug)) - if errStr != "" { + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeDebug, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo}) + if err != nil || spec == nil { notebookNewDataPrepare(ctx) - ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsNotebookNew, &form) + ctx.RenderWithErr("Resource specification not available", tplModelArtsNotebookNew, &form) return } - - err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId) + err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, imageId, spec) if err != nil { log.Error("GenerateNotebook2 
failed, %v", err, ctx.Data["MsgID"]) notebookNewDataPrepare(ctx) @@ -728,14 +737,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error { } ctx.Data["engine_versions"] = versionInfos.Version - var flavorInfos modelarts.Flavor - if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["flavor_infos"] = flavorInfos.Info - - setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain)) + prepareCloudbrainTwoTrainSpecs(ctx) ctx.Data["params"] = "" ctx.Data["branchName"] = ctx.Repo.BranchName @@ -753,6 +755,16 @@ func trainJobNewDataPrepare(ctx *context.Context) error { return nil } +func prepareCloudbrainTwoTrainSpecs(ctx *context.Context) { + noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeTrain, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo, + }) + ctx.Data["Specs"] = noteBookSpecs +} + func setSpecBySpecialPoolConfig(ctx *context.Context, jobType string) { modelarts.InitSpecialPool() @@ -835,13 +847,7 @@ func trainJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArts } ctx.Data["engine_versions"] = versionInfos.Version - var flavorInfos modelarts.Flavor - if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["flavor_infos"] = flavorInfos.Info - setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain)) + prepareCloudbrainTwoTrainSpecs(ctx) configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) if err != nil { @@ -1020,13 +1026,7 @@ func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrai } ctx.Data["engine_versions"] = versionInfos.Version - var flavorInfos modelarts.Flavor - if err = 
json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - ctx.Data["flavor_infos"] = flavorInfos.Info - setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain)) + prepareCloudbrainTwoTrainSpecs(ctx) var Parameters modelarts.Parameters if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { @@ -1079,7 +1079,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) workServerNumber := form.WorkServerNumber engineID := form.EngineID bootFile := strings.TrimSpace(form.BootFile) - flavorCode := form.Flavor params := form.Params poolID := form.PoolID //isSaveParam := form.IsSaveParam @@ -1117,10 +1116,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) return } - errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain)) - if errStr != "" { + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeTrain, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo}) + if err != nil || spec == nil { trainJobErrorNewDataPrepare(ctx, form) - ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form) + ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobNew, &form) return } //Determine whether the task name of the task in the project is duplicated @@ -1283,7 +1286,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) BootFileUrl: codeObsPath + bootFile, BootFile: bootFile, TrainUrl: outputObsPath, - FlavorCode: flavorCode, WorkServerNumber: workServerNumber, EngineID: int64(engineID), LogUrl: logObsPath, @@ -1299,6 +1301,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) VersionCount: VersionCount, TotalVersionCount: modelarts.TotalVersionCount, DatasetName: datasetNames, + Spec: 
spec, } userCommand, userImageUrl := getUserCommand(engineID, req) req.UserCommand = userCommand @@ -1384,7 +1387,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ workServerNumber := form.WorkServerNumber engineID := form.EngineID bootFile := strings.TrimSpace(form.BootFile) - flavorCode := form.Flavor params := form.Params poolID := form.PoolID //isSaveParam := form.IsSaveParam @@ -1414,10 +1416,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ return } - errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain)) - if errStr != "" { + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeTrain, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo}) + if err != nil || spec == nil { versionErrorDataPrepare(ctx, form) - ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form) + ctx.RenderWithErr("Resource specification not available", tplModelArtsTrainJobVersionNew, &form) return } @@ -1571,7 +1577,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ BootFileUrl: codeObsPath + bootFile, BootFile: bootFile, TrainUrl: outputObsPath, - FlavorCode: flavorCode, WorkServerNumber: workServerNumber, IsLatestVersion: isLatestVersion, EngineID: int64(engineID), @@ -1588,6 +1593,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ PreVersionName: PreVersionName, TotalVersionCount: latestTask.TotalVersionCount + 1, DatasetName: datasetNames, + Spec: spec, } userCommand, userImageUrl := getUserCommand(engineID, req) req.UserCommand = userCommand @@ -2016,7 +2022,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference workServerNumber := form.WorkServerNumber engineID := form.EngineID bootFile := strings.TrimSpace(form.BootFile) - flavorCode := form.Flavor 
params := form.Params poolID := form.PoolID repo := ctx.Repo.Repository @@ -2078,13 +2083,16 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference } } - errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeInference)) - if errStr != "" { + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeInference, + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo}) + if err != nil || spec == nil { inferenceJobErrorNewDataPrepare(ctx, form) - ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form) + ctx.RenderWithErr("Resource specification not available", tplModelArtsInferenceJobNew, &form) return } - //todo: del the codeLocalPath _, err = ioutil.ReadDir(codeLocalPath) if err == nil { @@ -2170,7 +2178,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference BootFileUrl: codeObsPath + bootFile, BootFile: bootFile, TrainUrl: trainUrl, - FlavorCode: flavorCode, WorkServerNumber: workServerNumber, EngineID: int64(engineID), LogUrl: logObsPath, @@ -2369,14 +2376,7 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error { } ctx.Data["engine_versions"] = versionInfos.Version - var flavorInfos modelarts.Flavor - if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { - ctx.ServerError("json.Unmarshal failed:", err) - return err - } - - ctx.Data["flavor_infos"] = flavorInfos.Info - setSpecBySpecialPoolConfig(ctx, string(models.JobTypeInference)) + prepareCloudbrainTwoInferenceSpecs(ctx) ctx.Data["params"] = "" ctx.Data["branchName"] = ctx.Repo.BranchName @@ -2407,6 +2407,16 @@ func inferenceJobNewDataPrepare(ctx *context.Context) error { return nil } +func prepareCloudbrainTwoInferenceSpecs(ctx *context.Context) { + noteBookSpecs, _ := resource.FindAvailableSpecs(ctx.User.ID, models.FindSpecsOptions{ + JobType: models.JobTypeInference, 
+ ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo, + }) + ctx.Data["Specs"] = noteBookSpecs +} + func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error { ctx.Data["PageIsCloudBrain"] = true diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go index e27d88ef16..bab0d3096e 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -2,12 +2,17 @@ package resource import ( "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/grampus" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/routers/response" "code.gitea.io/gitea/services/admin/operate_log" + "encoding/json" + "errors" "fmt" "strings" + "time" ) func AddResourceSpecification(doerId int64, req models.ResourceSpecificationReq) error { @@ -186,7 +191,7 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod } func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) { - r, err := models.FindAvailableSpecs(opts) + r, err := models.FindSpecs(opts) if err != nil { log.Error("FindAvailableSpecs error.%v", err) return nil, err @@ -270,3 +275,260 @@ func GetCloudbrainSpec(cloudbrainId int64) (*models.Specification, error) { } return c.ConvertToSpecification(), nil } + +func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { + var success int64 + var total int64 + + if !scopeAll { + if ids == nil || len(ids) == 0 { + return 0, 0, nil + } + total = int64(len(ids)) + tasks, err := models.GetCloudbrainWithDeletedByIDs(ids) + if err != nil { + return total, 0, err + } + for _, task := range tasks { + err = RefreshOneHistorySpec(task) + if err != nil { + log.Error("RefreshOneHistorySpec 
error.%v", err) + continue + } + success++ + } + + } else { + page := 1 + pageSize := 100 + n, err := models.CountNoSpecHistoricTask() + if err != nil { + log.Error("FindNoSpecHistoricTask CountNoSpecHistoricTask error. e=%v", err) + return 0, 0, err + } + total = n + for i := 0; i < 1000; i++ { + list, err := models.FindNoSpecHistoricTask(page, pageSize) + if err != nil { + log.Error("FindNoSpecHistoricTask error.page=%d pageSize=%d e=%v", page, pageSize, err) + return total, success, err + } + if len(list) == 0 { + log.Info("RefreshHistorySpec. list is empty") + break + } + for _, task := range list { + time.Sleep(1 * time.Second) + err = RefreshOneHistorySpec(task) + if err != nil { + log.Error("RefreshOneHistorySpec error.%v", err) + continue + } + success++ + } + if len(list) < pageSize { + log.Info("RefreshHistorySpec. list < pageSize") + break + } + } + } + return total, success, nil + +} + +func RefreshOneHistorySpec(task *models.Cloudbrain) error { + var spec *models.Specification + var err error + switch task.Type { + case models.TypeCloudBrainOne: + spec, err = getCloudbrainOneSpec(task) + } + if err != nil { + log.Error("find spec error,task.ID=%d err=%v", task.ID, err) + return err + } + if spec == nil { + log.Error("find spec failed,task.ID=%d", task.ID) + return errors.New("find spec failed") + } + return InsertCloudbrainSpec(task.ID, spec) +} + +func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error) { + //find from remote + result, err := cloudbrain.GetJob(task.JobID) + if err != nil { + log.Error("getCloudbrainOneSpec error. 
%v", err) + return nil, err + } + if result != nil { + jobRes, _ := models.ConvertToJobResultPayload(result.Payload) + memSize, _ := models.ParseMemSizeFromGrampus(jobRes.Resource.Memory) + if task.ComputeResource == "CPU/GPU" { + task.ComputeResource = models.GPU + } + var shmMB float32 + if jobRes.Config.TaskRoles != nil && len(jobRes.Config.TaskRoles) > 0 { + shmMB = float32(jobRes.Config.TaskRoles[0].ShmMB) / 1024 + } + + opt := models.FindSpecsOptions{ + ComputeResource: task.ComputeResource, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + QueueCode: task.GpuQueue, + AccCardsNum: jobRes.Resource.NvidiaComGpu, + UseAccCardsNum: true, + CpuCores: jobRes.Resource.CPU, + UseCpuCores: true, + MemGiB: memSize, + UseMemGiB: memSize > 0, + ShareMemGiB: shmMB, + UseShareMemGiB: shmMB > 0, + RequestAll: true, + } + specs, err := models.FindSpecs(opt) + if err != nil { + log.Error("getCloudbrainOneSpec from remote error,%v", err) + return nil, err + } + if len(specs) == 1 { + return specs[0], nil + } + if len(specs) == 0 { + s, err := InitQueueAndSpec(opt, "云脑一", "处理历史云脑任务时自动添加") + if err != nil { + log.Error("getCloudbrainOneSpec InitQueueAndSpec error.err=%v", err) + return nil, nil + } + return s, nil + } + if len(specs) > 1 { + log.Error("Too many results matched.size=%d opt=%+v", len(specs), opt) + return nil, nil + } + + } else { + //find from config + var specConfig *models.ResourceSpec + hasSpec := false + if task.JobType == string(models.JobTypeTrain) { + if cloudbrain.TrainResourceSpecs == nil { + json.Unmarshal([]byte(setting.TrainResourceSpecs), &cloudbrain.TrainResourceSpecs) + } + for _, tmp := range cloudbrain.TrainResourceSpecs.ResourceSpec { + if tmp.Id == task.ResourceSpecId { + hasSpec = true + specConfig = tmp + break + } + } + } else if task.JobType == string(models.JobTypeInference) { + if cloudbrain.InferenceResourceSpecs == nil { + json.Unmarshal([]byte(setting.InferenceResourceSpecs), 
&cloudbrain.InferenceResourceSpecs) + } + for _, tmp := range cloudbrain.InferenceResourceSpecs.ResourceSpec { + if tmp.Id == task.ResourceSpecId { + hasSpec = true + specConfig = tmp + break + } + } + } else { + if cloudbrain.ResourceSpecs == nil { + json.Unmarshal([]byte(setting.ResourceSpecs), &cloudbrain.ResourceSpecs) + } + for _, tmp := range cloudbrain.ResourceSpecs.ResourceSpec { + if tmp.Id == task.ResourceSpecId { + hasSpec = true + specConfig = tmp + break + + } + } + } + if !hasSpec && cloudbrain.SpecialPools != nil { + + for _, specialPool := range cloudbrain.SpecialPools.Pools { + + if specialPool.ResourceSpec != nil { + + for _, spec := range specialPool.ResourceSpec { + if task.ResourceSpecId == spec.Id { + hasSpec = true + specConfig = spec + break + } + } + } + } + } + if specConfig == nil { + log.Error("getCloudbrainOneSpec from config failed,task.ResourceSpecId=%d", task.ResourceSpecId) + return nil, nil + } + opt := models.FindSpecsOptions{ + JobType: models.JobType(task.JobType), + ComputeResource: task.ComputeResource, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne, + QueueCode: task.GpuQueue, + AccCardsNum: specConfig.GpuNum, + UseAccCardsNum: true, + CpuCores: specConfig.GpuNum, + UseCpuCores: true, + MemGiB: float32(specConfig.MemMiB) / 1024, + UseMemGiB: true, + ShareMemGiB: float32(specConfig.ShareMemMiB) / 1024, + UseShareMemGiB: true, + RequestAll: true, + } + specs, err := models.FindSpecs(opt) + if err != nil { + log.Error("getCloudbrainOneSpec from config error,%v", err) + return nil, err + } + if len(specs) > 1 { + log.Error("Too many results matched.size=%d opt=%+v", len(specs), opt) + return nil, nil + } + if len(specs) == 0 { + s, err := InitQueueAndSpec(opt, "云脑一", "处理历史云脑任务时自动添加") + if err != nil { + log.Error("getCloudbrainOneSpec InitQueueAndSpec error.err=%v", err) + return nil, nil + } + return s, nil + } + return specs[0], nil + } + return nil, nil + +} + +func RefreshCloudbrainTwoSpec(task 
*models.Cloudbrain) error { + return nil +} + +func RefreshC2NetSpec(task *models.Cloudbrain) error { + return nil +} + +func InitQueueAndSpec(opt models.FindSpecsOptions, aiCenterName string, remark string) (*models.Specification, error) { + return models.InitQueueAndSpec(models.ResourceQueue{ + QueueCode: opt.QueueCode, + Cluster: opt.Cluster, + AiCenterCode: opt.AiCenterCode, + AiCenterName: aiCenterName, + ComputeResource: opt.ComputeResource, + AccCardType: models.GetCloudbrainOneAccCardType(opt.QueueCode), + Remark: remark, + }, models.ResourceSpecification{ + AccCardsNum: opt.AccCardsNum, + CpuCores: opt.CpuCores, + MemGiB: opt.MemGiB, + GPUMemGiB: opt.GPUMemGiB, + ShareMemGiB: opt.ShareMemGiB, + Status: models.SpecOffShelf, + }) +} -- 2.34.1 From 24f7aa6d4a2b26edbced6a2ec0d786dc540d2f59 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 23 Aug 2022 17:13:12 +0800 Subject: [PATCH 06/34] #2701 update --- routers/repo/modelarts.go | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index cbe2589b26..f764a2dae9 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -5,7 +5,6 @@ import ( "code.gitea.io/gitea/services/cloudbrain/resource" "encoding/json" "errors" - "fmt" "io" "io/ioutil" "net/http" @@ -320,24 +319,7 @@ func NotebookShow(ctx *context.Context) { if err == nil { task.User = user } - if modelarts.FlavorInfos == nil { - json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos) - } - - findSpec := false - if modelarts.FlavorInfos != nil { - ctx.Data["resource_spec"] = modelarts.FlavorInfos.FlavorInfo[0].Desc - for _, f := range modelarts.FlavorInfos.FlavorInfo { - if fmt.Sprint(f.Value) == task.FlavorCode { - ctx.Data["resource_spec"] = f.Desc - findSpec = true - break - } - } - } - - setShowSpecBySpecialPoolConfig(ctx, findSpec, task) - + prepareSpec4Show(ctx, task) if task.TrainJobDuration == "" { if task.Duration == 0 
{ var duration int64 @@ -1781,7 +1763,6 @@ func TrainJobShow(ctx *context.Context) { for i, task := range VersionListTasks { var parameters models.Parameters - err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), &parameters) if err != nil { log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) @@ -1802,6 +1783,14 @@ func TrainJobShow(ctx *context.Context) { datasetList = append(datasetList, GetCloudBrainDataSetInfo(task.Uuid, false)) VersionListTasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) VersionListTasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) + + //add spec + s, err := resource.GetCloudbrainSpec(task.Cloudbrain.ID) + if err != nil { + log.Error("TrainJobShow GetCloudbrainSpec error:" + err.Error()) + continue + } + VersionListTasks[i].Cloudbrain.Spec = s } pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) @@ -2533,7 +2522,7 @@ func InferenceJobShow(ctx *context.Context) { } else { task.Parameters = "" } - + prepareSpec4Show(ctx, task) LabelName := strings.Fields(task.LabelName) ctx.Data["labelName"] = LabelName ctx.Data["jobID"] = jobID -- 2.34.1 From 0e4d8844b82470622b0109a5f95c776c34bb23e6 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Tue, 23 Aug 2022 17:39:39 +0800 Subject: [PATCH 07/34] #2701 update --- models/cloudbrain.go | 9 +++++++- modules/cloudbrain/cloudbrain.go | 29 +++++------------------- routers/repo/cloudbrain.go | 39 ++++++++++++-------------------- 3 files changed, 29 insertions(+), 48 deletions(-) diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 3c4a3ae98e..c099511591 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -1947,11 +1947,18 @@ func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { return err } - if _, err = sess.NoAutoTime().Insert(new); err != nil { + if _, err = sess.NoAutoTime().InsertOne(new); err != nil { sess.Rollback() return err } + if new.Spec != nil { + if _,
err = sess.Insert(NewCloudBrainSpec(new.ID, *new.Spec)); err != nil { + sess.Rollback() + return err + } + } + if err = sess.Commit(); err != nil { return err } diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index f18b61e97f..90ed21e4d7 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -409,25 +409,7 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) error { jobName := task.JobName - var resourceSpec *models.ResourceSpec - if ResourceSpecs == nil { - json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) - } - for _, spec := range ResourceSpecs.ResourceSpec { - if task.ResourceSpecId == spec.Id { - resourceSpec = spec - } - } - - //如果没有匹配到spec信息,尝试从专属资源池获取 - if resourceSpec == nil && SpecialPools != nil { - resourceSpec = geMatchResourceSpec(task.JobType, task.GpuQueue, task.ResourceSpecId) - } - - if resourceSpec == nil { - log.Error("no such resourceSpecId(%d)", task.ResourceSpecId, ctx.Data["MsgID"]) - return errors.New("no such resourceSpec") - } + spec := task.Spec var datasetInfos map[string]models.DatasetInfo if task.Uuid != "" { var err error @@ -509,10 +491,10 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e TaskNumber: 1, MinSucceededTaskCount: 1, MinFailedTaskCount: 1, - CPUNumber: resourceSpec.CpuNum, - GPUNumber: resourceSpec.GpuNum, - MemoryMB: resourceSpec.MemMiB, - ShmMB: resourceSpec.ShareMemMiB, + CPUNumber: spec.CpuCores, + GPUNumber: spec.AccCardsNum, + MemoryMB: int(spec.MemGiB * 1024), + ShmMB: int(spec.ShareMemGiB * 1024), Command: GetCloudbrainDebugCommand(), //Command, NeedIBDevice: false, IsMainRole: false, @@ -550,6 +532,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e CreatedUnix: createTime, UpdatedUnix: createTime, BranchName: task.BranchName, + Spec: spec, } err = 
models.RestartCloudbrain(task, newTask) diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index a297ed133e..13fa718a5d 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -560,34 +560,25 @@ func CloudBrainRestart(ctx *context.Context) { break } - var hasSameResource bool - if gpuInfos == nil { - json.Unmarshal([]byte(setting.GpuTypes), &gpuInfos) - } - for _, resourceType := range gpuInfos.GpuInfo { - if resourceType.Queue == task.GpuQueue { - hasSameResource = true - break - } - } - if !hasSameResource && cloudbrain.SpecialPools != nil { - - for _, specialPool := range cloudbrain.SpecialPools.Pools { - cloudbrain.IsElementExist(specialPool.JobType, string(models.JobTypeDebug)) - for _, pool := range specialPool.Pool { - if pool.Queue == task.GpuQueue { - hasSameResource = true - } - } - } + specOld, err := resource.GetCloudbrainSpec(task.ID) + if err != nil { + log.Error("CloudBrainRestart GetCloudbrainSpec error.task.id = %d", task.ID) + resultCode = "-1" + errorMsg = "Resource specification not support any more" + break } - - if !hasSameResource { - log.Error("has no same resource, can not restart", ctx.Data["MsgID"]) + spec, err := resource.GetAndCheckSpec(ctx.User.ID, specOld.ID, models.FindSpecsOptions{ + JobType: models.JobType(task.JobType), + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + log.Error("CloudBrainRestart GetAndCheckSpec error.task.id = %d", task.ID) resultCode = "-1" - errorMsg = "the job's version is too old and can not be restarted" + errorMsg = "Resource specification not support any more" break } + task.Spec = spec count, err := models.GetCloudbrainCountByUserID(ctx.User.ID, string(models.JobTypeDebug)) if err != nil { -- 2.34.1 From 77dc319ab8967c1ebd987cfbe55c968fcf88255b Mon Sep 17 00:00:00 2001 From: chenshihai Date: Wed, 24 Aug 2022 11:11:17 +0800 Subject: [PATCH 08/34] specs useage --- 
options/locale/locale_en-US.ini | 3 ++ options/locale/locale_zh-CN.ini | 3 ++ templates/repo/cloudbrain/benchmark/new.tmpl | 41 +++++++++++++++---- templates/repo/cloudbrain/benchmark/show.tmpl | 18 +++++++- templates/repo/cloudbrain/inference/new.tmpl | 25 ++++++++--- templates/repo/cloudbrain/inference/show.tmpl | 18 +++++++- templates/repo/cloudbrain/new.tmpl | 30 +++++++++++--- templates/repo/cloudbrain/show.tmpl | 23 ++++++++--- templates/repo/cloudbrain/trainjob/new.tmpl | 30 +++++++++++--- templates/repo/cloudbrain/trainjob/show.tmpl | 18 +++++++- .../repo/modelarts/inferencejob/new.tmpl | 20 ++++++++- .../repo/modelarts/inferencejob/show.tmpl | 17 +++++++- templates/repo/modelarts/notebook/new.tmpl | 20 ++++++++- templates/repo/modelarts/notebook/show.tmpl | 18 +++++++- templates/repo/modelarts/trainjob/new.tmpl | 20 ++++++++- templates/repo/modelarts/trainjob/show.tmpl | 19 ++++++++- web_src/js/standalone/specsuse.js | 27 ++++++++++++ 17 files changed, 305 insertions(+), 45 deletions(-) create mode 100644 web_src/js/standalone/specsuse.js diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 833d4a2c7a..b79f66e22c 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -3134,6 +3134,9 @@ gpu_num = GPU cpu_num = CPU memory = Memory shared_memory = Shared Memory +gpu_memory = GPU Memory +free = Free +point_hr = Point/hr DEBUG = DEBUG diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 46056e03aa..41ad0b58b7 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -3150,6 +3150,9 @@ gpu_num = GPU数 cpu_num = CPU数 memory = 内存 shared_memory = 共享内存 +gpu_memory = 显存 +free = 免费 +point_hr = 积分/时 DEBUG = 调试任务 SNN4IMAGENET = 评测任务 diff --git a/templates/repo/cloudbrain/benchmark/new.tmpl b/templates/repo/cloudbrain/benchmark/new.tmpl index f8e83e1ae1..8469280e5f 100755 --- a/templates/repo/cloudbrain/benchmark/new.tmpl +++ 
b/templates/repo/cloudbrain/benchmark/new.tmpl @@ -71,7 +71,7 @@ onkeyup="this.value=this.value.substring(0, 255)">{{.description}} -
+
  @@ -112,7 +112,7 @@
{{template "custom/select_dataset_train" .}} -
+ +
+ +
@@ -167,7 +174,7 @@ onkeyup="this.value=this.value.substring(0, 255)">{{.description}}
-
+
 
-
+ + +
+ +
@@ -249,7 +264,7 @@
{{template "base/footer" .}} - + \ No newline at end of file diff --git a/templates/repo/cloudbrain/benchmark/show.tmpl b/templates/repo/cloudbrain/benchmark/show.tmpl index add7d34d4a..58cb4ca796 100755 --- a/templates/repo/cloudbrain/benchmark/show.tmpl +++ b/templates/repo/cloudbrain/benchmark/show.tmpl @@ -453,7 +453,7 @@ {{$.i18n.Tr "cloudbrain.gpu_type"}} - +
{{$.resource_type}}
@@ -464,7 +464,7 @@ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} - +
{{$.resource_spec}}
@@ -571,6 +571,7 @@
{{template "base/footer" .}} + \ No newline at end of file diff --git a/templates/repo/cloudbrain/inference/new.tmpl b/templates/repo/cloudbrain/inference/new.tmpl index fc2c37bc7c..cc9a9cd6bf 100644 --- a/templates/repo/cloudbrain/inference/new.tmpl +++ b/templates/repo/cloudbrain/inference/new.tmpl @@ -164,7 +164,7 @@
-
+ {{template "custom/select_dataset_train" .}} @@ -221,7 +221,7 @@
-
+ +
+ +
-
@@ -258,7 +262,7 @@
{{template "base/footer" .}} - + diff --git a/templates/repo/cloudbrain/inference/show.tmpl b/templates/repo/cloudbrain/inference/show.tmpl index f4e57e685e..d8e3d8748a 100644 --- a/templates/repo/cloudbrain/inference/show.tmpl +++ b/templates/repo/cloudbrain/inference/show.tmpl @@ -340,7 +340,7 @@ {{$.i18n.Tr "repo.modelarts.train_job.resource_type"}} - +
{{$.resource_type}}
@@ -494,7 +494,7 @@ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} - +
{{$.i18n.Tr "cloudbrain.gpu_num"}}:{{$.GpuNum}},{{$.i18n.Tr "cloudbrain.cpu_num"}}:{{$.CpuNum}},{{$.i18n.Tr "cloudbrain.memory"}}(MB):{{$.MemMiB}},{{$.i18n.Tr "cloudbrain.shared_memory"}}(MB):{{$.ShareMemMiB}}
@@ -551,6 +551,7 @@
{{template "base/footer" .}} + \ No newline at end of file diff --git a/templates/repo/cloudbrain/new.tmpl b/templates/repo/cloudbrain/new.tmpl index e2d1b2993e..f1c59550fd 100755 --- a/templates/repo/cloudbrain/new.tmpl +++ b/templates/repo/cloudbrain/new.tmpl @@ -108,8 +108,8 @@ {{end}} {{end}} -
-
+
+
@@ -125,8 +125,8 @@
- -
+ + + +
+ +
@@ -188,6 +196,7 @@
{{template "base/footer" .}} + \ No newline at end of file diff --git a/templates/repo/cloudbrain/show.tmpl b/templates/repo/cloudbrain/show.tmpl index d111fe123a..782326fc2f 100755 --- a/templates/repo/cloudbrain/show.tmpl +++ b/templates/repo/cloudbrain/show.tmpl @@ -345,7 +345,7 @@ {{$.i18n.Tr "cloudbrain.gpu_type"}} - +
{{$.resource_type}}
@@ -424,10 +424,8 @@ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} - -
- {{$.i18n.Tr "cloudbrain.gpu_num"}}:{{$.GpuNum}},{{$.i18n.Tr "cloudbrain.cpu_num"}}:{{$.CpuNum}},{{$.i18n.Tr "cloudbrain.memory"}}(MB):{{$.MemMiB}},{{$.i18n.Tr "cloudbrain.shared_memory"}}(MB):{{$.ShareMemMiB}} -
+ +
@@ -552,7 +550,7 @@
{{template "base/footer" .}} - + \ No newline at end of file diff --git a/templates/repo/cloudbrain/trainjob/new.tmpl b/templates/repo/cloudbrain/trainjob/new.tmpl index 72e22f7303..d216617da0 100755 --- a/templates/repo/cloudbrain/trainjob/new.tmpl +++ b/templates/repo/cloudbrain/trainjob/new.tmpl @@ -14,7 +14,9 @@ .width { width: 100% !important; } - + .width48 { + width: 48.5% !important; + } .width80 { width: 80.7% !important; margin-left: 10px; @@ -167,7 +169,7 @@
-
+
@@ -224,7 +226,7 @@
-
+ + +
+ +
@@ -264,7 +273,7 @@
{{template "base/footer" .}} - + \ No newline at end of file diff --git a/templates/repo/cloudbrain/trainjob/show.tmpl b/templates/repo/cloudbrain/trainjob/show.tmpl index 72cbaee026..ff9d475e12 100644 --- a/templates/repo/cloudbrain/trainjob/show.tmpl +++ b/templates/repo/cloudbrain/trainjob/show.tmpl @@ -355,7 +355,7 @@ {{$.i18n.Tr "repo.modelarts.train_job.resource_type"}} - +
{{$.resource_type}}
@@ -366,7 +366,7 @@ {{$.i18n.Tr "repo.modelarts.train_job.standard"}} - +
{{$.i18n.Tr "cloudbrain.gpu_num"}}:{{$.GpuNum}},{{$.i18n.Tr "cloudbrain.cpu_num"}}:{{$.CpuNum}},{{$.i18n.Tr "cloudbrain.memory"}}(MB):{{$.MemMiB}},{{$.i18n.Tr "cloudbrain.shared_memory"}}(MB):{{$.ShareMemMiB}}
@@ -640,6 +640,7 @@ {{template "base/footer" .}} + \ No newline at end of file diff --git a/templates/repo/modelarts/inferencejob/new.tmpl b/templates/repo/modelarts/inferencejob/new.tmpl index 3b2150e410..5e04715d25 100644 --- a/templates/repo/modelarts/inferencejob/new.tmpl +++ b/templates/repo/modelarts/inferencejob/new.tmpl @@ -241,7 +241,7 @@ -
+ +
+ +
@@ -285,7 +289,7 @@
{{template "base/footer" .}} - + diff --git a/templates/repo/modelarts/inferencejob/show.tmpl b/templates/repo/modelarts/inferencejob/show.tmpl index 1137aad017..b7632f8720 100644 --- a/templates/repo/modelarts/inferencejob/show.tmpl +++ b/templates/repo/modelarts/inferencejob/show.tmpl @@ -435,7 +435,7 @@ td, th { {{$.i18n.Tr "repo.modelarts.train_job.standard"}} - +
{{.FlavorName}}
@@ -511,6 +511,7 @@ td, th { {{template "base/footer" .}} + diff --git a/templates/repo/modelarts/notebook/new.tmpl b/templates/repo/modelarts/notebook/new.tmpl index 30449a1a49..3e6d886379 100755 --- a/templates/repo/modelarts/notebook/new.tmpl +++ b/templates/repo/modelarts/notebook/new.tmpl @@ -65,7 +65,7 @@ --> -
+ +
+ +
+
+ +
@@ -307,7 +311,7 @@
{{template "base/footer" .}} - + \ No newline at end of file diff --git a/templates/repo/modelarts/trainjob/show.tmpl b/templates/repo/modelarts/trainjob/show.tmpl index 7ae5d5c6bc..ccd2842855 100755 --- a/templates/repo/modelarts/trainjob/show.tmpl +++ b/templates/repo/modelarts/trainjob/show.tmpl @@ -1,5 +1,7 @@ {{template "base/head" .}} + +