From cc587b9021810af222c476b9ebb904a6b6be4408 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 3 Aug 2023 10:02:07 +0800 Subject: [PATCH 01/24] #4553 fix bug --- entity/container.go | 10 +- .../container_builder/code_builder.go | 7 +- .../container_builder/container_builder.go | 1 + .../container_builder_chan.go | 3 + .../container_builder/dataset_builder.go | 7 +- .../file_notebook_code_builder.go | 7 +- .../container_builder/log_path_builder.go | 7 +- .../container_builder/output_path_builder.go | 7 +- .../container_builder/pre_model_builder.go | 158 +++++------------- .../ai_task_service/storage_helper/client.go | 1 + .../ai_task_service/storage_helper/copy.go | 39 +++++ .../ai_task_service/storage_helper/minio.go | 10 ++ .../ai_task_service/storage_helper/obs.go | 14 ++ services/ai_task_service/task/opt_handler.go | 20 ++- 14 files changed, 156 insertions(+), 135 deletions(-) create mode 100644 services/ai_task_service/storage_helper/copy.go diff --git a/entity/container.go b/entity/container.go index 7814f8ada8..806336d1ea 100644 --- a/entity/container.go +++ b/entity/container.go @@ -45,7 +45,7 @@ type ContainerBuildOpts struct { MKDIR bool } -func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool { +func (opts ContainerBuildOpts) IsStorageTypeAccept(storageType StorageType) bool { for _, s := range opts.AcceptStorageType { if string(s) == string(storageType) { return true @@ -53,9 +53,17 @@ func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool { } return false } + func (opts ContainerBuildOpts) GetLocalPath() string { if opts.StorageRelativePath != "" { return opts.StorageRelativePath } return opts.ContainerPath } + +func (opts ContainerBuildOpts) GetPriorityStorageType() StorageType { + if len(opts.AcceptStorageType) == 0 { + return "" + } + return opts.AcceptStorageType[0] +} diff --git a/services/ai_task_service/container_builder/code_builder.go b/services/ai_task_service/container_builder/code_builder.go 
index e5cd34e127..1970ccba34 100644 --- a/services/ai_task_service/container_builder/code_builder.go +++ b/services/ai_task_service/container_builder/code_builder.go @@ -21,6 +21,10 @@ func init() { RegisterContainerBuilder(o) } +func (b *CodeBuilder) ShouldBuild(ctx *context.CreationContext) bool { + return !b.Opts.Disable +} + func (b *CodeBuilder) SetOpts(opts *entity.ContainerBuildOpts) { b.Opts = opts } @@ -31,9 +35,6 @@ func (b *CodeBuilder) GetContainerType() entity.ContainerDataType { func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { opts := b.Opts - if opts.Disable { - return nil, nil - } storageTypes := opts.AcceptStorageType if storageTypes == nil || len(storageTypes) == 0 { return nil, response.SYSTEM_ERROR diff --git a/services/ai_task_service/container_builder/container_builder.go b/services/ai_task_service/container_builder/container_builder.go index aefd624096..235c251fbf 100644 --- a/services/ai_task_service/container_builder/container_builder.go +++ b/services/ai_task_service/container_builder/container_builder.go @@ -13,6 +13,7 @@ type ContainerBuilder interface { Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) GetContainerType() entity.ContainerDataType SetOpts(opts *entity.ContainerBuildOpts) + ShouldBuild(ctx *context.CreationContext) bool } var containerBuilderMap = map[entity.ContainerDataType]reflect.Type{} diff --git a/services/ai_task_service/container_builder/container_builder_chan.go b/services/ai_task_service/container_builder/container_builder_chan.go index 21211acb81..fbaafcf5cb 100644 --- a/services/ai_task_service/container_builder/container_builder_chan.go +++ b/services/ai_task_service/container_builder/container_builder_chan.go @@ -25,6 +25,9 @@ func (c *BuilderChain) Run(ctx *context.CreationContext) *response.BizError { if current != nil && len(current) > 0 { continue } + if !builder.ShouldBuild(ctx) { + continue + } d, err := 
builder.Build(ctx) if err != nil { return err diff --git a/services/ai_task_service/container_builder/dataset_builder.go b/services/ai_task_service/container_builder/dataset_builder.go index e2cfabcf92..8d5fbcd110 100644 --- a/services/ai_task_service/container_builder/dataset_builder.go +++ b/services/ai_task_service/container_builder/dataset_builder.go @@ -21,14 +21,15 @@ func init() { RegisterContainerBuilder(o) } +func (b *DatasetBuilder) ShouldBuild(ctx *context.CreationContext) bool { + return !b.Opts.Disable +} + func (b *DatasetBuilder) SetOpts(opts *entity.ContainerBuildOpts) { b.Opts = opts } func (b *DatasetBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { - if b.Opts.Disable { - return nil, nil - } uuid := ctx.Request.DatasetUUIDStr if uuid == "" { return nil, nil diff --git a/services/ai_task_service/container_builder/file_notebook_code_builder.go b/services/ai_task_service/container_builder/file_notebook_code_builder.go index d23d626cf4..d77a540fe0 100644 --- a/services/ai_task_service/container_builder/file_notebook_code_builder.go +++ b/services/ai_task_service/container_builder/file_notebook_code_builder.go @@ -18,6 +18,10 @@ func init() { RegisterContainerBuilder(o) } +func (b *FileNoteBookCodeBuilder) ShouldBuild(ctx *context.CreationContext) bool { + return !b.Opts.Disable +} + func (b *FileNoteBookCodeBuilder) SetOpts(opts *entity.ContainerBuildOpts) { b.Opts = opts } @@ -27,9 +31,6 @@ func (b *FileNoteBookCodeBuilder) GetContainerType() entity.ContainerDataType { } func (b *FileNoteBookCodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { - if b.Opts.Disable { - return nil, nil - } repo := ctx.Request.FileRepository if repo == nil { return nil, nil diff --git a/services/ai_task_service/container_builder/log_path_builder.go b/services/ai_task_service/container_builder/log_path_builder.go index f277d818f2..366b6bcab3 100644 --- 
a/services/ai_task_service/container_builder/log_path_builder.go +++ b/services/ai_task_service/container_builder/log_path_builder.go @@ -18,14 +18,15 @@ func init() { RegisterContainerBuilder(o) } +func (b *LogPathBuilder) ShouldBuild(ctx *context.CreationContext) bool { + return !b.Opts.Disable +} + func (b *LogPathBuilder) SetOpts(opts *entity.ContainerBuildOpts) { b.Opts = opts } func (b *LogPathBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { - if b.Opts.Disable { - return nil, nil - } storageTypes := b.Opts.AcceptStorageType if storageTypes == nil || len(storageTypes) == 0 { return nil, response.SYSTEM_ERROR diff --git a/services/ai_task_service/container_builder/output_path_builder.go b/services/ai_task_service/container_builder/output_path_builder.go index 98f4d86697..c0e0402118 100644 --- a/services/ai_task_service/container_builder/output_path_builder.go +++ b/services/ai_task_service/container_builder/output_path_builder.go @@ -19,14 +19,15 @@ func init() { RegisterContainerBuilder(o) } +func (b *OutputPathBuilder) ShouldBuild(ctx *context.CreationContext) bool { + return !b.Opts.Disable +} + func (b *OutputPathBuilder) SetOpts(opts *entity.ContainerBuildOpts) { b.Opts = opts } func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { - if b.Opts.Disable { - return nil, nil - } storageTypes := b.Opts.AcceptStorageType if storageTypes == nil || len(storageTypes) == 0 { return nil, response.SYSTEM_ERROR diff --git a/services/ai_task_service/container_builder/pre_model_builder.go b/services/ai_task_service/container_builder/pre_model_builder.go index e87f75038a..8c8dfac3ff 100644 --- a/services/ai_task_service/container_builder/pre_model_builder.go +++ b/services/ai_task_service/container_builder/pre_model_builder.go @@ -2,19 +2,14 @@ package container_builder import ( "code.gitea.io/gitea/routers/response" - "fmt" "path" "strings" "code.gitea.io/gitea/entity" 
"code.gitea.io/gitea/models" - "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/services/ai_task_service/context" "code.gitea.io/gitea/services/ai_task_service/storage_helper" - "code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" ) type PretrainModelBuilder struct { @@ -26,139 +21,74 @@ func init() { RegisterContainerBuilder(o) } +func (b *PretrainModelBuilder) ShouldBuild(ctx *context.CreationContext) bool { + return !b.Opts.Disable +} + func (b *PretrainModelBuilder) SetOpts(opts *entity.ContainerBuildOpts) { b.Opts = opts } func (b *PretrainModelBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { - if b.Opts.Disable { + modelId := ctx.Request.PretrainModelId + if modelId == "" { return nil, nil } - form := ctx.Request - storageTypes := b.Opts.AcceptStorageType - if storageTypes == nil || len(storageTypes) == 0 { - return nil, response.SYSTEM_ERROR - } - //未选择预训练模型,跳过此步 - if form.PretrainModelName == "" { - return nil, nil - } - if form.PretrainModelId == "" { - //异常数据,理论上应该都有modelId - return nil, response.RESULT_CLEARD - } - //查出模型数据 - m, err := models.QueryModelById(form.PretrainModelId) - if err != nil { - log.Error("Can not find model", err) - return nil, response.MODEL_NOT_EXISTS - } - preTrainModelUrl := m.Path - if err != nil { - log.Error("Can not find model", err) + + var pretrainModel *models.AiModelManage + var err error + if pretrainModel, err = models.QueryModelById(modelId); err != nil || pretrainModel == nil { + log.Error("Can not find pretrainModel", err) return nil, response.MODEL_NOT_EXISTS } - //模型文件存储方式 - oldStorageType := entity.GetStorageTypeFromCloudbrainType(m.Type) - if oldStorageType == "" { - log.Error("model storage type error.modelId=%d", m.ID) + preTrainModelUrl := pretrainModel.Path + objectKeyPrefix := getPreTrainModelObjectPrefix(preTrainModelUrl) + //模型当前的存储类型 
+ currentStorageType := entity.GetStorageTypeFromCloudbrainType(pretrainModel.Type) + if currentStorageType == "" { + log.Error("pretrainModel storage type error.modelId=%d", pretrainModel.ID) return nil, response.SYSTEM_ERROR } - - var preTrainModelPath string - var preTrainModelEntity []entity.ContainerData - storageType := oldStorageType - ckptNames := strings.Split(form.PretrainModelCkptName, ";") - for _, ckptName := range ckptNames { - isExists, size := cloudbrainTask.CheckAndGetFileSize(m, ckptName) - if !isExists { - log.Error("model file not exist.name = %s", ckptName) - return nil, response.MODEL_NOT_EXISTS - } - preTrainModelPath = getPreTrainModelPath(preTrainModelUrl, ckptName) - if !b.Opts.IsStorageTypeIn(oldStorageType) { - //意味着模型之前存储的位置不符合要求,需要转存到指定存储 - newStorageType := b.Opts.AcceptStorageType[0] - //todo 可优化 - if newStorageType == entity.MINIO && oldStorageType == entity.OBS { - //复用以前代码 - minioPreModelURL, err := dealModelInfo(form.PretrainModelId, form.JobName, ckptName) - if err != nil { - log.Error("Can not find model,modelId=%d err=%v", form.PretrainModelId, err) - return nil, response.MODEL_NOT_EXISTS - } - preTrainModelUrl = minioPreModelURL - preTrainModelPath = getPreTrainModelPath(minioPreModelURL, ckptName) - storageType = entity.MINIO - } - } - uploader := storage_helper.SelectUploaderFromStorageType(storageType) - modelData := entity.ContainerData{ - Name: ckptName, - Bucket: uploader.GetBucket(), - EndPoint: uploader.GetEndpoint(), - ObjectKey: preTrainModelPath, - ReadOnly: b.Opts.ReadOnly, - ContainerPath: path.Join(b.Opts.ContainerPath, ckptName), - RealPath: uploader.GetRealPath(preTrainModelPath), - S3DownloadUrl: uploader.GetS3DownloadUrl(preTrainModelPath), - IsDir: false, - Size: size, + currentStorageHelper := storage_helper.SelectUploaderFromStorageType(currentStorageType) + if !b.Opts.IsStorageTypeAccept(currentStorageType) { + //意味着模型当前存储的位置不符合要求,需要转存到指定存储 + newStorageType := b.Opts.GetPriorityStorageType() + err := 
storage_helper.CopyDirToStorage(currentStorageType, objectKeyPrefix, newStorageType) + if err != nil { + log.Error("copy pretrainModel dir error.currentStorageType=%s newStorageType=%s objectKeyPrefix=%s err=%v", currentStorageType, newStorageType, pretrainModel.Path, err) + return nil, response.SYSTEM_ERROR } - preTrainModelEntity = append(preTrainModelEntity, modelData) + currentStorageHelper = storage_helper.SelectUploaderFromStorageType(newStorageType) + preTrainModelUrl = path.Join(currentStorageHelper.GetBucket(), pretrainModel.Path) } - form.PreTrainModelUrl = preTrainModelUrl - return preTrainModelEntity, nil + + ctx.Request.PreTrainModelUrl = preTrainModelUrl + return []entity.ContainerData{{ + Name: pretrainModel.Name, + Bucket: currentStorageHelper.GetBucket(), + EndPoint: currentStorageHelper.GetEndpoint(), + ObjectKey: objectKeyPrefix, + ReadOnly: b.Opts.ReadOnly, + ContainerPath: b.Opts.ContainerPath, + RealPath: currentStorageHelper.GetRealPath(objectKeyPrefix), + S3DownloadUrl: currentStorageHelper.GetS3DownloadUrl(objectKeyPrefix), + IsDir: true, + Size: pretrainModel.Size, + }}, nil } func (b *PretrainModelBuilder) GetContainerType() entity.ContainerDataType { return entity.ContainerPreTrainModel } -func getPreTrainModelPath(pretrainModelDir string, fileName string) string { +//getPreTrainModelObjectPrefix 模型存储的路径包含了bucket,过滤掉bucket以后的路径就是模型存储的Object Prefix +func getPreTrainModelObjectPrefix(pretrainModelDir string) string { index := strings.Index(pretrainModelDir, "/") if index > 0 { filterBucket := pretrainModelDir[index+1:] - return filterBucket + fileName + return filterBucket } else { return "" } } - -func dealModelInfo(modelId string, jobName string, ckptName string) (string, error) { - preModel, err := models.QueryModelById(modelId) - if err != nil || preModel == nil || preModel.ID == "" { - log.Error("Can not find model", err) - return "", fmt.Errorf("Can not find model: %v", ckptName) - } - minioPreModelURL, err := 
downloadModelFromObs(preModel, jobName, cloudbrain.PretrainModelMountPath, ckptName) - if err != nil { - log.Error("Can not find model", err) - - return "", err - } - return minioPreModelURL, nil -} - -func downloadModelFromObs(preModel *models.AiModelManage, jobName, suffixPath string, ckptFileName string) (string, error) { - destPath := setting.CBCodePathPrefix + jobName + suffixPath + "/" - destFile := destPath + ckptFileName - returnStr := setting.Attachment.Minio.Bucket + "/" + destPath - srcUrl := preModel.Path[len(setting.Bucket)+1:] + ckptFileName - log.Info("dest model Path=" + returnStr + " src path=" + preModel.Path + ckptFileName) - body, err := storage.ObsDownloadAFile(setting.Bucket, srcUrl) - if err == nil { - defer body.Close() - _, err = storage.Attachments.UploadContent(setting.Attachment.Minio.Bucket, destFile, body) - if err != nil { - log.Error("UploadObject(%s) failed: %s", preModel.Path+ckptFileName, err.Error()) - return "", err - } - } else { - log.Info("download model failed. 
as " + err.Error()) - return "", err - } - log.Info("download model from obs succeed") - return returnStr, nil -} diff --git a/services/ai_task_service/storage_helper/client.go b/services/ai_task_service/storage_helper/client.go index 1b442ca30c..65f09bbc74 100644 --- a/services/ai_task_service/storage_helper/client.go +++ b/services/ai_task_service/storage_helper/client.go @@ -14,6 +14,7 @@ type UploaderConfig struct { type StorageHelper interface { UploadDir(codePath, jobName string) error + UploadContent(reader io.ReadCloser, objectKey string) error GetRealPath(objectKey string) string GetBucket() string GetEndpoint() string diff --git a/services/ai_task_service/storage_helper/copy.go b/services/ai_task_service/storage_helper/copy.go new file mode 100644 index 0000000000..cdaae88521 --- /dev/null +++ b/services/ai_task_service/storage_helper/copy.go @@ -0,0 +1,39 @@ +package storage_helper + +import ( + "code.gitea.io/gitea/entity" + "code.gitea.io/gitea/modules/log" +) + +func CopyDirToStorage(srcStorage entity.StorageType, srcDirKey string, dstStorage entity.StorageType) error { + srcStorageHelper := SelectUploaderFromStorageType(srcStorage) + objects, err := srcStorageHelper.GetAllObjectsUnderDir(srcDirKey) + if err != nil { + log.Error("copyDirToStorage err.srcStorage=%s,srcDirKey=%s dstStorage=%s. err=%v", srcStorage, srcDirKey, dstStorage, err) + return err + } + for _, object := range objects { + err = CopyObjectToStorage(srcStorage, object.RelativePath, dstStorage) + if err != nil { + log.Error("Copy object error when copy dir. objectKey=%s srcStorage=%s,srcDirKey=%s dstStorage=%s. 
err=%v", object.RelativePath, srcStorage, srcDirKey, dstStorage, err) + return err + } + } + return nil +} + +func CopyObjectToStorage(srcStorage entity.StorageType, srcObjectKey string, dstStorage entity.StorageType) error { + srcStorageHelper := SelectUploaderFromStorageType(srcStorage) + dstStorageHelper := SelectUploaderFromStorageType(dstStorage) + reader, err := srcStorageHelper.OpenFile(srcObjectKey) + if err != nil { + log.Error("copyFileToStorage err.srcStorage=%s,srcObjectKey=%s dstStorage=%s. err=%v", srcStorage, srcObjectKey, dstStorage, err) + return err + } + err = dstStorageHelper.UploadContent(reader, srcObjectKey) + if err != nil { + log.Error("copyFileToStorage err.srcStorage=%s,srcObjectKey=%s dstStorage=%s. err=%v", srcStorage, srcObjectKey, dstStorage, err) + return err + } + return nil +} diff --git a/services/ai_task_service/storage_helper/minio.go b/services/ai_task_service/storage_helper/minio.go index b7fc364030..65bf396de2 100644 --- a/services/ai_task_service/storage_helper/minio.go +++ b/services/ai_task_service/storage_helper/minio.go @@ -21,6 +21,16 @@ type MinioHelper struct { func (m *MinioHelper) UploadDir(codePath, objectKeyPrefix string) error { return UploadDirToMinio(codePath, objectKeyPrefix, "") } + +func (m *MinioHelper) UploadContent(reader io.ReadCloser, objectKey string) error { + _, err := storage.Attachments.UploadContent(m.GetBucket(), objectKey, reader) + if err != nil { + log.Error("UploadContent failed:objectKey=%s err=%v", objectKey, err) + return err + } + return nil +} + func (m *MinioHelper) GetJobDefaultObjectKeyPrefix(jobName string) string { return path.Join(setting.CBCodePathPrefix, jobName) } diff --git a/services/ai_task_service/storage_helper/obs.go b/services/ai_task_service/storage_helper/obs.go index 1620166d75..5ff1f4a0c0 100644 --- a/services/ai_task_service/storage_helper/obs.go +++ b/services/ai_task_service/storage_helper/obs.go @@ -19,6 +19,20 @@ func (m *OBSHelper) UploadDir(codePath, 
objectKeyPrefix string) error { return UploadDirToObs(codePath, objectKeyPrefix, "") } +func (m *OBSHelper) UploadContent(reader io.ReadCloser, objectKey string) error { + log.Info("UploadContent objectKey=%s", objectKey) + input := &obs.PutObjectInput{} + input.Bucket = m.GetBucket() + input.Key = objectKey + input.Body = reader + _, err := storage.ObsCli.PutObject(input) + if err != nil { + log.Error("UploadContent err.objectKey=%s err=%v", objectKey, err) + return err + } + return nil +} + func (m *OBSHelper) GetJobDefaultObjectKeyPrefix(jobName string) string { return path.Join(setting.CodePathPrefix, jobName) } diff --git a/services/ai_task_service/task/opt_handler.go b/services/ai_task_service/task/opt_handler.go index 8aaf91a414..9800e84443 100644 --- a/services/ai_task_service/task/opt_handler.go +++ b/services/ai_task_service/task/opt_handler.go @@ -170,14 +170,24 @@ func (DefaultCreationHandler) CheckBranchExists(ctx *context.CreationContext) *r func (DefaultCreationHandler) CheckModel(ctx *context.CreationContext) *response.BizError { log.Info("Start to CheckModel.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) - if hasModelNumOverLimit(ctx.Request.PretrainModelCkptName) { //检查模型数量是否超出限制 - log.Info("CheckModel hasModelNumOverLimit displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) - return response.MODEL_NUM_OVER_LIMIT + pretrainModelId := ctx.Request.PretrainModelId + if pretrainModelId == "" { + return nil } - if hasModelFileDeleted(ctx.Request.PretrainModelId, ctx.Request.PretrainModelCkptName) { //检查模型文件是否存在 - log.Info("CheckModel hasModelFileDeleted.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) + m, err := models.QueryModelById(ctx.Request.PretrainModelId) + if err != nil || m == nil { + log.Error("Can not find model", err) return response.MODEL_NOT_EXISTS } + + 
//if hasModelNumOverLimit(ctx.Request.PretrainModelCkptName) { //检查模型数量是否超出限制 + // log.Info("CheckModel hasModelNumOverLimit displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) + // return response.MODEL_NUM_OVER_LIMIT + //} + //if hasModelFileDeleted(ctx.Request.PretrainModelId, ctx.Request.PretrainModelCkptName) { //检查模型文件是否存在 + // log.Info("CheckModel hasModelFileDeleted.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) + // return response.MODEL_NOT_EXISTS + //} log.Info("CheckModel success.displayJobName=%s jobType=%s cluster=%s", ctx.Request.DisplayJobName, ctx.Request.JobType, ctx.Request.Cluster) return nil } -- 2.34.1 From 75076cd9c487cc20438b975802670d4bd2821b38 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 3 Aug 2023 15:22:19 +0800 Subject: [PATCH 02/24] #4553 fix bug --- services/ai_task_service/cluster/c2net.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/ai_task_service/cluster/c2net.go b/services/ai_task_service/cluster/c2net.go index 6e49ff1720..a48ef30ddf 100644 --- a/services/ai_task_service/cluster/c2net.go +++ b/services/ai_task_service/cluster/c2net.go @@ -215,11 +215,15 @@ func convertContainerArray2Grampus(containerDatas []entity.ContainerData) models } func convertContainer2Grampus(d entity.ContainerData) models.GrampusDataset { + objectKey := d.ObjectKey + if d.IsDir { + objectKey = strings.TrimSuffix(objectKey, "/") + "/" + } return models.GrampusDataset{ Name: d.Name, Bucket: d.Bucket, EndPoint: d.EndPoint, - ObjectKey: d.ObjectKey, + ObjectKey: objectKey, ContainerPath: d.ContainerPath, ReadOnly: d.ReadOnly, GetBackEndpoint: d.GetBackEndpoint, -- 2.34.1 From 6d5278e00bb2d32c90f86235ec49b5d8080395f9 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 3 Aug 2023 17:16:29 +0800 Subject: [PATCH 03/24] #4556 fix bug --- entity/ai_task.go | 7 ++++ entity/creation.go | 24 
++++++------- models/resource_queue.go | 1 + models/resource_specification.go | 33 ++++++++++++++---- modules/grampus/grampus.go | 21 ++++++++---- .../task/cloudbrain_one_notebook_task.go | 8 +++-- .../task/cloudbrain_one_train_task.go | 34 ++++++++++++------- .../task/cloudbrain_two_notebook_task.go | 7 ++-- .../task/cloudbrain_two_train_task.go | 20 ++++++----- .../task/grampus_notebook_task.go | 10 ++++-- .../task/grampus_online_infer_task.go | 10 ++++-- .../task/grampus_train_task.go | 34 ++++++++++++------- services/ai_task_service/task/task_base.go | 9 ++--- .../task/task_creation_info.go | 28 +++++++++++++-- 14 files changed, 169 insertions(+), 77 deletions(-) diff --git a/entity/ai_task.go b/entity/ai_task.go index 0573ee1c30..4df069ac04 100644 --- a/entity/ai_task.go +++ b/entity/ai_task.go @@ -45,6 +45,7 @@ type CreateReq struct { IsContinueRequest bool `json:"is_continue"` SourceCloudbrainId int64 `json:"source_cloudbrain_id"` AppName string `json:"app_name"` + HasInternet int `json:"has_internet"` //0 all;1 no internet;2 has internet ParamArray models.Parameters ComputeSource *models.ComputeSource ReqCommitID string @@ -451,6 +452,12 @@ type GetResourceUsageOpts struct { NodeId int LogFileName string } +type GetSpecOpts struct { + UserId int64 + ComputeSource models.ComputeSource + JobType models.JobType + HasInternet int //0 all;1 no internet;2 has internet +} type AITaskNodeInfo struct { ID int `json:"id"` diff --git a/entity/creation.go b/entity/creation.go index a99fe673c1..1b6a822065 100644 --- a/entity/creation.go +++ b/entity/creation.go @@ -7,18 +7,18 @@ import ( type CreationRequiredInfo struct { //排队信息、代码分支信息、查询是否有正在运行的任务、查询镜像列表、查询资源规格(积分余额,开关) - Specs []*structs.SpecificationShow `json:"specs"` - Images []ClusterImage `json:"images"` - CanUseAllImages bool `json:"can_use_all_images"` - Branches []string `json:"branches"` - DefaultBranch string `json:"default_branch"` - WaitCount int64 `json:"wait_count"` - NotStopTaskCount int 
`json:"not_stop_task_count"` - DisplayJobName string `json:"display_job_name"` - PointAccount *PointAccountInfo `json:"point_account"` - PaySwitch bool `json:"pay_switch"` - Config AITaskCreationConfig `json:"config"` - AllowedWorkerNum []int `json:"allowed_worker_num"` + Specs map[string][]*structs.SpecificationShow `json:"specs"` + Images []ClusterImage `json:"images"` + CanUseAllImages bool `json:"can_use_all_images"` + Branches []string `json:"branches"` + DefaultBranch string `json:"default_branch"` + WaitCount int64 `json:"wait_count"` + NotStopTaskCount int `json:"not_stop_task_count"` + DisplayJobName string `json:"display_job_name"` + PointAccount *PointAccountInfo `json:"point_account"` + PaySwitch bool `json:"pay_switch"` + Config AITaskCreationConfig `json:"config"` + AllowedWorkerNum []int `json:"allowed_worker_num"` } type AITaskCreationConfig struct { diff --git a/models/resource_queue.go b/models/resource_queue.go index f54be572e9..09b658fe91 100644 --- a/models/resource_queue.go +++ b/models/resource_queue.go @@ -17,6 +17,7 @@ type ResourceQueue struct { ComputeResource string AccCardType string CardsTotalNum int + HasInternet int //0 unknown;1 no internet;2 has internet IsAutomaticSync bool Remark string DeletedTime timeutil.TimeStamp `xorm:"deleted"` diff --git a/models/resource_specification.go b/models/resource_specification.go index f165f152d3..972bb6019d 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -1,6 +1,7 @@ package models import ( + "code.gitea.io/gitea/entity" "code.gitea.io/gitea/modules/timeutil" "fmt" "strings" @@ -20,6 +21,14 @@ const ( SearchSpecOrder4Standard ) +type AICenterInternetStatus int + +const ( + Unknown AICenterInternetStatus = -1 + NoInternet AICenterInternetStatus = 1 + HasInternet AICenterInternetStatus = 2 +) + type ResourceSpecification struct { ID int64 `xorm:"pk autoincr"` QueueId int64 `xorm:"INDEX"` @@ -31,6 +40,7 @@ type ResourceSpecification struct { ShareMemGiB 
float32 UnitPrice int Status int + HasInternet AICenterInternetStatus IsAvailable bool IsAutomaticSync bool CreatedTime timeutil.TimeStamp `xorm:"created"` @@ -201,8 +211,9 @@ type FindSpecsOptions struct { ShareMemGiB float32 UseShareMemGiB bool //if true,find specs no matter used or not used in scene. if false,only find specs used in scene - RequestAll bool - SpecStatus int + RequestAll bool + SpecStatus int + HasInternet int //0 all,1 no internet,2 has internet } type Specification struct { @@ -218,6 +229,7 @@ type Specification struct { UnitPrice int QueueId int64 QueueCode string + HasInternet int Cluster string AiCenterCode string AiCenterName string @@ -231,7 +243,7 @@ func (Specification) TableName() string { return "resource_specification" } -func (s *Specification) loadRelatedSpecs(jobType JobType) { +func (s *Specification) loadRelatedSpecs(jobType JobType, hasInternet int) { if s.RelatedSpecs != nil { return } @@ -247,6 +259,7 @@ func (s *Specification) loadRelatedSpecs(jobType JobType) { RequestAll: false, SpecStatus: SpecOnShelf, JobType: jobType, + HasInternet: hasInternet, }) if err != nil { s.RelatedSpecs = defaultSpecs @@ -254,15 +267,15 @@ func (s *Specification) loadRelatedSpecs(jobType JobType) { } s.RelatedSpecs = r } -func (s *Specification) GetAvailableCenterIds(userId int64, jobType JobType) []string { - s.loadRelatedSpecs(jobType) +func (s *Specification) GetAvailableCenterIds(opts entity.GetSpecOpts) []string { + s.loadRelatedSpecs(opts.JobType, opts.HasInternet) if len(s.RelatedSpecs) == 0 { return make([]string, 0) } //filter exclusive specs - specs := FilterExclusiveSpecs(s.RelatedSpecs, userId) + specs := FilterExclusiveSpecs(s.RelatedSpecs, opts.UserId) centerIds := make([]string, len(specs)) for i, v := range specs { @@ -530,6 +543,14 @@ func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) { if opts.SpecStatus > 0 { cond = cond.And(builder.Eq{"resource_specification.status": opts.SpecStatus}) } + if opts.HasInternet > 0 { 
+ if opts.HasInternet == 1 { + cond = cond.And(builder.Eq{"resource_queue.has_internet": NoInternet}) + } else if opts.HasInternet == 2 { + cond = cond.And(builder.Eq{"resource_queue.has_internet": HasInternet}) + } + + } r := make([]*Specification, 0) s := x.Where(cond). Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id") diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 0cf43d5ec0..6593f4a4a6 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -1,6 +1,7 @@ package grampus import ( + "code.gitea.io/gitea/entity" "encoding/json" "fmt" "strconv" @@ -279,7 +280,10 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, Command: req.Command, - CenterID: req.Spec.GetAvailableCenterIds(ctx.User.ID, models.JobTypeDebug), + CenterID: req.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: models.JobTypeDebug, + }), }, }, }) @@ -438,12 +442,15 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str Name: req.JobName, Tasks: []models.GrampusTasks{ { - Name: req.JobName, - Command: req.Command, - ResourceSpecId: req.Spec.SourceSpecId, - ImageId: req.ImageId, - ImageUrl: req.ImageUrl, - CenterID: req.Spec.GetAvailableCenterIds(ctx.User.ID, models.JobTypeTrain), + Name: req.JobName, + Command: req.Command, + ResourceSpecId: req.Spec.SourceSpecId, + ImageId: req.ImageId, + ImageUrl: req.ImageUrl, + CenterID: req.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: models.JobTypeTrain, + }), ReplicaNum: 1, Datasets: datasetGrampus, Models: modelGrampus, diff --git a/services/ai_task_service/task/cloudbrain_one_notebook_task.go b/services/ai_task_service/task/cloudbrain_one_notebook_task.go index ce596b0dcc..b9d07297ab 100644 --- a/services/ai_task_service/task/cloudbrain_one_notebook_task.go +++ 
b/services/ai_task_service/task/cloudbrain_one_notebook_task.go @@ -146,8 +146,12 @@ func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.Creation OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), - Spec: ctx.Spec, + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), + Spec: ctx.Spec, }, }, } diff --git a/services/ai_task_service/task/cloudbrain_one_train_task.go b/services/ai_task_service/task/cloudbrain_one_train_task.go index ad20388118..533c45c9f4 100644 --- a/services/ai_task_service/task/cloudbrain_one_train_task.go +++ b/services/ai_task_service/task/cloudbrain_one_train_task.go @@ -108,13 +108,17 @@ func (g CloudbrainOneTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon ImageUrl: strings.TrimSpace(form.ImageUrl), Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), - PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), - BootFile: form.BootFile, - OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), - LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), - Params: form.ParamArray, - Spec: ctx.Spec, + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), + PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), + BootFile: form.BootFile, + OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), + LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), + Params: form.ParamArray, + Spec: ctx.Spec, }, }, } @@ -149,12 +153,16 @@ func (g CloudbrainOneTrainTaskTemplate) CallRestartAPI(ctx *context.CreationCont 
ImageUrl: strings.TrimSpace(form.ImageUrl), Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), - PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), - BootFile: form.BootFile, - OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), - Params: form.ParamArray, - Spec: ctx.Spec, + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), + PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), + BootFile: form.BootFile, + OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), + Params: form.ParamArray, + Spec: ctx.Spec, }, }, } diff --git a/services/ai_task_service/task/cloudbrain_two_notebook_task.go b/services/ai_task_service/task/cloudbrain_two_notebook_task.go index 9dcfd9dd71..a6eac5b04e 100644 --- a/services/ai_task_service/task/cloudbrain_two_notebook_task.go +++ b/services/ai_task_service/task/cloudbrain_two_notebook_task.go @@ -168,18 +168,19 @@ func (g CloudbrainTwoNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationC return nil } -func (g CloudbrainTwoNotebookTaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) { +func (g CloudbrainTwoNotebookTaskTemplate) GetSpecs(opts entity.GetSpecOpts) ([]*api.SpecificationShow, *response.BizError) { var aiCenterCode = models.AICenterOfCloudBrainTwo if setting.ModelartsCD.Enabled { aiCenterCode = models.AICenterOfChengdu } var specs []*models.Specification var err error - specs, err = resource.FindAvailableSpecs(userId, models.FindSpecsOptions{ + specs, err = resource.FindAvailableSpecs(opts.UserId, models.FindSpecsOptions{ JobType: g.JobType, - ComputeResource: computeSource.Name, + ComputeResource: opts.ComputeSource.Name, Cluster: g.ClusterType.GetParentCluster(), 
AiCenterCode: aiCenterCode, + HasInternet: opts.HasInternet, }) if err != nil { diff --git a/services/ai_task_service/task/cloudbrain_two_train_task.go b/services/ai_task_service/task/cloudbrain_two_train_task.go index bccbb939c8..f928f70528 100644 --- a/services/ai_task_service/task/cloudbrain_two_train_task.go +++ b/services/ai_task_service/task/cloudbrain_two_train_task.go @@ -114,14 +114,18 @@ func (g CloudbrainTwoTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon Description: form.Description, Tasks: []entity.TrainTask{ { - Name: form.JobName, - ResourceSpecId: ctx.Spec.SourceSpecId, - ImageId: form.ImageID, - ImageUrl: strings.TrimSpace(form.ImageUrl), - Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), - Code: ctx.GetContainerDataArray(entity.ContainerCode), - LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), + Name: form.JobName, + ResourceSpecId: ctx.Spec.SourceSpecId, + ImageId: form.ImageID, + ImageUrl: strings.TrimSpace(form.ImageUrl), + Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), + Code: ctx.GetContainerDataArray(entity.ContainerCode), + LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), BootFile: form.BootFile, OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), diff --git a/services/ai_task_service/task/grampus_notebook_task.go b/services/ai_task_service/task/grampus_notebook_task.go index b5fcc3abbf..c99dd9811b 100644 --- a/services/ai_task_service/task/grampus_notebook_task.go +++ b/services/ai_task_service/task/grampus_notebook_task.go @@ -39,7 +39,7 @@ func GetGrampusNoteBookConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseCon ContainerPath: codePath, ReadOnly: false, AcceptStorageType: 
[]entity.StorageType{entity.MINIO, entity.OBS}, - VolumeFolder: true, + VolumeFolder: true, }, entity.ContainerDataset: { ContainerPath: datasetPath, @@ -190,8 +190,12 @@ func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContex Code: ctx.GetContainerDataArray(entity.ContainerCode), AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), - Spec: ctx.Spec, + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), + Spec: ctx.Spec, }, }, } diff --git a/services/ai_task_service/task/grampus_online_infer_task.go b/services/ai_task_service/task/grampus_online_infer_task.go index fb7d0f1c67..e2241fa83b 100644 --- a/services/ai_task_service/task/grampus_online_infer_task.go +++ b/services/ai_task_service/task/grampus_online_infer_task.go @@ -119,9 +119,13 @@ func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationCon OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), AutoStopDuration: -1, Capacity: setting.Capacity, - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, models.JobTypeOnlineInference), - Spec: ctx.Spec, - BootFile: ctx.Request.BootFile, + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), + Spec: ctx.Spec, + BootFile: ctx.Request.BootFile, }, }, } diff --git a/services/ai_task_service/task/grampus_train_task.go b/services/ai_task_service/task/grampus_train_task.go index 6914817cae..15d1f93b7b 100644 --- a/services/ai_task_service/task/grampus_train_task.go +++ b/services/ai_task_service/task/grampus_train_task.go @@ -140,13 +140,17 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) DisplayJobName: form.DisplayJobName, Tasks: []entity.TrainTask{ { - Name: form.JobName, - ResourceSpecId: ctx.Spec.SourceSpecId, - 
ImageId: form.ImageID, - ImageUrl: imageUrl, - Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), - Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), + Name: form.JobName, + ResourceSpecId: ctx.Spec.SourceSpecId, + ImageId: form.ImageID, + ImageUrl: imageUrl, + Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), + Code: ctx.GetContainerDataArray(entity.ContainerCode), + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), BootFile: form.BootFile, OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), @@ -189,12 +193,16 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) * ImageUrl: strings.TrimSpace(form.ImageUrl), Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), - PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), - BootFile: form.BootFile, - OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), - Params: form.ParamArray, - Spec: ctx.Spec, + CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + UserId: ctx.User.ID, + JobType: g.JobType, + HasInternet: form.HasInternet, + }), + PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), + BootFile: form.BootFile, + OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), + Params: form.ParamArray, + Spec: ctx.Spec, }, }, } diff --git a/services/ai_task_service/task/task_base.go b/services/ai_task_service/task/task_base.go index 9ecbd8b8f4..604f58d2a0 100644 --- a/services/ai_task_service/task/task_base.go +++ b/services/ai_task_service/task/task_base.go @@ -61,7 +61,7 @@ type AITaskTemplate interface { 
GetOperationProfile(cloudbrainId int64) (*entity.OperationProfile, *response.BizError) GetResourceUsage(opts entity.GetResourceUsageOpts) (*entity.ResourceUsage, *response.BizError) GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) - GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) + GetSpecs(opts entity.GetSpecOpts) ([]*api.SpecificationShow, *response.BizError) GetConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig GetNodeInfo(cloudbrainId int64) ([]entity.AITaskNodeInfo, *response.BizError) GetAllowedWorkerNum(userId int64, computeSource *models.ComputeSource) ([]int, *response.BizError) @@ -484,11 +484,12 @@ func (g DefaultAITaskTemplate) GetImages(computeSource models.ComputeSource) ([] return images, customFlag, nil } -func (g DefaultAITaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) { - specs, err := resource.FindAvailableSpecs(userId, models.FindSpecsOptions{ +func (g DefaultAITaskTemplate) GetSpecs(opts entity.GetSpecOpts) ([]*api.SpecificationShow, *response.BizError) { + specs, err := resource.FindAvailableSpecs(opts.UserId, models.FindSpecsOptions{ JobType: g.JobType, - ComputeResource: computeSource.Name, + ComputeResource: opts.ComputeSource.Name, Cluster: g.ClusterType.GetParentCluster(), + HasInternet: opts.HasInternet, }) if err != nil { log.Error("GetSpecs err.%v", err) diff --git a/services/ai_task_service/task/task_creation_info.go b/services/ai_task_service/task/task_creation_info.go index 781ba45588..53560b4d85 100644 --- a/services/ai_task_service/task/task_creation_info.go +++ b/services/ai_task_service/task/task_creation_info.go @@ -6,6 +6,7 @@ import ( "code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/routers/response" 
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" "code.gitea.io/gitea/services/reward/point/account" @@ -59,10 +60,31 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio result.Images = images result.CanUseAllImages = canUseAll } - //查询资源规格 - if specs, err := t.GetSpecs(req.User.ID, *req.ComputeSource); err == nil { - result.Specs = specs + specsMap := make(map[string][]*structs.SpecificationShow, 0) + //查询有网资源规格 + if specs, err := t.GetSpecs(entity.GetSpecOpts{ + UserId: req.User.ID, + ComputeSource: *req.ComputeSource, + HasInternet: 2, //0 all;1 no internet;2 has internet + }); err == nil { + specsMap["has_internet"] = specs } + //查询无网资源规格 + if specs, err := t.GetSpecs(entity.GetSpecOpts{ + UserId: req.User.ID, + ComputeSource: *req.ComputeSource, + HasInternet: 1, //0 all;1 no internet;2 has internet + }); err == nil { + specsMap["no_internet"] = specs + } + //查询所有资源规格 + if specs, err := t.GetSpecs(entity.GetSpecOpts{ + UserId: req.User.ID, + ComputeSource: *req.ComputeSource, + }); err == nil { + specsMap["all"] = specs + } + result.Specs = specsMap result.Config = entity.AITaskCreationConfig{ DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, } -- 2.34.1 From 9e720901fea2b3b0b90a7d05f4d84e4521bb46f2 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Thu, 3 Aug 2023 17:32:51 +0800 Subject: [PATCH 04/24] #4556 fix bug --- entity/ai_task.go | 54 +++++++++---------- models/resource_specification.go | 22 ++++++-- modules/grampus/grampus.go | 5 +- .../task/cloudbrain_one_notebook_task.go | 2 +- .../task/cloudbrain_one_train_task.go | 4 +- .../task/cloudbrain_two_train_task.go | 2 +- .../task/grampus_notebook_task.go | 2 +- .../task/grampus_online_infer_task.go | 2 +- .../task/grampus_train_task.go | 4 +- 9 files changed, 55 insertions(+), 42 deletions(-) diff --git a/entity/ai_task.go b/entity/ai_task.go index 4df069ac04..ea768ebe3d 100644 --- a/entity/ai_task.go +++ b/entity/ai_task.go @@ -20,32 +20,32 @@ import ( 
// todo 暂时保留之前各种云脑属性的定义 type CreateReq struct { - JobType models.JobType `json:"job_type" binding:"Required"` - DisplayJobName string `json:"display_job_name" binding:"Required"` - JobName string `json:"job_name"` - SpecId int64 `json:"spec_id" binding:"Required"` - ComputeSourceStr string `json:"compute_source" binding:"Required"` - Cluster ClusterType `json:"cluster" binding:"Required"` - WorkServerNumber int `json:"work_server_number"` - BranchName string `json:"branch_name"` - PreTrainModelUrl string `json:"pretrain_model_url"` - PretrainModelCkptName string `json:"pretrain_model_ckpt_name"` - ImageUrl string `json:"image_url"` - ImageID string `json:"image_id"` - ImageName string `json:"image_name"` - PretrainModelName string `json:"pretrain_model_name"` - PretrainModelVersion string `json:"pretrain_model_version"` - PretrainModelId string `json:"pretrain_model_id"` - Description string `json:"description"` - LabelName string `json:"label_names"` - DatasetUUIDStr string `json:"dataset_uuid_str"` - Params string `json:"params"` - BootFile string `json:"boot_file"` - PoolId string `json:"pool_id"` - IsContinueRequest bool `json:"is_continue"` - SourceCloudbrainId int64 `json:"source_cloudbrain_id"` - AppName string `json:"app_name"` - HasInternet int `json:"has_internet"` //0 all;1 no internet;2 has internet + JobType models.JobType `json:"job_type" binding:"Required"` + DisplayJobName string `json:"display_job_name" binding:"Required"` + JobName string `json:"job_name"` + SpecId int64 `json:"spec_id" binding:"Required"` + ComputeSourceStr string `json:"compute_source" binding:"Required"` + Cluster ClusterType `json:"cluster" binding:"Required"` + WorkServerNumber int `json:"work_server_number"` + BranchName string `json:"branch_name"` + PreTrainModelUrl string `json:"pretrain_model_url"` + PretrainModelCkptName string `json:"pretrain_model_ckpt_name"` + ImageUrl string `json:"image_url"` + ImageID string `json:"image_id"` + ImageName string `json:"image_name"` 
+ PretrainModelName string `json:"pretrain_model_name"` + PretrainModelVersion string `json:"pretrain_model_version"` + PretrainModelId string `json:"pretrain_model_id"` + Description string `json:"description"` + LabelName string `json:"label_names"` + DatasetUUIDStr string `json:"dataset_uuid_str"` + Params string `json:"params"` + BootFile string `json:"boot_file"` + PoolId string `json:"pool_id"` + IsContinueRequest bool `json:"is_continue"` + SourceCloudbrainId int64 `json:"source_cloudbrain_id"` + AppName string `json:"app_name"` + HasInternet models.SpecInternetQuery `json:"has_internet"` //0 all;1 no internet;2 has internet ParamArray models.Parameters ComputeSource *models.ComputeSource ReqCommitID string @@ -456,7 +456,7 @@ type GetSpecOpts struct { UserId int64 ComputeSource models.ComputeSource JobType models.JobType - HasInternet int //0 all;1 no internet;2 has internet + HasInternet models.SpecInternetQuery //0 all;1 no internet;2 has internet } type AITaskNodeInfo struct { diff --git a/models/resource_specification.go b/models/resource_specification.go index 972bb6019d..1ecc5cd57d 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -1,7 +1,6 @@ package models import ( - "code.gitea.io/gitea/entity" "code.gitea.io/gitea/modules/timeutil" "fmt" "strings" @@ -29,6 +28,14 @@ const ( HasInternet AICenterInternetStatus = 2 ) +type SpecInternetQuery int + +const ( + QueryAllSpecs SpecInternetQuery = 0 + QueryNoInternetSpecs SpecInternetQuery = 1 + QueryHasInternetSpecs SpecInternetQuery = 2 +) + type ResourceSpecification struct { ID int64 `xorm:"pk autoincr"` QueueId int64 `xorm:"INDEX"` @@ -213,7 +220,7 @@ type FindSpecsOptions struct { //if true,find specs no matter used or not used in scene. 
if false,only find specs used in scene RequestAll bool SpecStatus int - HasInternet int //0 all,1 no internet,2 has internet + HasInternet SpecInternetQuery //0 all,1 no internet,2 has internet } type Specification struct { @@ -243,7 +250,7 @@ func (Specification) TableName() string { return "resource_specification" } -func (s *Specification) loadRelatedSpecs(jobType JobType, hasInternet int) { +func (s *Specification) loadRelatedSpecs(jobType JobType, hasInternet SpecInternetQuery) { if s.RelatedSpecs != nil { return } @@ -267,7 +274,14 @@ func (s *Specification) loadRelatedSpecs(jobType JobType, hasInternet int) { } s.RelatedSpecs = r } -func (s *Specification) GetAvailableCenterIds(opts entity.GetSpecOpts) []string { + +type GetAvailableCenterIdOpts struct { + UserId int64 + JobType JobType + HasInternet SpecInternetQuery +} + +func (s *Specification) GetAvailableCenterIds(opts GetAvailableCenterIdOpts) []string { s.loadRelatedSpecs(opts.JobType, opts.HasInternet) if len(s.RelatedSpecs) == 0 { diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index 6593f4a4a6..0deb7be862 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -1,7 +1,6 @@ package grampus import ( - "code.gitea.io/gitea/entity" "encoding/json" "fmt" "strconv" @@ -280,7 +279,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, Command: req.Command, - CenterID: req.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: req.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: models.JobTypeDebug, }), @@ -447,7 +446,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str ResourceSpecId: req.Spec.SourceSpecId, ImageId: req.ImageId, ImageUrl: req.ImageUrl, - CenterID: req.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: req.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ 
UserId: ctx.User.ID, JobType: models.JobTypeTrain, }), diff --git a/services/ai_task_service/task/cloudbrain_one_notebook_task.go b/services/ai_task_service/task/cloudbrain_one_notebook_task.go index b9d07297ab..2f5f3658aa 100644 --- a/services/ai_task_service/task/cloudbrain_one_notebook_task.go +++ b/services/ai_task_service/task/cloudbrain_one_notebook_task.go @@ -146,7 +146,7 @@ func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.Creation OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, diff --git a/services/ai_task_service/task/cloudbrain_one_train_task.go b/services/ai_task_service/task/cloudbrain_one_train_task.go index 533c45c9f4..4a8716a71e 100644 --- a/services/ai_task_service/task/cloudbrain_one_train_task.go +++ b/services/ai_task_service/task/cloudbrain_one_train_task.go @@ -108,7 +108,7 @@ func (g CloudbrainOneTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon ImageUrl: strings.TrimSpace(form.ImageUrl), Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, @@ -153,7 +153,7 @@ func (g CloudbrainOneTrainTaskTemplate) CallRestartAPI(ctx *context.CreationCont ImageUrl: strings.TrimSpace(form.ImageUrl), Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: 
ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, diff --git a/services/ai_task_service/task/cloudbrain_two_train_task.go b/services/ai_task_service/task/cloudbrain_two_train_task.go index f928f70528..ba46c828cf 100644 --- a/services/ai_task_service/task/cloudbrain_two_train_task.go +++ b/services/ai_task_service/task/cloudbrain_two_train_task.go @@ -121,7 +121,7 @@ func (g CloudbrainTwoTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, diff --git a/services/ai_task_service/task/grampus_notebook_task.go b/services/ai_task_service/task/grampus_notebook_task.go index c99dd9811b..5ffeb5fca7 100644 --- a/services/ai_task_service/task/grampus_notebook_task.go +++ b/services/ai_task_service/task/grampus_notebook_task.go @@ -190,7 +190,7 @@ func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContex Code: ctx.GetContainerDataArray(entity.ContainerCode), AutoStopDuration: autoStopDurationMs, Capacity: setting.Capacity, - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, diff --git a/services/ai_task_service/task/grampus_online_infer_task.go b/services/ai_task_service/task/grampus_online_infer_task.go index e2241fa83b..ac1920f5ec 100644 --- a/services/ai_task_service/task/grampus_online_infer_task.go +++ b/services/ai_task_service/task/grampus_online_infer_task.go @@ -119,7 +119,7 @@ func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationCon OutPut: 
ctx.GetContainerDataArray(entity.ContainerOutPutPath), AutoStopDuration: -1, Capacity: setting.Capacity, - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, diff --git a/services/ai_task_service/task/grampus_train_task.go b/services/ai_task_service/task/grampus_train_task.go index 15d1f93b7b..fe47a1cfd1 100644 --- a/services/ai_task_service/task/grampus_train_task.go +++ b/services/ai_task_service/task/grampus_train_task.go @@ -146,7 +146,7 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext) ImageUrl: imageUrl, Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, @@ -193,7 +193,7 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) * ImageUrl: strings.TrimSpace(form.ImageUrl), Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), Code: ctx.GetContainerDataArray(entity.ContainerCode), - CenterID: ctx.Spec.GetAvailableCenterIds(entity.GetSpecOpts{ + CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{ UserId: ctx.User.ID, JobType: g.JobType, HasInternet: form.HasInternet, -- 2.34.1 From 369c1a34684418f18f6687131a715e01a2d8ca8f Mon Sep 17 00:00:00 2001 From: chenshihai Date: Fri, 4 Aug 2023 09:59:26 +0800 Subject: [PATCH 05/24] #4556 --- .../components/cloudbrain/NetworkType.vue | 71 +++++++++++++++++++ .../components/cloudbrain/SpecSelect.vue | 26 +++++-- web_src/vuepages/pages/cloudbrain/configs.js | 16 +++++ .../pages/cloudbrain/create/index.vue | 60 ++++++++++++---- .../pages/supercompute/create/index.vue | 47 ++++++++---- 5 files changed, 186 
insertions(+), 34 deletions(-) create mode 100644 web_src/vuepages/components/cloudbrain/NetworkType.vue diff --git a/web_src/vuepages/components/cloudbrain/NetworkType.vue b/web_src/vuepages/components/cloudbrain/NetworkType.vue new file mode 100644 index 0000000000..219a710479 --- /dev/null +++ b/web_src/vuepages/components/cloudbrain/NetworkType.vue @@ -0,0 +1,71 @@ + + + + + diff --git a/web_src/vuepages/components/cloudbrain/SpecSelect.vue b/web_src/vuepages/components/cloudbrain/SpecSelect.vue index bef4a81739..7d2ba7ebc8 100644 --- a/web_src/vuepages/components/cloudbrain/SpecSelect.vue +++ b/web_src/vuepages/components/cloudbrain/SpecSelect.vue @@ -42,7 +42,8 @@
{{ $t('specObj.resSelectTips') }}
- {{ $t('cloudbrainObj.specDescr') }} + {{ + $t('cloudbrainObj.specDescr') }} @@ -56,6 +57,7 @@ export default { value: { type: String, required: true, }, configs: { type: Object, required: true, }, required: { type: Boolean, default: true }, + networkType: { type: String, default: 'no_internet' }, workServerNum: { type: Number, default: 1 }, }, data() { @@ -85,6 +87,13 @@ export default { this.renderSpec(); } }, + networkType: { + immediate: true, + handler() { + const resetSpec = true; + this.renderSpec(resetSpec); + } + }, workServerNum: { immediate: true, handler() { @@ -93,12 +102,15 @@ export default { }, }, methods: { - renderSpec() { + renderSpec(resetSpec) { const showPoint = this.configs.showPoint || false; - const specs = this.configs.specs || []; + const specs = this.configs.specs[this.networkType] || []; this.list = specs.map((item) => { return renderSpecObject(item, showPoint) }); + if (resetSpec) { + this.spec = specs.length ? specs[0].id.toString() : ''; + } this.changeSpec(this.spec); }, changeSpec() { @@ -146,8 +158,7 @@ export default { return this.list.filter(item => item.id == this.spec)[0]; }, }, - beforeMount() { - } + beforeMount() { } }; @@ -159,13 +170,15 @@ export default { flex: inherit; width: 50%; margin-right: 5px; - .spec-list-c{ + + .spec-list-c { min-height: 37.6px; border-radius: 4px; border: 1px solid #DCDFE6; box-sizing: border-box; color: #606266; padding: 4px 15px; + .spec-item-placeholder { height: 27px; line-height: 27px; @@ -174,6 +187,7 @@ export default { font-size: 14px; } } + .spec-info { flex: 1; diff --git a/web_src/vuepages/pages/cloudbrain/configs.js b/web_src/vuepages/pages/cloudbrain/configs.js index 2a17343498..fddfc6eac5 100644 --- a/web_src/vuepages/pages/cloudbrain/configs.js +++ b/web_src/vuepages/pages/cloudbrain/configs.js @@ -52,6 +52,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev1: { required: true }, dataset: { required: false, type: 0, useExceedSize: true }, + 
networkType: { required: true }, spec: { required: true }, /* just test */ // imagev2: { required: true }, @@ -72,6 +73,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev2: { required: true }, dataset: { required: false, type: 1, useExceedSize: true }, + networkType: { required: true }, spec: { required: true }, }, }], @@ -95,6 +97,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev1: { required: true, type: 2 }, dataset: { required: false, type: 0, useExceedSize: true }, + networkType: { required: true }, spec: {}, }, }], @@ -109,6 +112,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev2: { required: true }, dataset: { required: false, type: 1, useExceedSize: true }, + networkType: { required: true }, spec: { required: true }, }, }], @@ -127,6 +131,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev2: { required: true }, dataset: { required: false, useExceedSize: true }, + networkType: { required: true }, spec: { required: true }, }, }], @@ -145,6 +150,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev2: { required: true }, dataset: { required: false, useExceedSize: true }, + networkType: { required: true }, spec: { required: true }, }, }], @@ -163,6 +169,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev2: { required: true }, dataset: { required: false, useExceedSize: true }, + networkType: { required: true }, spec: { required: true }, }, }], @@ -194,6 +201,7 @@ export const CreatePageConfigs = { imagev1: { required: true }, bootFile: { required: true, sampleUrl: 'https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU' }, dataset: { required: true, type: 0 }, + networkType: { required: true }, runParameters: { required: false }, spec: { required: true }, /* just test */ @@ -225,6 +233,7 @@ export const CreatePageConfigs = { 
bootFile: { required: true, sampleUrl: 'https://openi.pcl.ac.cn/OpenIOSSG/MINIST_Example' }, dataset: { required: true, type: 1 }, runParameters: { required: false }, + networkType: { required: true }, spec: { required: true }, workServerNum: { required: true }, }, @@ -256,6 +265,7 @@ export const CreatePageConfigs = { bootFile: { required: true, sampleUrl: 'https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU/src/branch/master/train_for_c2net.py' }, dataset: { required: true, type: 0 }, runParameters: { required: false }, + networkType: { required: true }, spec: {}, }, modify: { showIsContinue: false, }, @@ -277,6 +287,7 @@ export const CreatePageConfigs = { bootFile: { required: true, sampleUrl: 'https://openi.pcl.ac.cn/OpenIOSSG/MNIST_Example/src/branch/master/train_for_c2net.py' }, dataset: { required: true, type: 1 }, runParameters: { required: false }, + networkType: { required: true }, spec: { required: true }, workServerNum: { required: true }, }, @@ -300,6 +311,7 @@ export const CreatePageConfigs = { bootFile: { required: true, sampleUrl: 'https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GCU/src/branch/master/train_for_c2net.py' }, dataset: { required: true }, runParameters: { required: false }, + networkType: { required: true }, spec: { required: true }, workServerNum: { required: true }, }, @@ -322,6 +334,7 @@ export const CreatePageConfigs = { bootFile: { required: true, sampleUrl: '' }, dataset: { required: true }, runParameters: { required: false }, + networkType: { required: true }, spec: { required: true }, }, modify: { showIsContinue: false, }, @@ -372,6 +385,7 @@ export const CreatePageConfigs = { model: { required: false, }, algBechmarkType: { required: true, }, imagev1: { required: true, }, + networkType: { required: true }, spec: { required: true, }, }, @@ -441,6 +455,7 @@ export const CreatePageConfigs = { model: { required: false, multiple: true }, imagev1: { required: true, type: 2 }, dataset: { required: false, type: 0, 
useExceedSize: true }, + networkType: { required: true }, spec: {}, }, }], @@ -953,6 +968,7 @@ export const DetailPageConfigs = { model: { required: false, }, algBechmarkType: { required: true, }, imagev1: { required: true, }, + networkType: { required: true }, spec: { required: true, }, }, diff --git a/web_src/vuepages/pages/cloudbrain/create/index.vue b/web_src/vuepages/pages/cloudbrain/create/index.vue index 0206c48627..055b854f0b 100644 --- a/web_src/vuepages/pages/cloudbrain/create/index.vue +++ b/web_src/vuepages/pages/cloudbrain/create/index.vue @@ -60,8 +60,9 @@ + + :configs="specConfigs" :workServerNum="state.workServerNum" :networkType="state.networkType"> @@ -82,8 +83,9 @@
- {{ $t('cloudbrainObj.createTask') }} + {{ $t('cloudbrainObj.createTask') }} {{ $t('cancel') }}
@@ -107,6 +109,7 @@ import ImageSelectV2 from '~/components/cloudbrain/ImageSelectV2.vue'; import BootFile from '~/components/cloudbrain/BootFile.vue'; import DatasetSelect from '~/components/cloudbrain/DatasetSelect.vue'; import RunParameters from '~/components/cloudbrain/RunParameters.vue'; +import NetworkType from '~/components/cloudbrain/NetworkType.vue'; import SpecSelect from '~/components/cloudbrain/SpecSelect.vue'; import WorkServerNum from '~/components/cloudbrain/WorkServerNum.vue'; import AlgBechmarkType from '~/components/cloudbrain/AlgBechmarkType.vue'; @@ -136,6 +139,7 @@ export default { bootFile: '', dataset: [], runParameters: [], + networkType: 'no_internet', spec: '', workServerNum: 1, algBechmarkType: ['1', ''], @@ -145,7 +149,11 @@ export default { engineList: [], imageList: [], specConfigs: { - specs: [], + specs: { + 'all': [], + 'no_internet': [], + 'has_internet': [], + }, blance: 0, showPoint: false, }, @@ -164,8 +172,8 @@ export default { }; }, components: { - FormTop, TaskName, TaskDescr, BranchName, BootFile, AIEngineSelect, ImageSelectV1, - ImageSelectV2, ModelSelect, DatasetSelect, RunParameters, SpecSelect, WorkServerNum, + FormTop, TaskName, TaskDescr, BranchName, BootFile, AIEngineSelect, ImageSelectV1, ImageSelectV2, + ModelSelect, DatasetSelect, RunParameters, NetworkType, SpecSelect, WorkServerNum, AlgBechmarkType, LoadingMask }, @@ -235,6 +243,15 @@ export default { } subObj['params'] = JSON.stringify(params); break; + case 'networkType': + let networkType = 0; // all + if (this.state.networkType == 'no_internet') { + networkType = 1; + } else if (this.state.networkType == 'has_internet') { + networkType = 2; + } + subObj['has_internet'] = networkType; + break; case 'spec': subObj['spec_id'] = Number(this.state.spec); break; @@ -313,15 +330,23 @@ export default { this.state.branchName = ''; } } + let networkType = 'all'; + if (task.has_internet == 1) { + networkType = 'no_internet'; + } else if (task.has_internet == 2) { + 
networkType = 'has_internet' + } + this.state.networkType = networkType; if (this.formCfg['spec'] && task.spec) { this.state.spec = ''; + const specs = this.specConfigs.specs[this.state.networkType] || []; if (task.spec.source_spec_id) { - const find = this.specConfigs.specs.filter(item => item.source_spec_id == task.spec.source_spec_id); + const find = specs.filter(item => item.source_spec_id == task.spec.source_spec_id); if (find.length) { this.state.spec = find[0].id.toString(); } } else if (task.spec.id) { - if (this.specConfigs.specs.filter(item => item.id == task.spec.id).length) { + if (specs.filter(item => item.id == task.spec.id).length) { this.state.spec = task.spec.id.toString(); } } @@ -462,14 +487,21 @@ export default { this.alreadyMsgBoxShow = data.not_stop_task_count > 0; this.specConfigs.showPoint = data.pay_switch; this.specConfigs.blance = data.point_account ? data.point_account.balance : 0; - this.specConfigs.specs = data.specs || []; - if (!this.specConfigs.specs.length) { - this.noSpecFlag = true + this.specConfigs.specs = data.specs || { + 'all': [], + 'no_internet': [], + 'has_internet': [], + }; + this.state.networkType = this.formCfg.networkType ? 'no_internet' : 'all'; + if (this.state.networkType == 'no_internet' + && !this.specConfigs.specs['no_internet'].length + && this.specConfigs.specs['has_internet'].length) { + this.state.networkType = 'has_internet'; } + this.state.spec = this.specConfigs.specs[this.state.networkType][0] ? this.specConfigs.specs[this.state.networkType][0].id.toString() : ''; this.queueNum = data.wait_count || 1; this.state.branchName = data.default_branch; this.state.taskName = data.display_job_name; - this.state.spec = this.specConfigs.specs.length ? this.specConfigs.specs[0].id.toString() : ''; this.state.image = this.imageList.length ? 
{ image_id: this.imageList[0].image_id, image_name: this.imageList[0].image_name, @@ -486,9 +518,7 @@ export default { this.modeifyTaskId = taskId; this.getTaskInfo(taskId); } - } else { - - } + } else { } }).catch(err => { console.log(err); }); diff --git a/web_src/vuepages/pages/supercompute/create/index.vue b/web_src/vuepages/pages/supercompute/create/index.vue index b6b9cff383..d3605e2e37 100644 --- a/web_src/vuepages/pages/supercompute/create/index.vue +++ b/web_src/vuepages/pages/supercompute/create/index.vue @@ -51,13 +51,15 @@ + + :configs="specConfigs" :workServerNum="state.workServerNum" :networkType="state.networkType">
- {{ $t('cloudbrainObj.createTask') }} + {{ $t('cloudbrainObj.createTask') }} {{ $t('cancel') }}
@@ -77,6 +79,7 @@ import BranchName from '~/components/cloudbrain/BranchName.vue'; import ModelSelect from '~/components/cloudbrain/ModelSelect.vue'; import ImageSelectV2 from '~/components/cloudbrain/ImageSelectV2.vue'; import DatasetSelect from '~/components/cloudbrain/DatasetSelect.vue'; +import NetworkType from '~/components/cloudbrain/NetworkType.vue'; import SpecSelect from '~/components/cloudbrain/SpecSelect.vue'; import LoadingMask from '~/components/cloudbrain/LoadingMask.vue'; import { getCreatePageConfigs } from '../configs'; @@ -100,12 +103,17 @@ export default { image_url: '', image: {}, dataset: [], + networkType: 'no_internet', spec: '', }, branchList: [], imageList: [], specConfigs: { - specs: [], + specs: { + 'all': [], + 'no_internet': [], + 'has_internet': [], + }, blance: 0, showPoint: false, }, @@ -117,11 +125,10 @@ export default { maskLoading: false, maskLoadingContent: '', datasetSize: 0, - noSpecFlag: false, }; }, components: { - FormTop, TaskName, TaskDescr, BranchName, ImageSelectV2, ModelSelect, DatasetSelect, SpecSelect, + FormTop, TaskName, TaskDescr, BranchName, ImageSelectV2, ModelSelect, DatasetSelect, SpecSelect, NetworkType, LoadingMask }, methods: { @@ -171,6 +178,15 @@ export default { case 'dataset': subObj['dataset_uuid_str'] = this.state.dataset.map(item => item.id).join(';'); break; + case 'networkType': + let networkType = 0; // all + if (this.state.networkType == 'no_internet') { + networkType = 1; + } else if (this.state.networkType == 'has_internet') { + networkType = 2; + } + subObj['has_internet'] = networkType; + break; case 'spec': subObj['spec_id'] = Number(this.state.spec); break; @@ -241,14 +257,21 @@ export default { this.alreadyMsgBoxShow = data.not_stop_task_count > 0; this.specConfigs.showPoint = data.pay_switch; this.specConfigs.blance = data.point_account ? 
data.point_account.balance : 0; - this.specConfigs.specs = data.specs || []; - if (!this.specConfigs.specs.length) { - this.noSpecFlag = true + this.specConfigs.specs = data.specs || { + 'all': [], + 'no_internet': [], + 'has_internet': [], + }; + this.state.networkType = this.formCfg.networkType ? 'no_internet' : 'all'; + if (this.state.networkType == 'no_internet' + && !this.specConfigs.specs['no_internet'].length + && this.specConfigs.specs['has_internet'].length) { + this.state.networkType = 'has_internet'; } + this.state.spec = this.specConfigs.specs[this.state.networkType][0] ? this.specConfigs.specs[this.state.networkType][0].id.toString() : ''; this.queueNum = data.wait_count || 1; this.state.branchName = data.default_branch; this.state.taskName = data.display_job_name; - this.state.spec = this.specConfigs.specs.length ? this.specConfigs.specs[0].id.toString() : ''; this.state.image = this.imageList.length ? { image_id: this.imageList[0].image_id, image_name: this.imageList[0].image_name, @@ -256,9 +279,7 @@ export default { if (data.config && data.config.dataset_max_size) { this.datasetSize = data.config.dataset_max_size; } - } else { - - } + } else { } }).catch(err => { console.log(err); }); -- 2.34.1 From 625ddde5d570af3ae80f4ec72ceb4b8a2e6c6ad5 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 4 Aug 2023 10:12:22 +0800 Subject: [PATCH 06/24] #4556 update --- models/resource_queue.go | 14 ++++++++++++-- models/resource_scene.go | 2 ++ models/resource_specification.go | 19 ++++++++++++------- routers/admin/resources.go | 4 ++++ .../cloudbrain/resource/resource_queue.go | 3 ++- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/models/resource_queue.go b/models/resource_queue.go index 09b658fe91..a43c30112b 100644 --- a/models/resource_queue.go +++ b/models/resource_queue.go @@ -39,6 +39,7 @@ func (r ResourceQueue) ConvertToRes() *ResourceQueueRes { CardsTotalNum: r.CardsTotalNum, UpdatedTime: r.UpdatedTime, Remark: r.Remark, + 
HasInternet: AICenterInternetStatus(r.HasInternet), } } @@ -49,6 +50,7 @@ type ResourceQueueReq struct { ComputeResource string `binding:"Required"` AccCardType string `binding:"Required"` CardsTotalNum int + HasInternet int CreatorId int64 IsAutomaticSync bool Remark string @@ -62,6 +64,7 @@ func (r ResourceQueueReq) ToDTO() ResourceQueue { ComputeResource: strings.ToUpper(r.ComputeResource), AccCardType: strings.ToUpper(r.AccCardType), CardsTotalNum: r.CardsTotalNum, + HasInternet: r.HasInternet, IsAutomaticSync: r.IsAutomaticSync, Remark: r.Remark, CreatedBy: r.CreatorId, @@ -85,6 +88,7 @@ type SearchResourceQueueOptions struct { AiCenterCode string ComputeResource string AccCardType string + HasInternet SpecInternetQuery } type ResourceQueueListRes struct { @@ -135,6 +139,7 @@ type ResourceQueueRes struct { CardsTotalNum int UpdatedTime timeutil.TimeStamp Remark string + HasInternet AICenterInternetStatus } func InsertResourceQueue(queue ResourceQueue) (int64, error) { @@ -144,8 +149,8 @@ func InsertResourceQueue(queue ResourceQueue) (int64, error) { func UpdateResourceQueueById(queueId int64, queue ResourceQueue) (int64, error) { return x.ID(queueId).Update(&queue) } -func UpdateResourceCardsTotalNum(queueId int64, queue ResourceQueue) (int64, error) { - return x.ID(queueId).Cols("cards_total_num", "remark").Update(&queue) +func UpdateResourceCardsTotalNumAndInternetStatus(queueId int64, queue ResourceQueue) (int64, error) { + return x.ID(queueId).Cols("cards_total_num", "remark", "has_internet").Update(&queue) } func SearchResourceQueue(opts SearchResourceQueueOptions) (int64, []ResourceQueue, error) { @@ -165,6 +170,11 @@ func SearchResourceQueue(opts SearchResourceQueueOptions) (int64, []ResourceQueu if opts.AccCardType != "" { cond = cond.And(builder.Eq{"acc_card_type": opts.AccCardType}) } + if opts.HasInternet == QueryNoInternetSpecs { + cond = cond.And(builder.Eq{"has_internet": NoInternet}) + } else if opts.HasInternet == QueryHasInternetSpecs { + cond 
= cond.And(builder.Eq{"has_internet": HasInternet}) + } n, err := x.Where(cond).Unscoped().Count(&ResourceQueue{}) if err != nil { return 0, nil, err diff --git a/models/resource_scene.go b/models/resource_scene.go index 9a2c34bf1b..8b18401a86 100644 --- a/models/resource_scene.go +++ b/models/resource_scene.go @@ -107,6 +107,7 @@ type ResourceSpecInfo struct { QueueId int64 ComputeResource string AccCardType string + HasInternet int } func (ResourceSpecInfo) TableName() string { @@ -312,6 +313,7 @@ func SearchResourceScene(opts SearchResourceSceneOptions) (int64, []ResourceScen "resource_queue.ai_center_code", "resource_queue.acc_card_type", "resource_queue.id as queue_id", "resource_queue.compute_resource", "resource_queue.queue_code", "resource_queue.ai_center_name", + "resource_queue.has_internet", ).In("resource_scene_spec.scene_id", sceneIds). Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id"). 
diff --git a/models/resource_specification.go b/models/resource_specification.go index 1ecc5cd57d..f19c17826c 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -114,6 +114,7 @@ type SearchResourceSpecificationOptions struct { AccCardsNum int ComputeResource string AccCardType string + HasInternet SpecInternetQuery } type SearchResourceBriefSpecificationOptions struct { @@ -385,6 +386,12 @@ func SearchResourceSpecification(opts SearchResourceSpecificationOptions) (int64 } else if opts.AvailableCode == 2 { cond = cond.And(builder.Eq{"resource_specification.is_available": false}) } + + if opts.HasInternet == QueryNoInternetSpecs { + cond = cond.And(builder.Eq{"resource_queue.has_internet": NoInternet}) + } else if opts.HasInternet == QueryHasInternetSpecs { + cond = cond.And(builder.Eq{"resource_queue.has_internet": HasInternet}) + } //cond = cond.And(builder.Or(builder.Eq{"resource_queue.deleted_time": 0}).Or(builder.IsNull{"resource_queue.deleted_time"})) n, err := x.Where(cond).Join("INNER", "resource_queue", "resource_queue.ID = resource_specification.queue_id"). Unscoped().Count(&ResourceSpecAndQueue{}) @@ -557,14 +564,12 @@ func FindSpecs(opts FindSpecsOptions) ([]*Specification, error) { if opts.SpecStatus > 0 { cond = cond.And(builder.Eq{"resource_specification.status": opts.SpecStatus}) } - if opts.HasInternet > 0 { - if opts.HasInternet == 1 { - cond = cond.And(builder.Eq{"resource_queue.has_internet": NoInternet}) - } else if opts.HasInternet == 2 { - cond = cond.And(builder.Eq{"resource_queue.has_internet": HasInternet}) - } - + if opts.HasInternet == QueryNoInternetSpecs { + cond = cond.And(builder.Eq{"resource_queue.has_internet": NoInternet}) + } else if opts.HasInternet == QueryHasInternetSpecs { + cond = cond.And(builder.Eq{"resource_queue.has_internet": HasInternet}) } + r := make([]*Specification, 0) s := x.Where(cond). 
Join("INNER", "resource_queue", "resource_queue.id = resource_specification.queue_id") diff --git a/routers/admin/resources.go b/routers/admin/resources.go index d7c2394ad2..42d6222e92 100644 --- a/routers/admin/resources.go +++ b/routers/admin/resources.go @@ -45,12 +45,14 @@ func GetResourceQueueList(ctx *context.Context) { aiCenterCode := ctx.Query("center") computeResource := ctx.Query("resource") accCardType := ctx.Query("card") + hasInternet := ctx.QueryInt("has_internet") list, err := resource.GetResourceQueueList(models.SearchResourceQueueOptions{ ListOptions: models.ListOptions{Page: page, PageSize: 10}, Cluster: cluster, AiCenterCode: aiCenterCode, ComputeResource: computeResource, AccCardType: accCardType, + HasInternet: models.SpecInternetQuery(hasInternet), }) if err != nil { log.Error("GetResourceQueueList error.%v", err) @@ -124,6 +126,7 @@ func GetResourceSpecificationList(ctx *context.Context) { cardsNum := ctx.QueryInt("cardsNum") computeResource := ctx.Query("resource") cardType := ctx.Query("cardType") + hasInternet := ctx.QueryInt("has_internet") list, err := resource.GetResourceSpecificationList(models.SearchResourceSpecificationOptions{ ListOptions: models.ListOptions{Page: page, PageSize: 10}, QueueId: queue, @@ -134,6 +137,7 @@ func GetResourceSpecificationList(ctx *context.Context) { AccCardsNum: cardsNum, ComputeResource: computeResource, AccCardType: cardType, + HasInternet: models.SpecInternetQuery(hasInternet), }) if err != nil { log.Error("GetResourceSpecificationList error.%v", err) diff --git a/services/cloudbrain/resource/resource_queue.go b/services/cloudbrain/resource/resource_queue.go index 4e2dac8deb..e1f67e7d66 100644 --- a/services/cloudbrain/resource/resource_queue.go +++ b/services/cloudbrain/resource/resource_queue.go @@ -16,9 +16,10 @@ func AddResourceQueue(req models.ResourceQueueReq) error { } func UpdateResourceQueue(queueId int64, req models.ResourceQueueReq) error { - if _, err := 
models.UpdateResourceCardsTotalNum(queueId, models.ResourceQueue{ + if _, err := models.UpdateResourceCardsTotalNumAndInternetStatus(queueId, models.ResourceQueue{ CardsTotalNum: req.CardsTotalNum, Remark: req.Remark, + HasInternet: req.HasInternet, }); err != nil { return err } -- 2.34.1 From dfba9f35ffbf4428321160f79a33dcb42ef654d0 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 4 Aug 2023 10:37:26 +0800 Subject: [PATCH 07/24] #4556 update --- routers/admin/resources.go | 1 - 1 file changed, 1 deletion(-) diff --git a/routers/admin/resources.go b/routers/admin/resources.go index 42d6222e92..70e62ea13c 100644 --- a/routers/admin/resources.go +++ b/routers/admin/resources.go @@ -97,7 +97,6 @@ func AddResourceQueue(ctx *context.Context, req models.ResourceQueueReq) { func UpdateResourceQueue(ctx *context.Context, req models.ResourceQueueReq) { queueId := ctx.ParamsInt64(":id") - //only CardsTotalNum permitted to change err := resource.UpdateResourceQueue(queueId, req) if err != nil { log.Error("UpdateResourceQueue error. 
%v", err) -- 2.34.1 From 49e6aecc2e2dee833b8740d03bd693641a1643e4 Mon Sep 17 00:00:00 2001 From: chenshihai Date: Fri, 4 Aug 2023 11:15:59 +0800 Subject: [PATCH 08/24] update network type related --- .../components/cloudbrain/NetworkType.vue | 10 +++++----- web_src/vuepages/const/index.js | 3 ++- web_src/vuepages/langs/config/en-US.js | 6 ++++++ web_src/vuepages/langs/config/zh-CN.js | 6 ++++++ .../resources/components/QueueDialog.vue | 18 +++++++++++++++--- .../resources/components/SceneDialog.vue | 6 ++++-- .../pages/resources/components/SpecSelect.vue | 10 +++++++--- .../vuepages/pages/resources/queue/index.vue | 19 ++++++++++++++----- .../vuepages/pages/resources/scene/index.vue | 16 ++++++++++++---- .../pages/resources/specification/index.vue | 11 ++++++++++- 10 files changed, 81 insertions(+), 24 deletions(-) diff --git a/web_src/vuepages/components/cloudbrain/NetworkType.vue b/web_src/vuepages/components/cloudbrain/NetworkType.vue index 219a710479..4e01917eb3 100644 --- a/web_src/vuepages/components/cloudbrain/NetworkType.vue +++ b/web_src/vuepages/components/cloudbrain/NetworkType.vue @@ -1,18 +1,18 @@