{{.i18n.Tr "cloudbrain.task_delete_confirm"}}
-diff --git a/models/action.go b/models/action.go index 33b3b77a88..85fa2c184e 100755 --- a/models/action.go +++ b/models/action.go @@ -50,32 +50,34 @@ const ( ActionRejectPullRequest // 22 ActionCommentPull // 23 - ActionUploadAttachment //24 - ActionCreateDebugGPUTask //25 - ActionCreateDebugNPUTask //26 - ActionCreateTrainTask //27 - ActionCreateInferenceTask // 28 - ActionCreateBenchMarkTask //29 - ActionCreateNewModelTask //30 - ActionCreateGPUTrainTask //31 - ActionCreateGrampusNPUTrainTask //32 - ActionCreateGrampusGPUTrainTask //33 - ActionBindWechat //34 - ActionDatasetRecommended //35 - ActionCreateImage //36 - ActionImageRecommend //37 - ActionChangeUserAvatar //38 - ActionCreateGrampusNPUDebugTask //39 - ActionCreateGrampusGPUDebugTask //40 - ActionCreateGrampusGCUDebugTask //41 - ActionCreateGrampusGCUTrainTask //42 - ActionCreateGrampusMLUDebugTask //43 - ActionCreateGrampusMLUTrainTask //44 - ActionCreateGrampusGPUOnlineInferTask //45 - ActionCreateGrampusDCUDebugTask //46 - ActionCreateSuperComputeTask //47 - ActionCreateGrampusILUVATARDebugTask //48 - ActionCreateGrampusMETAXDebugTask //49 + ActionUploadAttachment //24 + ActionCreateDebugGPUTask //25 + ActionCreateDebugNPUTask //26 + ActionCreateTrainTask //27 + ActionCreateInferenceTask // 28 + ActionCreateBenchMarkTask //29 + ActionCreateNewModelTask //30 + ActionCreateGPUTrainTask //31 + ActionCreateGrampusNPUTrainTask //32 + ActionCreateGrampusGPUTrainTask //33 + ActionBindWechat //34 + ActionDatasetRecommended //35 + ActionCreateImage //36 + ActionImageRecommend //37 + ActionChangeUserAvatar //38 + ActionCreateGrampusNPUDebugTask //39 + ActionCreateGrampusGPUDebugTask //40 + ActionCreateGrampusGCUDebugTask //41 + ActionCreateGrampusGCUTrainTask //42 + ActionCreateGrampusMLUDebugTask //43 + ActionCreateGrampusMLUTrainTask //44 + ActionCreateGrampusGPUOnlineInferTask //45 + ActionCreateGrampusDCUDebugTask //46 + ActionCreateSuperComputeTask //47 + ActionCreateGrampusILUVATARDebugTask //48 + ActionCreateGrampusMETAXDebugTask //49 + ActionCreateGrampusGPUInferenceTask //50 + ActionCreateGrampusILUVATARInferenceTask //51 ) // Action represents user operation type and other information to @@ -431,7 +433,9 @@ func (a *Action) IsCloudbrainAction() bool { ActionCreateGrampusMLUDebugTask, ActionCreateGrampusILUVATARDebugTask, ActionCreateGrampusMETAXDebugTask, - ActionCreateSuperComputeTask: + ActionCreateSuperComputeTask, + ActionCreateGrampusILUVATARInferenceTask, + ActionCreateGrampusGPUInferenceTask: return true } return false diff --git a/models/task_config.go b/models/task_config.go index 803795c88a..efb31988bf 100644 --- a/models/task_config.go +++ b/models/task_config.go @@ -80,7 +80,9 @@ func GetTaskTypeFromAction(a ActionType) TaskType { ActionCreateGrampusMETAXDebugTask, ActionCreateSuperComputeTask, ActionCreateGrampusGPUOnlineInferTask, - ActionCreateGrampusGPUTrainTask: + ActionCreateGrampusGPUTrainTask, + ActionCreateGrampusGPUInferenceTask, + ActionCreateGrampusILUVATARInferenceTask: return TaskCreateCloudbrainTask case ActionCreateRepo: return TaskCreatePublicRepo diff --git a/modules/templates/helper.go b/modules/templates/helper.go index 205ef2b9ba..f99ce63839 100755 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -105,7 +105,7 @@ func NewFuncMap() []template.FuncMap { return setting.UI.Reactions }, "DebugAttachSize": func() int { - return setting.DebugAttachSize * 1000 * 1000 * 1000 + return setting.DebugAttachSize * 1024 * 1024 * 1024 }, "LlmCommonKB": func() string { return setting.LLM_CHAT_API.COMMON_KB diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index c46394b025..29264a1ff8 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1300,7 +1300,7 @@ cloudbrain.morethanonejob2=You can view all your Cloud Brain tasks in initialized first ; debug_task_running_limit =Running time: no more than 4 hours, it will automatically stop if it exceeds 4 hours; @@ -3228,6 +3228,8 @@ task_c2ent_onlineinferjob=`created GPU type online inference task %s` task_nputrainjob=`created NPU training task %s` task_inferencejob=`created reasoning task %s` +task_c2net_gpu_inferencejob=`created GPU type inference task %s` +task_c2net_gpgpu_iluvatar_inferencejob=`created ILUVATAR-GPGPU type inference task %s` task_benchmark=`created profiling task %s` task_createmodel=`created new model %s` task_gputrainjob=`created CPU/GPU training task %s` diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index 1a5df42fba..d3707a1f0c 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -3247,6 +3247,8 @@ task_c2ent_onlineinferjob=`创建了GPU类型在线推理任务 %s` task_nputrainjob=`创建了NPU类型训练任务 %s` task_inferencejob=`创建了推理任务 %s` +task_c2net_gpu_inferencejob=`创建了GPU类型推理任务 %s` +task_c2net_gpgpu_iluvatar_inferencejob=`创建了ILUVATAR-GPGPU类型推理任务 %s` task_benchmark=`创建了评测任务 %s` task_createmodel=`导入了新模型 %s` task_gputrainjob=`创建了CPU/GPU类型训练任务 %s` diff --git a/public/home/home.js b/public/home/home.js index 0cca1a38ea..de41b40def 100755 --- a/public/home/home.js +++ b/public/home/home.js @@ -243,8 +243,8 @@ document.onreadystatechange = function () { html += recordPrefix + actionName; html += " " + getRepotext(record) + "" } - else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" - || record.OpType == "31" || record.OpType == "32" || record.OpType == "33" || record.OpType == "42" || record.OpType == "44"){ + else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "50" || record.OpType == "51" + || record.OpType == "30" || record.OpType == "31" || record.OpType == "32" || record.OpType == "33" || record.OpType == "42" || record.OpType == "44"){ html += recordPrefix + actionName; const taskLink = getTaskLink(record); if (taskLink) { @@ -311,7 +311,17 @@ function getTaskLink(record){ re = ''; } }else if(record.OpType == 28){ - re = re + "/modelarts/inference-job/" + record.Content; + if (record.Cloudbrain) { + re = re + "/modelarts/inference-job/" + record.Cloudbrain.ID; + } else { + re = ''; + } + }else if(record.OpType == 50 || record.OpType == 51){ + if (record.Cloudbrain) { + re = re + "/grampus/inference-job/" + record.Cloudbrain.ID; + } else { + re = ''; + } }else if(record.OpType == 29){ re = re + "/cloudbrain/benchmark/" + record.Content; }else if(record.OpType == 30){ @@ -487,6 +497,8 @@ var actionNameZH={ "26":"创建了NPU类型调试任务", "27":"创建了NPU类型训练任务", "28":"创建了推理任务", + "50":"创建了GPU类型推理任务", + "51":"创建了ILUVATAR-GPGPU类型推理任务", "29":"创建了评测任务", "30":"导入了新模型", "31":"创建了CPU/GPU类型训练任务", @@ -528,7 +540,9 @@ var actionNameEN={ "25":" created CPU/GPU type debugging task ", "26":" created NPU type debugging task ", "27":" created NPU type training task", - "28":" created reasoning task", + "28":" created inference task", + "50":" created GPU type inference task", + "51":" created ILUVATAR-GPGPU type inference task", "29":" created profiling task", "30":" created new model", "31":" created CPU/GPU type training task", diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 4217dfd638..af415fe2ec 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -325,7 +325,7 @@ func cloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + if attachSize > int64(setting.DebugAttachSize*1024*1024*1024) { log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB cloudBrainNewDataPrepare(ctx, jobType) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form) @@ -3008,16 +3008,13 @@ func cloudBrainTrainJobCreate(ctx *context.Context) { } func InferenceCloudBrainJobNew(ctx *context.Context) { - err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeInference)) - if err != nil { - ctx.ServerError("get new train-job info failed", err) - return - } + ctx.Data["PageIsCloudBrain"] = true ctx.HTML(http.StatusOK, tplCloudBrainInferenceJobNew) } func InferenceCloudBrainJobShow(ctx *context.Context) { - cloudBrainShow(ctx, tplCloudBrainInferenceJobShow, models.JobTypeInference) + ctx.Data["PageIsCloudBrain"] = true + ctx.HTML(http.StatusOK, tplCloudBrainInferenceJobShow) } func DownloadGPUInferenceResultFile(ctx *context.Context) { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 42e5583db3..cad381228d 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -67,9 +67,23 @@ const ( tplGrampusTrainJobMLUNew base.TplName = "repo/grampus/trainjob/mlu/new" //C2NET notebook - tplGrampusNotebookNew base.TplName = "repo/grampus/notebook/new" + tplGrampusNotebookNew base.TplName = "repo/grampus/notebook/new" + + // Inference job + tplGrampusInferenceNew base.TplName = "repo/grampus/inference/new" + tplGrampusInferenceShow base.TplName = "repo/grampus/inference/show" ) +func GrampusInferenceNew(ctx *context.Context) { + ctx.Data["PageIsCloudBrain"] = true + ctx.HTML(http.StatusOK, tplGrampusInferenceNew) +} + +func GrampusInferenceShow(ctx *context.Context) { + ctx.Data["PageIsCloudBrain"] = true + ctx.HTML(http.StatusOK, tplGrampusInferenceShow) +} + func GrampusNotebookNew(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true ctx.HTML(http.StatusOK, tplGrampusNotebookNew) @@ -256,7 +270,7 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + if attachSize > int64(setting.DebugAttachSize*1024*1024*1024) { log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB grampusNotebookNewDataPrepare(ctx, processType) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tpl, &form) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index fab373deef..e835535174 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -254,7 +254,7 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + if attachSize > int64(setting.DebugAttachSize*1024*1024*1024) { log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) //GB notebookNewDataPrepare(ctx) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize), tplModelArtsNotebookNew, &form) @@ -2544,91 +2544,11 @@ func checkModelArtsSpecialPool(ctx *context.Context, flavorCode string, jobType return "" } func InferenceJobIndex(ctx *context.Context) { - MustEnableModelArts(ctx) - - repo := ctx.Repo.Repository - page := ctx.QueryInt("page") - if page <= 0 { - page = 1 - } - - listType := ctx.Query("listType") - ctx.Data["ListType"] = listType - - if listType == models.AllResource { - listType = "" - } - - var jobTypes []string - jobTypes = append(jobTypes, string(models.JobTypeInference)) - tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ - ListOptions: models.ListOptions{ - Page: page, - PageSize: setting.UI.IssuePagingNum, - }, - RepoID: repo.ID, - ComputeResource: listType, - JobTypes: jobTypes, - Type: models.TypeCloudBrainAll, - }) - if err != nil { - ctx.ServerError("Cloudbrain", err) - return - } - - for i, task := range tasks { - tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) - tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) - if tasks[i].ComputeResource == "" { - tasks[i].ComputeResource = models.NPUResource - } - if tasks[i].ModelId != "" { - model, err := models.QueryModelById(tasks[i].ModelId) - if err == nil && model != nil { - if model.RepoId != tasks[i].RepoID { - repo, err := models.GetRepositoryByID(model.RepoId) - if err == nil && repo != nil { - tasks[i].ModelRepoName = repo.Name - tasks[i].ModelRepoOwnerName = repo.OwnerName - } - } - } - } - } - isQueryPrivate := isQueryPrivateModel(ctx) - repoId := ctx.Repo.Repository.ID - Type := -1 - _, model_count, _ := models.QueryModel(&models.AiModelQueryOptions{ - ListOptions: models.ListOptions{ - Page: 1, - PageSize: 2, - }, - RepoID: repoId, - Type: Type, - New: MODEL_LATEST, - IsOnlyThisRepo: true, - Status: 0, - IsQueryPrivate: isQueryPrivate, - }) - ctx.Data["MODEL_COUNT"] = model_count - - pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) - pager.SetDefaultParams(ctx) - ctx.Data["Page"] = pager - ctx.Data["PageIsCloudBrain"] = true - ctx.Data["Tasks"] = tasks - ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) - ctx.Data["RepoIsEmpty"] = repo.IsEmpty ctx.HTML(200, tplModelArtsInferenceJobIndex) } func InferenceJobNew(ctx *context.Context) { - err := inferenceJobNewDataPrepare(ctx) - if err != nil { - ctx.ServerError("get new inference-job info failed", err) - return - } - + ctx.Data["PageIsCloudBrain"] = true ctx.HTML(200, tplModelArtsInferenceJobNew) } func inferenceJobNewDataPrepare(ctx *context.Context) error { @@ -2783,62 +2703,6 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel } func InferenceJobShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true - var jobID = ctx.Params(":jobid") - - page := ctx.QueryInt("page") - if page <= 0 { - page = 1 - } - task, err := models.GetCloudbrainByJobID(jobID) - - if err != nil { - log.Error("GetInferenceTask(%s) failed:%v", jobID, err.Error()) - ctx.NotFound(ctx.Req.URL.RequestURI(), nil) - return - } - //设置权限 - canNewJob, err := canUserCreateTrainJobVersion(ctx, task.UserID) - if err != nil { - ctx.ServerError("canNewJob failed", err) - return - } - ctx.Data["canNewJob"] = canNewJob - - //将运行参数转化为epoch_size = 3, device_target = Ascend的格式 - var parameters models.Parameters - err = json.Unmarshal([]byte(task.Parameters), ¶meters) - if err != nil { - log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) - trainJobNewDataPrepare(ctx) - return - } - - if len(parameters.Parameter) > 0 { - paramTemp := "" - for _, Parameter := range parameters.Parameter { - param := Parameter.Label + " = " + Parameter.Value + "; " - paramTemp = paramTemp + param - } - task.Parameters = paramTemp[:len(paramTemp)-2] - } else { - task.Parameters = "" - } - prepareSpec4Show(ctx, task) - LabelName := strings.Fields(task.LabelName) - ctx.Data["labelName"] = LabelName - ctx.Data["jobID"] = jobID - ctx.Data["jobName"] = task.JobName - ctx.Data["displayJobName"] = task.DisplayJobName - ctx.Data["task"] = task - ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) - ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) - tempUids := []int64{} - tempUids = append(tempUids, task.UserID) - JobCreater, err := models.GetUserNamesByIDs(tempUids) - if err != nil { - log.Error("GetUserNamesByIDs (WhitelistUserIDs): %v", err) - } - ctx.Data["userName"] = JobCreater[0] ctx.HTML(http.StatusOK, tplModelArtsInferenceJobShow) } diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 0cbd591ebe..2f897f7573 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1388,6 +1388,13 @@ func RegisterRoutes(m *macaron.Macaron) { m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusTrainJobForm{}), context.PointAccount(), repo.GrampusTrainJobGcuCreate) }) }) + + m.Group("/inference-job", func() { + m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusInferenceNew) + m.Group("/:id", func() { + m.Get("", reqRepoCloudBrainReader, repo.GrampusInferenceShow) + }) + }) }, context.RepoRef()) m.Group("/modelmanage", func() { m.Get("/create_local_model", repo.CreateLocalModel) diff --git a/services/ai_task_service/cluster/c2net.go b/services/ai_task_service/cluster/c2net.go index 2fc4df53af..795519e609 100644 --- a/services/ai_task_service/cluster/c2net.go +++ b/services/ai_task_service/cluster/c2net.go @@ -36,6 +36,7 @@ func (c C2NetClusterAdapter) CreateNoteBook(req entity.CreateNoteBookTaskRequest newReq, err := convertNoteBookReq2Grampus(req) if err != nil { log.Error("CreateNoteBook err.req=%+v err=%v", req, err) + return nil, err } jobResult, err := grampus.CreateNotebookJob(newReq) if err != nil { @@ -778,7 +779,7 @@ func (c C2NetClusterAdapter) DeleteTrainJob(opts entity.JobIdAndVersionId) error } func (c C2NetClusterAdapter) StopTrainJob(opts entity.JobIdAndVersionId) error { - _, err := grampus.StopJob(opts.JobID, string(models.JobTypeDebug)) + _, err := grampus.StopJob(opts.JobID) if err != nil { log.Error("StopNoteBook(%s) failed:%v", opts, err) return err diff --git a/services/ai_task_service/cluster/cloudbrain_two.go b/services/ai_task_service/cluster/cloudbrain_two.go index 4b6eb8982f..c721c36033 100644 --- a/services/ai_task_service/cluster/cloudbrain_two.go +++ b/services/ai_task_service/cluster/cloudbrain_two.go @@ -482,6 +482,11 @@ func handleCloudbrainTwoParameter(req entity.CreateTrainTaskRequest) models.Para }) } + param.Parameter = append(param.Parameter, models.Parameter{ + Label: modelarts.ResultUrl, + Value: t.OutPut[0].S3DownloadUrl, + }) + existDeviceTarget := false for _, parameter := range t.Params.Parameter { if parameter.Label == modelarts.DeviceTarget { diff --git a/services/ai_task_service/container_builder/output_path_builder.go b/services/ai_task_service/container_builder/output_path_builder.go index 98f4d86697..e3b1a84efd 100644 --- a/services/ai_task_service/container_builder/output_path_builder.go +++ b/services/ai_task_service/container_builder/output_path_builder.go @@ -1,13 +1,14 @@ package container_builder import ( + "path" + "code.gitea.io/gitea/entity" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/routers/response" "code.gitea.io/gitea/services/ai_task_service/context" "code.gitea.io/gitea/services/ai_task_service/storage_helper" - "path" ) type OutputPathBuilder struct { @@ -67,6 +68,7 @@ func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.Contai GetBackEndpoint: uploader.GetEndpoint(), IsDir: true, StorageType: storageTypes[0], + S3DownloadUrl: uploader.GetS3DownloadUrl(remoteDir), }}, nil } diff --git a/services/ai_task_service/task/cloudbrain_two_inference_task.go b/services/ai_task_service/task/cloudbrain_two_inference_task.go new file mode 100644 index 0000000000..c63c98d7c1 --- /dev/null +++ b/services/ai_task_service/task/cloudbrain_two_inference_task.go @@ -0,0 +1,139 @@ +package task + +import ( + "strings" + + "code.gitea.io/gitea/entity" + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/routers/response" + "code.gitea.io/gitea/services/ai_task_service/context" +) + +type CloudbrainTwoInferenceTaskTemplate struct { + DefaultAITaskTemplate +} + +func init() { + t := &CloudbrainTwoInferenceTaskTemplate{ + DefaultAITaskTemplate: DefaultAITaskTemplate{ + ClusterType: entity.OpenICloudbrainTwo, + JobType: models.JobTypeInference, + Config: GetCloudbrainTwoInferenceConfig, + }, + } + RegisterTask(models.JobTypeInference, entity.OpenICloudbrainTwo, t) +} + +func GetCloudbrainTwoInferenceConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { + var config = &entity.AITaskBaseConfig{ + ActionType: models.ActionCreateInferenceTask, + IsActionUseJobId: true, + ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ + entity.ContainerCode: { + ContainerPath: "/code", + ReadOnly: false, + AcceptStorageType: []entity.StorageType{entity.OBS}, + Uncompressed: true, + }, + entity.ContainerDataset: { + ContainerPath: "/dataset", + ReadOnly: true, + Uncompressed: true, + AcceptStorageType: []entity.StorageType{entity.OBS}, + }, + entity.ContainerPreTrainModel: { + ContainerPath: "/pretrainmodel", + ReadOnly: true, + AcceptStorageType: []entity.StorageType{entity.OBS}, + }, + entity.ContainerOutPutPath: { + ContainerPath: "/output", + StorageRelativePath: "/output" + models.CloudbrainTwoDefaultVersion, + ReadOnly: false, + AcceptStorageType: []entity.StorageType{entity.OBS}, + MKDIR: true, + }, + entity.ContainerLogPath: { + ContainerPath: "/log", + StorageRelativePath: "/log" + models.CloudbrainTwoDefaultVersion, + ReadOnly: false, + AcceptStorageType: []entity.StorageType{entity.OBS}, + MKDIR: true, + }, + }, + } + return config +} + +func (t CloudbrainTwoInferenceTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { + c := &CreateOperator{} + err := c.Next(t.CheckParamFormat). + Next(t.HandleReqParameters). + Next(t.CheckPrivilege4Continue). + Next(t.CheckSourceTaskIsCleared). + Next(t.CheckWorkerNum). + Next(t.CheckMultiRequest). + Next(t.CheckBranchExists). + Next(t.CheckBootFile). + Next(t.CheckDisplayJobName). + Next(t.LoadSpec). + Next(t.CheckPointBalance). + Next(t.CheckDatasets). + Next(t.CheckModel). + Next(t.InsertCloudbrainRecord4Async). + AsyncNextWithErrFun(t.BuildContainerData, t.GetAvailableQueues, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). + Operate(ctx) + if err != nil { + log.Error("create GrampusInferenceTask err.%v", err) + return nil, err + } + return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil +} + +func (g CloudbrainTwoInferenceTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { + c := g.GetMyCluster() + if c == nil { + return response.SYSTEM_ERROR + } + form := ctx.Request + req := entity.CreateTrainTaskRequest{ + Name: form.JobName, + DisplayJobName: form.DisplayJobName, + Description: form.Description, + Tasks: []entity.TrainTask{ + { + Name: form.JobName, + ResourceSpecId: ctx.Spec.SourceSpecId, + ImageId: form.ImageID, + ImageUrl: strings.TrimSpace(form.ImageUrl), + Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), + Code: ctx.GetContainerDataArray(entity.ContainerCode), + LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath), + Queues: ctx.Queues, + PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), + BootFile: form.BootFile, + OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), + Params: form.ParamArray, + Spec: ctx.Spec, + PoolId: ctx.Spec.QueueCode, + WorkServerNumber: form.WorkServerNumber, + }, + }, + } + createTime := timeutil.TimeStampNow() + res, err := c.CreateTrainJob(req) + if err != nil { + log.Error("CloudbrainTwo InfereceTask Create err.req=%+v err=%v", req, err) + return response.NewBizError(err) + } + ctx.Response = &entity.CreationResponse{ + JobID: res.JobID, + Status: res.Status, + CreateTime: createTime, + VersionID: res.VersionID, + VersionName: res.VersionName, + } + return nil +} diff --git a/services/ai_task_service/task/grampus_inference_task.go b/services/ai_task_service/task/grampus_inference_task.go new file mode 100644 index 0000000000..74399fa367 --- /dev/null +++ b/services/ai_task_service/task/grampus_inference_task.go @@ -0,0 +1,144 @@ +package task + +import ( + "strings" + + "code.gitea.io/gitea/entity" + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/cloudbrain" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/routers/response" + "code.gitea.io/gitea/services/ai_task_service/context" +) + +type GrampusInferenceTaskTemplate struct { + DefaultAITaskTemplate +} + +func init() { + t := &GrampusInferenceTaskTemplate{ + DefaultAITaskTemplate: DefaultAITaskTemplate{ + ClusterType: entity.C2Net, + JobType: models.JobTypeInference, + Config: GetGrampusInferenceTaskConfig, + }, + } + RegisterTask(models.JobTypeInference, entity.C2Net, t) +} + +func GetGrampusInferenceTaskConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig { + codePath := "/code" + datasetPath := "/dataset" + pretrainModelPath := "/pretrainmodel" + outputPath := "/output" + var config = &entity.AITaskBaseConfig{ + ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ + entity.ContainerCode: { + ContainerPath: "/tmp" + codePath, + StorageRelativePath: codePath, + ReadOnly: false, + AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, + }, + entity.ContainerDataset: { + ContainerPath: "/tmp" + datasetPath, + ReadOnly: true, + AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, + }, + entity.ContainerPreTrainModel: { + ContainerPath: "/tmp" + pretrainModelPath, + ReadOnly: true, + AcceptStorageType: []entity.StorageType{entity.MINIO, entity.OBS}, + }, + entity.ContainerOutPutPath: { + ContainerPath: "/tmp" + outputPath, + StorageRelativePath: cloudbrain.ModelMountPath, + ReadOnly: false, + AcceptStorageType: []entity.StorageType{entity.MINIO}, + MKDIR: false, + }, + }, + } + + switch opts.ComputeSource { + case models.GPU: + config.ActionType = models.ActionCreateGrampusGPUInferenceTask + case models.ILUVATAR: + config.ActionType = models.ActionCreateGrampusILUVATARInferenceTask + } + + config.IsActionUseJobId = true + return config +} + +func (t GrampusInferenceTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { + c := &CreateOperator{} + err := c.Next(t.CheckParamFormat). + Next(t.HandleReqParameters). + Next(t.CheckPrivilege4Continue). + Next(t.CheckSourceTaskIsCleared). + Next(t.CheckBranchExists). + Next(t.CheckBootFile). + Next(t.CheckWorkerNum). + Next(t.CheckMultiRequest). + Next(t.CheckDisplayJobName). + Next(t.LoadSpec). + Next(t.CheckPointBalance). + Next(t.CheckDatasets). + Next(t.CheckModel). + Next(t.InsertCloudbrainRecord4Async). + AsyncNextWithErrFun(t.BuildContainerData, t.GetAvailableQueues, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). + Operate(ctx) + if err != nil { + log.Error("create GrampusTrainTaskTemplate err.%v", err) + return nil, err + } + return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil +} + +func (g GrampusInferenceTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { + c := g.GetMyCluster() + if c == nil { + return response.SYSTEM_ERROR + } + form := ctx.Request + imageUrl := strings.TrimSpace(form.ImageUrl) + if form.ImageID != "" { + imageUrl = "" + } + req := entity.CreateTrainTaskRequest{ + Name: form.JobName, + DisplayJobName: form.DisplayJobName, + Tasks: []entity.TrainTask{ + { + Name: form.JobName, + ResourceSpecId: ctx.Spec.SourceSpecId, + ImageId: form.ImageID, + ImageUrl: imageUrl, + Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), + Code: ctx.GetContainerDataArray(entity.ContainerCode), + Queues: ctx.Queues, + PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), + BootFile: form.BootFile, + OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), + Params: form.ParamArray, + Spec: ctx.Spec, + RepoName: ctx.Repository.Name, + WorkServerNumber: ctx.Request.WorkServerNumber, + }, + }, + TaskConfig: ctx.Config, + } + createTime := timeutil.TimeStampNow() + res, err := c.CreateTrainJob(req) + if err != nil { + log.Error("GrampusTrainTaskTemplate CreateTrainJob err.req=%+v err=%v", req, err) + return response.NewBizError(err) + } + ctx.Response = &entity.CreationResponse{ + JobID: res.JobID, + Status: res.Status, + CreateTime: createTime, + } + return nil +} diff --git a/services/ai_task_service/task/opt_handler.go b/services/ai_task_service/task/opt_handler.go index e92320bdeb..af94df8f88 100644 --- a/services/ai_task_service/task/opt_handler.go +++ b/services/ai_task_service/task/opt_handler.go @@ -144,7 +144,7 @@ func (g DefaultCreationHandler) CheckDatasets(ctx *context.CreationContext) *res attachSize += infos.Size } limitSizeGB := ctx.Config.DatasetsLimitSizeGB - if limitSizeGB > 0 && attachSize > int64(limitSizeGB*1000*1000*1000) { + if limitSizeGB > 0 && attachSize > int64(limitSizeGB*1024*1024*1024) { log.Error("The DatasetSize exceeds the limit (%dGB)", limitSizeGB) // GB return response.DATASET_SIZE_OVER_LIMIT.WithParams(limitSizeGB) } diff --git a/services/ai_task_service/task/task_creation_info.go b/services/ai_task_service/task/task_creation_info.go index 82d67bf244..406e0e4d32 100644 --- a/services/ai_task_service/task/task_creation_info.go +++ b/services/ai_task_service/task/task_creation_info.go @@ -87,7 +87,7 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio } result.Config = entity.AITaskCreationConfig{ - DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, + DatasetMaxSize: setting.DebugAttachSize * 1024 * 1024 * 1024, } //查询可用节点数 if workerNums, err := t.GetAllowedWorkerNum(req.User.ID, req.ComputeSource); err == nil { diff --git a/services/cloudbrain/cloudbrainTask/notebook.go b/services/cloudbrain/cloudbrainTask/notebook.go index 1977e83155..44d2ff0dd0 100644 --- a/services/cloudbrain/cloudbrainTask/notebook.go +++ b/services/cloudbrain/cloudbrainTask/notebook.go @@ -170,7 +170,7 @@ func GrampusNotebookCreate(ctx *context.Context, option api.CreateNotebookOption for _, infos := range datasetInfos { attachSize += infos.Size } - if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { + if attachSize > int64(setting.DebugAttachSize*1024*1024*1024) { log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.error.debug_datasetsize", setting.DebugAttachSize))) return diff --git a/services/socketwrap/clientManager.go b/services/socketwrap/clientManager.go index f3a4532b42..05bdb9c2d2 100755 --- a/services/socketwrap/clientManager.go +++ b/services/socketwrap/clientManager.go @@ -10,7 +10,7 @@ import ( "github.com/elliotchance/orderedmap" ) -var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49} +var opTypes = []int{1, 2, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51} type ClientsManager struct { Clients *orderedmap.OrderedMap diff --git a/templates/admin/cloudbrain/list.tmpl b/templates/admin/cloudbrain/list.tmpl index 4199b92fc2..7d54a9e0f5 100755 --- a/templates/admin/cloudbrain/list.tmpl +++ b/templates/admin/cloudbrain/list.tmpl @@ -89,7 +89,7 @@
{{.i18n.Tr "cloudbrain.task_delete_confirm"}}
-- {{$.i18n.Tr "repo.cloudbrain_task"}} - | -
-
- {{.DisplayJobName}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.status"}} - | - -
-
- {{.Status}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.start_time"}} - | - -
-
-
- {{if not (eq .StartTime 0)}}
- {{TimeSinceUnix1 .StartTime}}
- {{else}}
- --
- {{end}}
-
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.dura_time"}} - | - -
-
- {{.TrainJobDuration}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.AI_driver"}} - | -
-
- {{.EngineName}}
-
- |
-
-
- {{$.i18n.Tr "repo.model.manage.description"}} - | - -
-
- {{if .Description}}
- {{.Description}}
- {{else}}
- --
- {{end}}
-
- |
-
- {{$.i18n.Tr "repo.cloudbrain_creator"}} - | - -
-
- {{$.userName}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.compute_node"}} - | -
-
- {{.WorkServerNumber}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.infer_job_model"}} - | -
-
- {{.ModelName}}
- {{$.i18n.Tr "repo.modelarts.version"}}:{{.ModelVersion}}
-
-
- |
-
- {{$.i18n.Tr "repo.modelarts.infer_job_model_file"}} - | -
-
- {{.CkptName}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.model_label"}} - | - -
-
-
- {{if .LabelName}}
- {{range $.labelName}}
- {{.}}
- {{end}}
- {{else}}
- --
- {{end}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.code_version"}} - | - -
-
- {{.BranchName}}
- {{SubStr .CommitID 0 10}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.start_file"}} - | - -
-
- {{.BootFile}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.run_parameter"}} - | - -
-
- {{if .Parameters}}
- {{.Parameters}}
- {{else}}
- --
- {{end}}
-
- |
-
- {{$.i18n.Tr "repo.modelarts.train_job.standard"}} - | - -- - | -
{{$.i18n.Tr "dataset.file"}} | -
---|
- {{if eq .IsDelete true}} - {{.DatasetName}}({{$.i18n.Tr "dataset.file_deleted"}}) - {{else}} - {{.DatasetName}} - {{end}} - - | -