@@ -1,16 +1,8 @@
package repo
import (
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
"encoding/json"
"errors"
"github.com/unknwon/com"
"io/ioutil"
"net/http"
"os"
@@ -19,6 +11,15 @@ import (
"strings"
"time"
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
"github.com/unknwon/com"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/cloudbrain"
@@ -137,6 +138,93 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
return nil
}
// grampusTrainJobErrorPrepare refills ctx.Data so that the "new train job"
// page can be rendered again after a create request has been rejected.
//
// It publishes the same lookup data the page needs (images, resource specs
// filtered by the special pools the user belongs to, dataset type) and then
// echoes the values of the submitted form back into ctx.Data.
//
// Parameters:
//   - ctx: request context; ctx.User.ID is used for the special-pool checks.
//   - processType: compared against grampus.ProcessorTypeGPU and
//     grampus.ProcessorTypeNPU to choose the dataset type.
//   - form: the submitted form whose values are echoed back.
//
// It returns the json error when form.Params cannot be decoded, and nil in
// every other case. Failures of the image, resource-spec and dataset lookups
// are only logged, so the page is rendered without that piece of data.
func grampusTrainJobErrorPrepare(ctx *context.Context, processType string, form auth.CreateGrampusTrainJobForm) error {
	ctx.Data["PageIsCloudBrain"] = true

	// get valid images
	images, err := grampus.GetImages(processType)
	if err != nil {
		log.Error("GetImages failed: %v", err)
	} else {
		ctx.Data["images"] = images.Infos
	}

	grampus.InitSpecialPool()

	// Both processor kinds start enabled; an exclusive pool the user is not a
	// member of switches its own "<Type>Enabled" flag off below.
	ctx.Data["GPUEnabled"] = true
	ctx.Data["NPUEnabled"] = true

	includeCenters := make(map[string]struct{})
	excludeCenters := make(map[string]struct{})
	if grampus.SpecialPools != nil {
		lowerProcessType := strings.ToLower(processType)
		for _, pool := range grampus.SpecialPools.Pools {
			if pool.IsExclusive {
				if !IsUserInOrgPool(ctx.User.ID, pool) {
					ctx.Data[pool.Type+"Enabled"] = false
				}
				continue
			}
			// Non-exclusive pools only matter for the matching processor type.
			if !strings.Contains(lowerProcessType, strings.ToLower(pool.Type)) {
				continue
			}
			// Members get the pool's queues added to the include set,
			// everybody else gets them added to the exclude set.
			centers := excludeCenters
			if IsUserInOrgPool(ctx.User.ID, pool) {
				centers = includeCenters
			}
			for _, center := range pool.Pool {
				centers[center.Queue] = struct{}{}
			}
		}
	}

	// get valid resource specs
	// The error is checked before the result is filtered: the filter reads
	// specs.Infos, so a nil result from a failed lookup must not reach it.
	specs, err := grampus.GetResourceSpecs(processType)
	if err != nil {
		log.Error("GetResourceSpecs failed: %v", err)
	} else {
		ctx.Data["flavor_infos"] = getFilterSpecBySpecialPool(specs, includeCenters, excludeCenters)
	}

	switch processType {
	case grampus.ProcessorTypeGPU:
		ctx.Data["datasetType"] = models.TypeCloudBrainOne
	case grampus.ProcessorTypeNPU:
		ctx.Data["datasetType"] = models.TypeCloudBrainTwo
	}

	var parameters modelarts.Parameters
	if err := json.Unmarshal([]byte(form.Params), &parameters); err != nil {
		// NOTE(review): the callers visible in this file discard the returned
		// error and go on to call ctx.RenderWithErr - confirm that responding
		// here with ctx.ServerError as well is intended.
		ctx.ServerError("json.Unmarshal failed:", err)
		return err
	}
	ctx.Data["params"] = parameters.Parameter

	// Echo the submitted values before the dataset lookup so that a lookup
	// failure does not drop the rest of the user's input from the form.
	ctx.Data["boot_file"] = form.BootFile
	ctx.Data["attachment"] = form.Attachment
	ctx.Data["branch_name"] = form.BranchName
	ctx.Data["image_id"] = form.ImageID
	ctx.Data["display_job_name"] = form.DisplayJobName
	ctx.Data["image"] = form.Image
	ctx.Data["flavor"] = form.FlavorID
	ctx.Data["flavor_name"] = form.FlavorName
	ctx.Data["description"] = form.Description
	ctx.Data["engine_name"] = form.EngineName
	ctx.Data["work_server_number"] = form.WorkServerNumber

	_, datasetNames, err := models.GetDatasetInfo(form.Attachment)
	if err != nil {
		// Best effort: the page is rendered without the dataset name.
		log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])
		return nil
	}
	ctx.Data["dataset_name"] = datasetNames

	return nil
}
func getFilterSpecBySpecialPool(specs *models.GetGrampusResourceSpecsResult, includeCenters map[string]struct{}, excludeCenters map[string]struct{}) []models.GrampusSpec {
if len(includeCenters) == 0 && len(excludeCenters) == 0 {
return specs.Infos
@@ -207,14 +295,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
image := strings.TrimSpace(form.Image)
if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form)
return
}
errStr := checkSpecialPool(ctx, "GPU")
if errStr != "" {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form)
return
}
@@ -223,13 +311,13 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource)
if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -238,7 +326,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//check param
if err := grampusParamCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form)
return
}
@@ -248,14 +336,14 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err == nil {
if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobGPUNew, &form)
return
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("system error", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -265,7 +353,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
attachment, err := models.GetAttachmentByUUID(uuid)
if err != nil {
log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -278,7 +366,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -287,7 +375,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//upload code
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -295,7 +383,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
if err := mkModelPath(modelPath); err != nil {
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -303,7 +391,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//init model readme
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -312,7 +400,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name)
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobGPUNew, &form)
return
}
@@ -344,7 +432,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
err = grampus.GenerateTrainJob(ctx, req)
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeGPU, form )
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobGPUNew, &form)
return
}
@@ -391,14 +479,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
engineName := form.EngineName
if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form)
return
}
errStr := checkSpecialPool(ctx, "NPU")
if errStr != "" {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form)
return
}
@@ -407,13 +495,13 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource)
if err != nil {
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplGrampusTrainJobNPUNew, &form)
return
}
@@ -422,7 +510,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//check param
if err := grampusParamCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
return
}
@@ -432,14 +520,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err == nil {
if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("the job name did already exist", tplGrampusTrainJobNPUNew, &form)
return
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("system error", tplGrampusTrainJobNPUNew, &form)
return
}
@@ -449,7 +537,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
attachment, err := models.GetAttachmentByUUID(uuid)
if err != nil {
log.Error("GetAttachmentByUUID failed:", err.Error(), ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("dataset is not exist", tplGrampusTrainJobNPUNew, &form)
return
}
@@ -462,7 +550,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("Create task failed, server timed out", tplGrampusTrainJobNPUNew, &form)
return
}
@@ -470,14 +558,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
//todo: upload code (send to file_server todo this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("Failed to obsMkdir_output", tplGrampusTrainJobNPUNew, &form)
return
}
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("Failed to uploadCodeToObs", tplGrampusTrainJobNPUNew, &form)
return
}
@@ -486,7 +574,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+"'"+attachment.Name+"'", bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name)
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form)
return
}
@@ -522,7 +610,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
err = grampus.GenerateTrainJob(ctx, req)
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error())
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU )
grampusTrainJobErrorPrepare(ctx, grampus.ProcessorTypeNPU, form )
ctx.RenderWithErr(err.Error(), tplGrampusTrainJobNPUNew, &form)
return
}