@@ -2,6 +2,7 @@ package repo
import (
"archive/zip"
"code.gitea.io/gitea/modules/modelarts_cd"
"encoding/json"
"errors"
"fmt"
@@ -60,18 +61,11 @@ func DebugJobIndex(ctx *context.Context) {
if page <= 0 {
page = 1
}
typeCloudBrain := models.TypeCloudBrainAll
jobTypeNot := false
if listType == models.GPUResource {
typeCloudBrain = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
typeCloudBrain = models.TypeCloudBrainTwo
} else if listType == models.AllResource {
typeCloudBrain = models.TypeCloudBrainAll
} else {
log.Error("listType(%s) error", listType)
ctx.ServerError("listType error", errors.New("listType error"))
return
var computeResource string
if listType != models.AllResource {
computeResource = listType
}
var jobTypes []string
@@ -81,10 +75,11 @@ func DebugJobIndex(ctx *context.Context) {
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: typeCloudBrain,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
RepoID: repo.ID,
ComputeResource: computeResource,
Type: models.TypeCloudBrainAll,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
})
if err != nil {
ctx.ServerError("Get debugjob faild:", err)
@@ -134,16 +129,8 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return err
}
ctx.Data["attachments"] = attachs
if modelarts.ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &modelarts.ImageInfos)
}
ctx.Data["images"] = modelarts.ImageInfos.ImageInfo
if modelarts.FlavorInfos == nil {
json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
}
ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
ctx.Data["images"] = setting.StImageInfos.ImageInfo
ctx.Data["flavors"] = setting.StFlavorInfo.FlavorInfo
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeDebug))
ctx.Data["datasetType"] = models.TypeCloudBrainTwo
@@ -154,50 +141,6 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return nil
}
// NotebookCreate validates a notebook creation request and, when it passes,
// asks modelarts to generate the task.
//
// The request is rejected (the creation form is re-rendered with an error) when:
//   - counting the user's notebook tasks fails, or that count is already >= 1;
//   - a cloudbrain job with the requested name is found;
//   - the name lookup fails with anything other than a "job not exist" error;
//   - modelarts.GenerateTask returns an error.
//
// On success the client is redirected to the repository's debug job list.
func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
	ctx.Data["PageIsNotebook"] = true

	// reject refreshes the page data and re-renders the creation form with msg.
	reject := func(msg string) {
		cloudBrainNewDataPrepare(ctx)
		ctx.RenderWithErr(msg, tplModelArtsNotebookNew, &form)
	}

	running, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
	if err != nil {
		log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
		reject("system error")
		return
	}
	if running >= 1 {
		log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
		reject("you have already a running or waiting task, can not create more")
		return
	}

	// A successful lookup means the name is taken; only a "not exist" error
	// lets the request proceed.
	_, err = models.GetCloudbrainByName(form.JobName)
	switch {
	case err == nil:
		log.Error("the job name did already exist", ctx.Data["MsgID"])
		reject("the job name did already exist")
		return
	case !models.IsErrJobNotExist(err):
		log.Error("system error, %v", err, ctx.Data["MsgID"])
		reject("system error")
		return
	}

	// A generation failure re-renders the form without refreshing the page data.
	if err = modelarts.GenerateTask(ctx, form.JobName, form.Attachment, form.Description, form.Flavor); err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
		return
	}

	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
}
func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
ctx.Data["PageIsNotebook"] = true
displayJobName := form.DisplayJobName
@@ -247,7 +190,12 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
return
}
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
if setting.ModelartsCD.Enabled {
err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
} else {
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
}
if err != nil {
log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
@@ -292,14 +240,11 @@ func NotebookShow(ctx *context.Context) {
if err == nil {
task.User = user
}
if modelarts.FlavorInfos == nil {
json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
}
findSpec := false
if modelarts.FlavorInfos != nil {
ctx.Data["resource_spec"] = modelarts.FlavorInfos .FlavorInfo[0].Desc
for _, f := range modelarts.FlavorInfos .FlavorInfo {
if setting.StFlavorInfo != nil {
ctx.Data["resource_spec"] = setting.StFlavorInfo .FlavorInfo[0].Desc
for _, f := range setting.StFlavorInfo .FlavorInfo {
if fmt.Sprint(f.Value) == task.FlavorCode {
ctx.Data["resource_spec"] = f.Desc
findSpec = true
@@ -394,36 +339,16 @@ func setShowSpecBySpecialPoolConfig(ctx *context.Context, findSpec bool, task *m
}
}
// NotebookDebug redirects the client to the debug URL of the job identified by
// the ":jobid" route parameter.
//
// The redirect target is the job's annotated target domain, followed by every
// "/"-separated segment of the annotated URL from the fourth segment onward,
// with the job token appended as the "token" query parameter. If fetching the
// job or its token fails, the notebook index page is rendered with the error.
func NotebookDebug(ctx *context.Context) {
	jobID := ctx.Params(":jobid")

	job, err := modelarts.GetJob(jobID)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
		return
	}

	tokenRes, err := modelarts.GetJobToken(jobID)
	if err != nil {
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
		return
	}

	target := job.Spec.Annotations.TargetDomain
	// Skip the first three segments of the annotated URL (presumably scheme,
	// empty segment and host — confirm) and re-attach the rest to the target domain.
	if segments := strings.Split(job.Spec.Annotations.Url, "/"); len(segments) > 3 {
		target += "/" + strings.Join(segments[3:], "/")
	}

	ctx.Redirect(target + "?token=" + tokenRes.Token)
}
func NotebookDebug2(ctx *context.Context) {
var err error
var result *models.GetNotebook2Result
task := ctx.Cloudbrain
result, err := modelarts.GetNotebook2(task.JobID)
if task.Type == models.TypeCloudBrainTwo {
result, err = modelarts.GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
@@ -469,7 +394,13 @@ func NotebookRestart(ctx *context.Context) {
Action: models.ActionStart,
}
res, err := modelarts.ManageNotebook2(task.JobID, param)
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"])
/* 暂不处理再次调试502的场景,详情见方案
@@ -555,7 +486,14 @@ func NotebookStop(ctx *context.Context) {
Action: models.ActionStop,
}
res, err := modelarts.ManageNotebook2(task.JobID, param)
var err error
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
@@ -605,7 +543,13 @@ func NotebookDel(ctx *context.Context) {
return
}
_, err := modelarts.DelNotebook2(task.JobID)
var err error
if task.Type == models.TypeCloudBrainTwo {
_, err = modelarts.DelNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
_, err = modelarts_cd.DelNotebook(task.JobID)
}
if err != nil {
log.Error("DelNotebook2(%s) failed:%v", task.JobName, err.Error())
if strings.Contains(err.Error(), modelarts.NotebookNotFound) || strings.Contains(err.Error(), modelarts.NotebookNoPermission) || strings.Contains(err.Error(), modelarts.NotebookInvalid) {
@@ -764,7 +708,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
ctx.Data["WaitCount"] = waitCount
setMultiNodeIfConfigureMatch(ctx)
return nil
}
@@ -1130,8 +1074,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName
errStr:=checkMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
return
@@ -1371,31 +1315,31 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}
func checkMultiNode(userId int64, serverNum int) string{
if serverNum==1{
func checkMultiNode(userId int64, serverNum int) string {
if serverNum == 1 {
return ""
}
modelarts.InitMultiNode()
var isServerNumValid=false
var isServerNumValid = false
if modelarts.MultiNodeConfig != nil {
for _, info := range modelarts.MultiNodeConfig.Info {
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg {
if isInNodes(info.Node,serverNum){
isServerNumValid=true
if isInNodes(info.Node, serverNum) {
isServerNumValid = true
break
}
}
}
}
if isServerNumValid{
if isServerNumValid {
return ""
}else{
} else {
return "repo.modelarts.no_node_right"
}
}
func checkInferenceJobMultiNode(userId int64, serverNum int) string{
if serverNum==1{
func checkInferenceJobMultiNode(userId int64, serverNum int) string {
if serverNum == 1 {
return ""
}
@@ -1404,8 +1348,8 @@ func checkInferenceJobMultiNode(userId int64, serverNum int) string{
}
func isInNodes(nodes []int, num int) bool {
for _, node:=range nodes{
if node==num{
for _, node := range nodes {
if node == num {
return true
}
}
@@ -1447,8 +1391,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.Data["PageIsTrainJob"] = true
var jobID = ctx.Params(":jobid")
errStr:=checkMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
return
@@ -1789,7 +1733,7 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
log.Error("the boot file(%s) must be a python file", strings.TrimSpace(form.BootFile))
return errors.New("启动文件必须是python文件")
}
if form.BranchName == "" {
log.Error("the branch must not be null!", form.BranchName)
return errors.New("代码分支不能为空!")
@@ -2088,8 +2032,8 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ckptUrl := "/" + form.TrainUrl + form.CkptName
log.Info("ckpt url:" + ckptUrl)
errStr:=checkInferenceJobMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
errStr := checkInferenceJobMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
return
@@ -2319,7 +2263,7 @@ func checkModelArtsSpecialPool(ctx *context.Context, flavorCode string, jobType
if !isMatchPool {
isMatchSpec := false
if jobType == string(models.JobTypeDebug) {
for _, flavor := range modelarts.FlavorInfos .FlavorInfo {
for _, flavor := range setting.StFlavorInfo .FlavorInfo {
if flavor.Value == flavorCode {
isMatchSpec = true
break