#5336 #5326 修改旧的GCU训练任务,提交后任务会一直waiting

Merged
ychao_1983 merged 10 commits from zouap_dev into V20240402 1 month ago
  1. +0
    -2
      models/action.go
  2. +12
    -0
      models/cloudbrain_image.go
  3. +2
    -2
      modules/setting/setting.go
  4. +1
    -0
      options/locale/locale_en-US.ini
  5. +1
    -1
      options/locale/locale_zh-CN.ini
  6. +1
    -0
      routers/ai_task/ai_task.go
  7. +3
    -2
      routers/api/v1/api.go
  8. +3
    -0
      routers/api/v1/repo/modelmanage.go
  9. +8
    -0
      routers/repo/attachment_dir.go
  10. +1
    -0
      routers/response/response_list.go
  11. +0
    -2
      routers/user/home.go
  12. +1
    -0
      services/ai_task_service/task/grampus_train_task.go
  13. +21
    -0
      services/ai_task_service/task/opt_handler.go

+ 0
- 2
models/action.go View File

@@ -512,7 +512,6 @@ type GetFeedsOptions struct {
// GetFeeds returns actions according to the provided options
func GetFeeds(opts GetFeedsOptions) ([]*Action, error) {
cond := builder.NewCond()

var repoIDs []int64
var actorID int64

@@ -556,7 +555,6 @@ func GetFeeds(opts GetFeedsOptions) ([]*Action, error) {
if err := ActionList(actions).LoadAllAttributes(); err != nil {
return nil, fmt.Errorf("LoadAttributes: %v", err)
}

return actions, nil
}



+ 12
- 0
models/cloudbrain_image.go View File

@@ -918,3 +918,15 @@ func GetGrampusSrcAllBaseImage(srcImageUrl string) (ImageList, error) {
}
return images, nil
}

// FindImageByImageUrl queries the "image" table for rows that reference
// imageUrl. A row matches when its "place" column equals imageUrl, or when its
// "ai_center_images" column matches (via LIKE) the fragment
// "imageUrl":"<imageUrl>".
//
// It returns the matching images (possibly an empty list), or nil and the
// error reported by the query.
func FindImageByImageUrl(imageUrl string) (ImageList, error) {
	// Fragment looked for inside ai_center_images for this URL.
	// NOTE(review): presumably ai_center_images stores JSON text — confirm.
	centerImageFragment := "\"imageUrl\":\"" + imageUrl + "\""

	matchCond := builder.NewCond().
		Or(builder.Eq{"place": imageUrl}).
		Or(builder.Like{"ai_center_images", centerImageFragment})

	found := make(ImageList, 0, 10)
	if err := x.Table("image").Where(matchCond).Find(&found); err != nil {
		return nil, err
	}
	return found, nil
}

+ 2
- 2
modules/setting/setting.go View File

@@ -1979,8 +1979,8 @@ func GetGrampusConfig() {

Grampus.MMLSparkMaxTime = sec.Key("MMLSparkMaxTime").MustInt64(8 * 3600)

Grampus.NoteBookDomainURL = sec.Key("NoteBookDomainURL").MustString("https://notebook.openi.org.cn")
Grampus.NoteBookLocalURL = sec.Key("NoteBookLocalURL").MustString("http://192.168.242.23")
Grampus.NoteBookDomainURL = sec.Key("NoteBookDomainURL").MustString("https://mlunotebook.openi.org.cn")
Grampus.NoteBookLocalURL = sec.Key("NoteBookLocalURL").MustString("http://192.168.242.50")
}

func SetRadarMapConfig() {


+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -3506,6 +3506,7 @@ model_not_exist=The model in the task does not exist or has been deleted
too_many_notebook=A user can have up to 5 debug tasks, please try again after delete some debug tasks.
can_not_stop_creating_job=AI task is creating, can not be stopped.
no_center_match=Can not match an AI center, please select other specification.
image_not_availabel=The image is not available, please select other image.

[common_error]
system_error = System error.Please try again later


+ 1
- 1
options/locale/locale_zh-CN.ini View File

@@ -3529,7 +3529,7 @@ model_not_exist=选择的预训练模型不存在或者已被删除
too_many_notebook=每个用户最多只能创建5个调试任务,请删除历史任务再新建。
can_not_stop_creating_job=任务正在创建中,请等创建完成后再尝试停止。
no_center_match=没有可分配的中心,请选择其他规格。
image_not_availabel=该镜像不可用,请重新选择镜像。

[common_error]
system_error = 当前服务不可用,请稍后再试


+ 1
- 0
routers/ai_task/ai_task.go View File

@@ -21,6 +21,7 @@ import (
)

func CreateAITask(ctx *context.Context, form entity.CreateReq) {
log.Info("start to here.....")
handCreateReq(&form)
res, err := task.CreateAITask(form, ctx.Repo.GitRepo, ctx.Repo.Repository, ctx.User)
if err != nil {


+ 3
- 2
routers/api/v1/api.go View File

@@ -59,10 +59,11 @@
package v1

import (
"code.gitea.io/gitea/routers/resources"
"net/http"
"strings"

"code.gitea.io/gitea/routers/resources"

"code.gitea.io/gitea/routers/reward/point"

"code.gitea.io/gitea/services/memory"
@@ -1327,7 +1328,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Get("/query_model_convert_byName", repo.QueryModelConvertByName)
m.Get("/query_model_convert_resultfile", repo.QueryModeConvertResultFile)
m.Get("/download_model_convert_resultfile", repo.DownloadModeConvertResultFile)
m.Get("/query_onelevel_modelfile", repo.QueryOneLevelModelFile)
m.Get("/:id", repo.GetCloudbrainModelConvertTask)
m.Get("/:id/log", repo.GrampusForModelConvertGetLog)
m.Get("/:id/modelartlog", repo.TrainJobForModelConvertGetLog)


+ 3
- 0
routers/api/v1/repo/modelmanage.go View File

@@ -177,6 +177,9 @@ func QueryModelConvertById(ctx *context.APIContext) {
ctx.JSON(http.StatusOK, nil)
}
}
// QueryOneLevelModelFile is a thin API-layer wrapper: it forwards the request
// to routerRepo.QueryOneLevelModelFile, passing ctx.Context, and leaves all
// response handling to that function.
func QueryOneLevelModelFile(ctx *context.APIContext) {
routerRepo.QueryOneLevelModelFile(ctx.Context)
}

func QueryModelConvertByName(ctx *context.APIContext) {
modelResult, err := routerRepo.GetModelConvertByName(ctx.Context)


+ 8
- 0
routers/repo/attachment_dir.go View File

@@ -115,7 +115,11 @@ func GetTxtContent(ctx *context.Context) {
}

func getObsImageContent(uuid string, fileName string, ctx *context.Context) {
isModel := ctx.QueryBool("isModel")
objectName := strings.TrimPrefix(path.Join(setting.BasePath+models.AttachmentRelativePath(uuid)+uuid, fileName), "/")
if isModel {
objectName = strings.TrimPrefix(path.Join(Model_prefix, path.Join(uuid[0:1], uuid[1:2], uuid, fileName)), "/")
}
//log.Info("obs objectName=" + objectName)
body, err := storage.ObsDownloadAFile(setting.Bucket, objectName)
if err != nil {
@@ -140,7 +144,11 @@ func getObsImageContent(uuid string, fileName string, ctx *context.Context) {
}

func getObsTxtContent(uuid string, fileName string, ctx *context.Context) {
isModel := ctx.QueryBool("isModel")
objectName := strings.TrimPrefix(path.Join(setting.BasePath+models.AttachmentRelativePath(uuid)+uuid, fileName), "/")
if isModel {
objectName = strings.TrimPrefix(path.Join(Model_prefix, path.Join(uuid[0:1], uuid[1:2], uuid, fileName)), "/")
}
//log.Info("obs objectName=" + objectName)
body, err := storage.ObsDownloadAFile(setting.Bucket, objectName)
if err != nil {


+ 1
- 0
routers/response/response_list.go View File

@@ -41,3 +41,4 @@ var CAN_NOT_STOP_CREATING_JOB = &BizError{Code: 2024, DefaultMsg: "AI task is cr
// NO_CENTER_MATCH reports that no AI center could be matched for the selected specification.
// NOTE(review): Code 2024 is also used by CAN_NOT_STOP_CREATING_JOB, so clients cannot
// tell the two errors apart by code alone — confirm whether a distinct code is wanted.
var NO_CENTER_MATCH = &BizError{Code: 2024, DefaultMsg: "Can not match an AI center, please select other specification.", TrCode: "ai_task.no_center_match"}

// MODEL_NUMBER_OVER_LIMIT reports that the model count exceeds the limit.
var MODEL_NUMBER_OVER_LIMIT = &BizError{Code: 2025, DefaultMsg: "The model count exceed the limit", TrCode: "ai_task.model_number_over_limit"}

// MODEL_SIZE_OVER_LIMIT reports that the model size exceeds the limit.
var MODEL_SIZE_OVER_LIMIT = &BizError{Code: 2026, DefaultMsg: "The size of model exceeds limitation", TrCode: "ai_task.model_size_over_limit"}

// IMAGE_NOT_AVAILABLE reports that the requested image is not available.
// The default message mirrors the en-US translation of its TrCode so the
// fallback text gives the same guidance as the localized one. The TrCode keeps
// the existing "availabel" spelling because it must match the locale key.
var IMAGE_NOT_AVAILABLE = &BizError{Code: 2027, DefaultMsg: "The image is not available, please select other image.", TrCode: "ai_task.image_not_availabel"}

+ 0
- 2
routers/user/home.go View File

@@ -76,7 +76,6 @@ func retrieveFeeds(ctx *context.Context, options models.GetFeedsOptions) {
ctx.ServerError("GetFeeds", err)
return
}

userCache := map[int64]*models.User{options.RequestedUser.ID: options.RequestedUser}
if ctx.User != nil {
userCache[ctx.User.ID] = ctx.User
@@ -112,7 +111,6 @@ func Dashboard(ctx *context.Context) {
if ctx.Written() {
return
}

ctx.Data["Title"] = ctxUser.DisplayName() + " - " + ctx.Tr("dashboard")
ctx.Data["PageIsDashboard"] = true
ctx.Data["PageIsNews"] = true


+ 1
- 0
services/ai_task_service/task/grampus_train_task.go View File

@@ -109,6 +109,7 @@ func (t GrampusTrainTaskTemplate) Create(ctx *context.CreationContext) (*entity.
Next(t.HandleReqParameters).
Next(t.CheckPrivilege4Continue).
Next(t.CheckSourceTaskIsCleared).
Next(t.CheckImageAvailable).
Next(t.CheckBranchExists).
Next(t.CheckBootFile).
Next(t.CheckWorkerNum).


+ 21
- 0
services/ai_task_service/task/opt_handler.go View File

@@ -43,6 +43,7 @@ type CreationHandler interface {
HandleErr4Async(ctx *context.CreationContext) *response.BizError
CheckNotebookCount(ctx *context.CreationContext) *response.BizError
GetAvailableQueues(ctx *context.CreationContext) *response.BizError
CheckImageAvailable(ctx *context.CreationContext) *response.BizError
}

// DefaultCreationHandler CreationHandler的默认实现,公共逻辑可以在此结构体中实现
@@ -731,3 +732,23 @@ func (DefaultCreationHandler) CheckNotebookCount(ctx *context.CreationContext) *
}
return nil
}

// CheckImageAvailable verifies that the image requested for a GCU task can
// still be found before the task is created.
//
// The check only runs when ctx.Request.SourceCloudbrainId is positive
// (presumably a task re-created from an existing one — confirm with callers)
// and the compute source is models.GCU; every other request passes unchecked.
//
// It returns response.IMAGE_NOT_AVAILABLE when the lookup fails or finds no
// image for ctx.Request.ImageUrl, and nil otherwise.
func (DefaultCreationHandler) CheckImageAvailable(ctx *context.CreationContext) *response.BizError {
	if ctx.Request.SourceCloudbrainId <= 0 {
		return nil
	}
	if ctx.Request.ComputeSource.Name != models.GCU {
		return nil
	}

	imageURL := ctx.Request.ImageUrl
	images, err := models.FindImageByImageUrl(imageURL)
	if err != nil {
		// Record the lookup failure: the caller only receives the generic
		// "image not available" error, so the real cause would otherwise be lost.
		log.Error("CheckImageAvailable: find image by url %q failed: %v", imageURL, err)
		return response.IMAGE_NOT_AVAILABLE
	}
	if len(images) == 0 {
		log.Info("CheckImageAvailable: no image found for url %q", imageURL)
		return response.IMAGE_NOT_AVAILABLE
	}
	return nil
}

Loading…
Cancel
Save