#4856 fix-4772

Merged
chenshihai merged 8 commits from fix-4772 into V20231102 6 months ago
  1. +13
    -3
      entity/ai_task.go
  2. +5
    -0
      entity/creation.go
  3. +11
    -6
      models/cloudbrain.go
  4. +17
    -0
      models/resource_specification.go
  5. +1
    -0
      options/locale/locale_en-US.ini
  6. +1
    -0
      options/locale/locale_zh-CN.ini
  7. +46
    -3
      routers/ai_task/ai_task.go
  8. +1
    -0
      routers/api/v1/api.go
  9. +1
    -0
      routers/response/response_list.go
  10. +28
    -6
      services/ai_task_service/cluster/c2net.go
  11. +2
    -2
      services/ai_task_service/cluster/cloudbrain_one.go
  12. +2
    -2
      services/ai_task_service/cluster/cloudbrain_two.go
  13. +2
    -2
      services/ai_task_service/cluster/cluster_base.go
  14. +12
    -6
      services/ai_task_service/task/cloudbrain_one_notebook_task.go
  15. +29
    -21
      services/ai_task_service/task/cloudbrain_one_train_task.go
  16. +16
    -12
      services/ai_task_service/task/cloudbrain_two_train_task.go
  17. +10
    -6
      services/ai_task_service/task/grampus_notebook_task.go
  18. +11
    -7
      services/ai_task_service/task/grampus_online_infer_task.go
  19. +29
    -21
      services/ai_task_service/task/grampus_train_task.go
  20. +1
    -1
      services/ai_task_service/task/super_compute_task.go
  21. +4
    -4
      services/ai_task_service/task/task_base.go
  22. +27
    -5
      services/ai_task_service/task/task_creation_info.go
  23. +3
    -0
      services/ai_task_service/task/task_extend.go
  24. +44
    -0
      services/ai_task_service/task/task_service.go
  25. +16
    -0
      web_src/vuepages/apis/modules/cloudbrain.js
  26. +41
    -1
      web_src/vuepages/components/cloudbrain/ImageSelectV2.vue
  27. +2
    -2
      web_src/vuepages/pages/cloudbrain/configs.js
  28. +24
    -6
      web_src/vuepages/pages/cloudbrain/create/index.vue
  29. +1
    -1
      web_src/vuepages/pages/supercompute/create/index.vue

+ 13
- 3
entity/ai_task.go View File

@@ -2,8 +2,6 @@ package entity

import (
"archive/zip"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"encoding/json"
"fmt"
"io"
@@ -11,6 +9,10 @@ import (
"strings"
"sync"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/services/role"

"code.gitea.io/gitea/models"

"code.gitea.io/gitea/modules/git"
@@ -152,7 +154,7 @@ func (a *AITaskDetailInfo) Tr(language string) {
}

func (a *AITaskDetailInfo) TryToRemoveDatasets(currentUser *models.User) {
if currentUser == nil || a.UserId == 0 || (!currentUser.IsAdmin && currentUser.ID != a.UserId) {
if currentUser == nil || a.UserId == 0 || (!currentUser.IsAdmin && currentUser.ID != a.UserId && !role.UserHasRole(currentUser.ID, models.MonitorAdmin)) {
a.DatasetList = []*models.DatasetDownload{}
}
}
@@ -199,6 +201,14 @@ type GetAITaskCreationInfoReq struct {
IsOnlineType bool
}

// GetAITaskCreationImageInfoReq carries the parameters used to look up the
// images that can be offered for a selected resource specification.
type GetAITaskCreationImageInfoReq struct {
	JobType       models.JobType        // job type; used to resolve the task template and the available centers
	ClusterType   ClusterType           // cluster type; used to resolve the task template
	ComputeSource *models.ComputeSource // compute source the images are queried for
	Spec          *models.Specification // selected specification; its available centers filter the images
	UserID        int64                 // ID of the requesting user
}

type AITaskBriefInfo struct {
ID int64 `json:"id"`
JobType string `json:"job_type"`


+ 5
- 0
entity/creation.go View File

@@ -21,6 +21,11 @@ type CreationRequiredInfo struct {
AllowedWorkerNum []int `json:"allowed_worker_num"`
}

// ImageRequiredInfo is the image part of the task-creation information,
// returned on its own when images are queried for a selected specification.
type ImageRequiredInfo struct {
	Images          []ClusterImage `json:"images"`             // images the user may choose from
	CanUseAllImages bool           `json:"can_use_all_images"` // second value returned by the task template's GetImages (the "no limit" flag)
}

type AITaskCreationConfig struct {
DatasetMaxSize int `json:"dataset_max_size"`
}


+ 11
- 6
models/cloudbrain.go View File

@@ -1594,6 +1594,7 @@ type DatasetDownload struct {
DatasetDownloadLink string `json:"dataset_download_link"`
RepositoryLink string `json:"repository_link"`
IsDelete bool `json:"is_delete"`
Size int64 `json:"size"`
}

type ModelDownload struct {
@@ -1927,13 +1928,17 @@ type GetGrampusAiCentersResult struct {
Infos []GrampusAiCenter `json:"aiCenterInfos"`
TotalSize int `json:"totalSize"`
}

// AICenterImage is one entry of the "aiCenterImages" list of a Grampus image:
// it pairs an AI center ID with an image URL.
type AICenterImage struct {
	AICenterID string `json:"aiCenterId"`
	ImageUrl   string `json:"imageUrl"`
}
type GrampusImage struct {
CreatedAt int64 `json:"createdAt"`
UpdatedAt int64 `json:"updatedAt"`
ID string `json:"id"`
Name string `json:"name"`
ProcessorType string `json:"processorType"`
CreatedAt int64 `json:"createdAt"`
UpdatedAt int64 `json:"updatedAt"`
ID string `json:"id"`
Name string `json:"name"`
ProcessorType string `json:"processorType"`
AICenterImage []AICenterImage `json:"aiCenterImages"`
}

type GetGrampusImagesResult struct {


+ 17
- 0
models/resource_specification.go View File

@@ -297,6 +297,23 @@ func (s *Specification) ToShowString() string {
return specName
}

// GetAvailableCenterIdsByASpec returns the ai_center_code values of every
// resource queue that has an on-shelf specification sharing the source spec
// ID of the specification identified by ID.
//
// An empty, non-nil slice is returned when the specification cannot be
// loaded, when the queue lookup fails, or when no on-shelf queue exists. The
// error of the failing query, if any, is returned alongside.
func GetAvailableCenterIdsByASpec(ID int64) ([]string, error) {
	noCenters := []string{}

	target, err := GetResourceSpecification(&ResourceSpecification{ID: ID})
	if err != nil {
		return noCenters, err
	}

	// Collect the queues of all on-shelf specifications with the same source spec.
	var onShelfQueues []int64
	queueQuery := x.Table("resource_specification").
		Cols("queue_id").
		Where("status=? and source_spec_id=?", SpecOnShelf, target.SourceSpecId)
	if err = queueQuery.Find(&onShelfQueues); err != nil {
		return noCenters, err
	}
	if len(onShelfQueues) == 0 {
		return noCenters, nil
	}

	// Map the queues to their AI center codes.
	var codes []string
	err = x.Table("resource_queue").Cols("ai_center_code").In("id", onShelfQueues).Find(&codes)
	return codes, err
}

type GetAvailableCenterIdOpts struct {
UserId int64
JobType JobType


+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -3478,6 +3478,7 @@ dataset_number_over_limit = The dataset count exceed the limit
result_cleared=The files of the task have been cleared, can not restart or retrain any more, please create a new task instead
model_not_exist=The model in the task does not exist or has been deleted
too_many_notebook=A user can have up to 5 debug tasks, please try again after delete some debug tasks.
no_center_match=Cannot match an AI center, please select another specification.

[common_error]
system_error = System error.Please try again later


+ 1
- 0
options/locale/locale_zh-CN.ini View File

@@ -3501,6 +3501,7 @@ dataset_number_over_limit = 选择的数据集文件数量超出限制
result_cleared=源任务的文件已被清理,无法再次调试或复用训练结果,请新建任务。
model_not_exist=选择的预训练模型不存在或者已被删除
too_many_notebook=每个用户最多只能创建5个调试任务,请删除历史任务再新建。
no_center_match=没有可分配的中心,请选择其他规格。


[common_error]


+ 46
- 3
routers/ai_task/ai_task.go View File

@@ -2,6 +2,10 @@ package ai_task

import (
"archive/zip"
"net/http"
"net/url"
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/cloudbrain"
@@ -13,9 +17,7 @@ import (
"code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/ai_task_service/schedule"
"code.gitea.io/gitea/services/ai_task_service/task"
"net/http"
"net/url"
"strings"
"code.gitea.io/gitea/services/cloudbrain/resource"
)

func CreateAITask(ctx *context.Context, form entity.CreateReq) {
@@ -385,6 +387,47 @@ func GetNodeInfo(ctx *context.Context) {
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m))
}

// GetImageInfoBySelectedSpec responds with the images that can be used with
// the specification selected on the task-creation page.
//
// It reads the query parameters job_type, compute_source, cluster_type,
// spec_id and has_internet. The response is always sent with HTTP 200; a
// failure is reported through the business error in the JSON body.
func GetImageInfoBySelectedSpec(ctx *context.Context) {
	requestedType := ctx.Query("job_type")
	// Online inference is looked up as a debug job.
	if models.JobType(requestedType) == models.JobTypeOnlineInference {
		requestedType = string(models.JobTypeDebug)
	}
	log.Info("required jobType=" + requestedType)

	sourceName := ctx.Query("compute_source")
	cluster := ctx.Query("cluster_type")
	source := models.GetComputeSourceInstance(sourceName)
	specID := ctx.QueryInt64("spec_id")
	internetFlag := ctx.QueryInt("has_internet")
	kind := models.JobType(requestedType)

	// The specification must exist and be usable by the current user.
	findOpts := models.FindSpecsOptions{
		JobType:         kind,
		ComputeResource: sourceName,
		Cluster:         cluster,
		HasInternet:     models.SpecInternetQuery(internetFlag),
	}
	selected, err := resource.GetAndCheckSpec(ctx.User.ID, specID, findOpts)
	if selected == nil || err != nil {
		ctx.JSON(http.StatusOK, response.OuterTrBizError(response.SPEC_NOT_AVAILABLE, ctx))
		return
	}

	imageReq := entity.GetAITaskCreationImageInfoReq{
		JobType:       kind,
		ClusterType:   entity.ClusterType(cluster),
		ComputeSource: source,
		Spec:          selected,
		UserID:        ctx.User.ID,
	}
	info, bizErr := task.GetAvailableImageInfoBySpec(imageReq)
	if bizErr != nil {
		log.Error("GetAITaskImageCreationInfo error,err=%v", bizErr)
		ctx.JSON(http.StatusOK, response.OuterTrBizError(bizErr, ctx))
		return
	}
	ctx.JSON(http.StatusOK, response.OuterSuccessWithData(info))
}

func GetCreationRequiredInfo(ctx *context.Context) {
jobType := ctx.Query("job_type")
var isOnlineType bool


+ 1
- 0
routers/api/v1/api.go View File

@@ -658,6 +658,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Post("/restart", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrAITaskCreator(), ai_task.RestartAITask)
m.Get("/debug_url", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNotebookUrl)
m.Get("/creation/required", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetCreationRequiredInfo)
m.Get("/creation/image_by_spec", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetImageInfoBySelectedSpec)
m.Post("/output/reschedule", reqRepoWriter(models.UnitTypeCloudBrain), ai_task.RetryModelSchedule)

}, reqToken(), context.RepoRef())


+ 1
- 0
routers/response/response_list.go View File

@@ -37,3 +37,4 @@ var BRANCH_NOT_EXISTS = &BizError{Code: 2020, DefaultMsg: "The branch does not e
var MODEL_NUM_OVER_LIMIT = &BizError{Code: 2021, DefaultMsg: "The number of models exceeds the limit of 30", TrCode: "repo.debug.manage.model_num_over_limit"}
var DATASET_NUMBER_OVER_LIMIT = &BizError{Code: 2022, DefaultMsg: "The dataset count exceed the limit", TrCode: "ai_task.dataset_number_over_limit"}
var NOTEBOOK_EXCEED_MAX_NUM = &BizError{Code: 2023, DefaultMsg: "You can have up to 5 Debug Tasks, please try again after delete some tasks. ", TrCode: "ai_task.too_many_notebook"}
// NO_CENTER_MATCH is reported when no AI center can be matched for the
// selected specification. DefaultMsg is filled in, like the other entries in
// this list, so the error still carries a message if no translation is found.
var NO_CENTER_MATCH = &BizError{Code: 2024, DefaultMsg: "Cannot match an AI center, please select another specification.", TrCode: "ai_task.no_center_match"}

+ 28
- 6
services/ai_task_service/cluster/c2net.go View File

@@ -59,7 +59,7 @@ func (c C2NetClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBookTaskRequ
return convertGrampus2NoteBookRes(jobResult), nil
}

func (c C2NetClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
func (c C2NetClusterAdapter) GetNotebookImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error) {
processType := req.ComputeSource.FullName
images, err := grampus.GetImages(processType, string(req.JobType))
if err != nil {
@@ -69,15 +69,37 @@ func (c C2NetClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity
if images == nil || images.Infos == nil || len(images.Infos) == 0 {
return nil, true, err
}
r := make([]entity.ClusterImage, len(images.Infos))
for i, v := range images.Infos {
r[i] = ConvertGrampusImageToStandard(v)

r := make([]entity.ClusterImage, 0)
for _, v := range images.Infos {
if hasIntersection(v.AICenterImage, centerId...) {
r = append(r, ConvertGrampusImageToStandard(v))
}
}
if len(r) == 0 {
return nil, false, nil
}

return r, false, nil
}

func (c C2NetClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
return c.GetNotebookImages(req)
// hasIntersection reports whether any entry of imageCenterInfos belongs to
// one of the given AI centers.
//
// When no centerId is passed, or the image carries no center information, no
// check is made and true is returned.
func hasIntersection(imageCenterInfos []models.AICenterImage, centerId ...string) bool {
	if len(imageCenterInfos) == 0 || len(centerId) == 0 {
		return true
	}

	wanted := make(map[string]struct{}, len(centerId))
	for _, code := range centerId {
		wanted[code] = struct{}{}
	}

	for i := range imageCenterInfos {
		if _, found := wanted[imageCenterInfos[i].AICenterID]; found {
			return true
		}
	}
	return false
}

// GetTrainImages returns the images for train tasks on C2Net. It delegates to
// GetNotebookImages, passing the request and the optional center IDs through
// unchanged.
func (c C2NetClusterAdapter) GetTrainImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error) {
	return c.GetNotebookImages(req, centerId...)
}

func ConvertGrampusImageToStandard(image models.GrampusImage) entity.ClusterImage {


+ 2
- 2
services/ai_task_service/cluster/cloudbrain_one.go View File

@@ -37,11 +37,11 @@ func (c CloudbrainOneClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBook
return nil, nil
}

func (c CloudbrainOneClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
// GetNotebookImages returns no image list and true for the second ("no
// limit") result. Neither req nor centerId is consulted.
func (c CloudbrainOneClusterAdapter) GetNotebookImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error) {
	return nil, true, nil
}

func (c CloudbrainOneClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
// GetTrainImages returns the images for train tasks on Cloudbrain One. It
// delegates to GetNotebookImages and forwards the optional center IDs, the
// same way the C2Net adapter does, so the filter is not silently dropped.
func (c CloudbrainOneClusterAdapter) GetTrainImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error) {
	return c.GetNotebookImages(req, centerId...)
}



+ 2
- 2
services/ai_task_service/cluster/cloudbrain_two.go View File

@@ -88,7 +88,7 @@ func (c CloudbrainTwoClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBook

var cloudbrainTwoNotebookImages []entity.ClusterImage

func (c CloudbrainTwoClusterAdapter) GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
func (c CloudbrainTwoClusterAdapter) GetNotebookImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error) {
if cloudbrainTwoNotebookImages == nil || len(cloudbrainTwoNotebookImages) == 0 {
images := setting.StImageInfos.ImageInfo
cloudbrainTwoNotebookImages = make([]entity.ClusterImage, len(images))
@@ -105,7 +105,7 @@ func (c CloudbrainTwoClusterAdapter) GetNotebookImages(req entity.GetImageReq) (

var cloudbrainTwoTrainImages []entity.ClusterImage

func (c CloudbrainTwoClusterAdapter) GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) {
func (c CloudbrainTwoClusterAdapter) GetTrainImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error) {
if cloudbrainTwoTrainImages == nil || len(cloudbrainTwoTrainImages) == 0 {
var versionInfos modelarts.VersionInfo
if err := json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {


+ 2
- 2
services/ai_task_service/cluster/cluster_base.go View File

@@ -48,7 +48,7 @@ type ClusterAdapter interface {
GetResourceUsage(opts entity.ClusterResourceUsageOpts) (*entity.ResourceUsage, error)
//GetImages return available list of clusters
//The second parameter will return true if image is no limit
GetNotebookImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error)
GetTrainImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error)
GetNotebookImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error)
GetTrainImages(req entity.GetImageReq, centerId ...string) ([]entity.ClusterImage, bool, error)
CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error)
}

+ 12
- 6
services/ai_task_service/task/cloudbrain_one_notebook_task.go View File

@@ -133,6 +133,16 @@ func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.Creation
return response.SYSTEM_ERROR
}
form := ctx.Request

centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}

req := entity.CreateNoteBookTaskRequest{
Name: form.JobName,
Tasks: []entity.NoteBookTask{
@@ -147,12 +157,8 @@ func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.Creation
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
AutoStopDuration: autoStopDurationMs,
Capacity: setting.Capacity,
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
Spec: ctx.Spec,
CenterID: centerIds,
Spec: ctx.Spec,
},
},
}


+ 29
- 21
services/ai_task_service/task/cloudbrain_one_train_task.go View File

@@ -97,6 +97,14 @@ func (g CloudbrainOneTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon
return response.SYSTEM_ERROR
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
req := entity.CreateTrainTaskRequest{
Name: form.JobName,
DisplayJobName: form.DisplayJobName,
@@ -108,17 +116,13 @@ func (g CloudbrainOneTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath),
Params: form.ParamArray,
Spec: ctx.Spec,
CenterID: centerIds,
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath),
Params: form.ParamArray,
Spec: ctx.Spec,
},
},
}
@@ -142,6 +146,14 @@ func (g CloudbrainOneTrainTaskTemplate) CallRestartAPI(ctx *context.CreationCont
return response.SYSTEM_ERROR
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
req := entity.CreateTrainTaskRequest{
Name: form.JobName,
DisplayJobName: form.DisplayJobName,
@@ -153,16 +165,12 @@ func (g CloudbrainOneTrainTaskTemplate) CallRestartAPI(ctx *context.CreationCont
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
CenterID: centerIds,
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
},
},
}


+ 16
- 12
services/ai_task_service/task/cloudbrain_two_train_task.go View File

@@ -115,24 +115,28 @@ func (g CloudbrainTwoTrainTaskTemplate) CallCreationAPI(ctx *context.CreationCon
}
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
req := entity.CreateTrainTaskRequest{
Name: form.JobName,
DisplayJobName: form.DisplayJobName,
Description: form.Description,
Tasks: []entity.TrainTask{
{
Name: form.JobName,
ResourceSpecId: ctx.Spec.SourceSpecId,
ImageId: form.ImageID,
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath),
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
Name: form.JobName,
ResourceSpecId: ctx.Spec.SourceSpecId,
ImageId: form.ImageID,
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
LogPath: ctx.GetContainerDataArray(entity.ContainerLogPath),
CenterID: centerIds,
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),


+ 10
- 6
services/ai_task_service/task/grampus_notebook_task.go View File

@@ -199,6 +199,14 @@ func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContex
return response.SYSTEM_ERROR
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
imageUrl := strings.TrimSpace(form.ImageUrl)
if form.ImageID != "" {
imageUrl = ""
@@ -215,12 +223,8 @@ func (g GrampusNoteBookTaskTemplate) CallCreationAPI(ctx *context.CreationContex
Code: ctx.GetContainerDataArray(entity.ContainerCode),
AutoStopDuration: autoStopDurationMs,
Capacity: setting.Capacity,
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
Spec: ctx.Spec,
CenterID: centerIds,
Spec: ctx.Spec,
},
},
}


+ 11
- 7
services/ai_task_service/task/grampus_online_infer_task.go View File

@@ -95,6 +95,14 @@ func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationCon
return response.SYSTEM_ERROR
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
imageUrl := strings.TrimSpace(form.ImageUrl)
if form.ImageID != "" {
imageUrl = ""
@@ -119,13 +127,9 @@ func (g GrampusOnlineInferTaskTemplate) CallCreationAPI(ctx *context.CreationCon
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
AutoStopDuration: -1,
Capacity: setting.Capacity,
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
Spec: ctx.Spec,
BootFile: ctx.Request.BootFile,
CenterID: centerIds,
Spec: ctx.Spec,
BootFile: ctx.Request.BootFile,
},
},
}


+ 29
- 21
services/ai_task_service/task/grampus_train_task.go View File

@@ -131,6 +131,14 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext)
return response.SYSTEM_ERROR
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
imageUrl := strings.TrimSpace(form.ImageUrl)
if form.ImageID != "" {
imageUrl = ""
@@ -140,17 +148,13 @@ func (g GrampusTrainTaskTemplate) CallCreationAPI(ctx *context.CreationContext)
DisplayJobName: form.DisplayJobName,
Tasks: []entity.TrainTask{
{
Name: form.JobName,
ResourceSpecId: ctx.Spec.SourceSpecId,
ImageId: form.ImageID,
ImageUrl: imageUrl,
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
Name: form.JobName,
ResourceSpecId: ctx.Spec.SourceSpecId,
ImageId: form.ImageID,
ImageUrl: imageUrl,
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: centerIds,
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
@@ -182,6 +186,14 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *
return response.SYSTEM_ERROR
}
form := ctx.Request
centerIds, bizErr := GetAvailableCenterIds(ctx.Spec, models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}, form.ComputeSource, form.ImageID, g.ClusterType)
if bizErr != nil {
return bizErr
}
req := entity.CreateTrainTaskRequest{
Name: form.JobName,
DisplayJobName: form.DisplayJobName,
@@ -193,16 +205,12 @@ func (g GrampusTrainTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *
ImageUrl: strings.TrimSpace(form.ImageUrl),
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset),
Code: ctx.GetContainerDataArray(entity.ContainerCode),
CenterID: ctx.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
UserId: ctx.User.ID,
JobType: g.JobType,
HasInternet: form.HasInternet,
}),
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
CenterID: centerIds,
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel),
BootFile: form.BootFile,
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath),
Params: form.ParamArray,
Spec: ctx.Spec,
},
},
}


+ 1
- 1
services/ai_task_service/task/super_compute_task.go View File

@@ -29,7 +29,7 @@ func init() {
RegisterTask(models.JobTypeSuperCompute, entity.C2Net, t)
}

func (g SuperComputeTaskTemplate) GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) {
func (g SuperComputeTaskTemplate) GetImages(computeSource models.ComputeSource, centerId ...string) ([]entity.ClusterImage, bool, *response.BizError) {
c := g.GetMyCluster()
if c == nil {
log.Error("Get cluster failed")


+ 4
- 4
services/ai_task_service/task/task_base.go View File

@@ -60,7 +60,7 @@ type AITaskTemplate interface {
GetDebugUrl(cloudbrainId int64, fileName ...string) (string, *response.BizError)
GetOperationProfile(cloudbrainId int64) (*entity.OperationProfile, *response.BizError)
GetResourceUsage(opts entity.GetResourceUsageOpts) (*entity.ResourceUsage, *response.BizError)
GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError)
GetImages(computeSource models.ComputeSource, centerId ...string) ([]entity.ClusterImage, bool, *response.BizError)
GetSpecs(opts entity.GetSpecOpts) ([]*api.SpecificationShow, *response.BizError)
GetConfig(opts entity.AITaskConfigKey) *entity.AITaskBaseConfig
GetNodeInfo(cloudbrainId int64) ([]entity.AITaskNodeInfo, *response.BizError)
@@ -456,7 +456,7 @@ func (g DefaultAITaskTemplate) GetResourceUsage(opts entity.GetResourceUsageOpts
return res, nil
}

func (g DefaultAITaskTemplate) GetImages(computeSource models.ComputeSource) ([]entity.ClusterImage, bool, *response.BizError) {
func (g DefaultAITaskTemplate) GetImages(computeSource models.ComputeSource, centerId ...string) ([]entity.ClusterImage, bool, *response.BizError) {
c := g.GetMyCluster()
if c == nil {
log.Error("Get cluster failed")
@@ -470,12 +470,12 @@ func (g DefaultAITaskTemplate) GetImages(computeSource models.ComputeSource) ([]
images, customFlag, err = c.GetNotebookImages(entity.GetImageReq{
ComputeSource: computeSource,
JobType: g.JobType,
})
}, centerId...)
} else {
images, customFlag, err = c.GetTrainImages(entity.GetImageReq{
ComputeSource: computeSource,
JobType: g.JobType,
})
}, centerId...)
}
if err != nil {
log.Error("GetImages err.computeSource=%s err =%v", computeSource.Name, err)


+ 27
- 5
services/ai_task_service/task/task_creation_info.go View File

@@ -55,11 +55,6 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio

//生成任务名称
result.DisplayJobName = t.GetDisplayJobName(req.User.Name)
// 查询镜像列表
if images, canUseAll, err := t.GetImages(*req.ComputeSource); err == nil {
result.Images = images
result.CanUseAllImages = canUseAll
}
specsMap := make(map[string][]*structs.SpecificationShow, 0)
//查询有网资源规格
if specs, err := t.GetSpecs(entity.GetSpecOpts{
@@ -85,6 +80,12 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio
specsMap["all"] = specs
}
result.Specs = specsMap
// 查询镜像列表
if images, canUseAll, err := t.GetImages(*req.ComputeSource); err == nil {
result.Images = images
result.CanUseAllImages = canUseAll
}

result.Config = entity.AITaskCreationConfig{
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000,
}
@@ -96,3 +97,24 @@ func GetAITaskCreationInfo(req entity.GetAITaskCreationInfoReq) (*entity.Creatio
}
return result, nil
}

// GetAvailableImageInfoBySpec returns the images that can be used with the
// specification in req.
//
// The task template is resolved from req.JobType and req.ClusterType, the
// specification's available centers are computed for req.UserID, and the
// template's images are filtered by those centers.
//
// A request without a compute source or specification yields
// response.SYSTEM_ERROR rather than a nil dereference. A failure while
// fetching the images is logged and an empty result is returned, which keeps
// the lookup best-effort.
func GetAvailableImageInfoBySpec(req entity.GetAITaskCreationImageInfoReq) (*entity.ImageRequiredInfo, *response.BizError) {
	if req.ComputeSource == nil || req.Spec == nil {
		log.Error("GetAvailableImageInfoBySpec param error: compute source or spec is nil")
		return nil, response.SYSTEM_ERROR
	}

	t, err := GetAITaskTemplate(req.JobType, req.ClusterType)
	if err != nil {
		log.Error("param error")
		return nil, err
	}

	centerIds := req.Spec.GetAvailableCenterIds(models.GetAvailableCenterIdOpts{
		UserId:  req.UserID,
		JobType: req.JobType,
	})

	result := &entity.ImageRequiredInfo{}
	images, canUseAll, imgErr := t.GetImages(*req.ComputeSource, centerIds...)
	if imgErr != nil {
		// Best-effort: callers still get an (empty) result, but the failure is no longer silent.
		log.Error("GetAvailableImageInfoBySpec get images failed, err=%v", imgErr)
		return result, nil
	}
	result.Images = images
	result.CanUseAllImages = canUseAll
	return result, nil
}

+ 3
- 0
services/ai_task_service/task/task_extend.go View File

@@ -96,6 +96,7 @@ func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown
link := ""
url := ""
isDelete := false
var size int64
attachment, err := models.GetAttachmentByUUID(uuidStr)
if err != nil {
log.Error("GetAttachmentByUUID failed:%v", err.Error())
@@ -106,6 +107,7 @@ func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown
isDelete = true
} else {
name = attachment.Name
size = attachment.Size
dataset, err := models.GetDatasetByID(attachment.DatasetID)
if err != nil {
log.Error("GetDatasetByID failed:%v", err.Error())
@@ -128,6 +130,7 @@ func getCloudBrainDatasetInfo4Local(uuid string, datasetname string, isNeedDown
RepositoryLink: link,
IsDelete: isDelete,
UUID: uuidStr,
Size: size,
})
}
log.Info("dataset length=" + fmt.Sprint(len(datasetDownload)))


+ 44
- 0
services/ai_task_service/task/task_service.go View File

@@ -11,6 +11,7 @@ import (
"strings"

"code.gitea.io/gitea/entity"
"code.gitea.io/gitea/manager/client/grampus"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/convert"
"code.gitea.io/gitea/modules/git"
@@ -729,6 +730,49 @@ func SyncAITaskStatus() {
}
}
}
// GetAvailableCenterIds returns the AI centers on which a task may be
// scheduled.
//
// It starts from the centers available for the specification. For C2Net
// tasks that select an image by ID, the list is narrowed to the centers
// listed for that image by Grampus. The narrowing is skipped, and the
// specification's centers are returned unchanged, when:
//   - the specification has no centers, no image ID is given, the cluster is
//     not C2Net, or no compute source is supplied;
//   - the image list cannot be fetched from Grampus (a warning is logged);
//   - the image is not found or carries no center information.
//
// response.NO_CENTER_MATCH is returned when the image lists centers but none
// of them is available for the specification.
func GetAvailableCenterIds(specification *models.Specification, opts models.GetAvailableCenterIdOpts, computeSource *models.ComputeSource,
	imageId string, clusterType entity.ClusterType) ([]string, *response.BizError) {
	centerIds := specification.GetAvailableCenterIds(opts)

	if len(centerIds) == 0 || imageId == "" || clusterType != entity.C2Net || computeSource == nil {
		return centerIds, nil
	}

	images, err := grampus.GetImages(computeSource.FullName, string(opts.JobType))
	if err != nil {
		// The error needs a format verb; without one it is rendered as "%!(EXTRA ...)".
		log.Warn("can not get image info from grampus, err=%v", err)
		return centerIds, nil
	}
	if images == nil {
		// Guard against a nil result without an error, as the C2Net adapter does.
		return centerIds, nil
	}

	// Collect the centers on which the selected image is available.
	imageCenters := make(map[string]struct{})
	for _, image := range images.Infos {
		if image.ID != imageId {
			continue
		}
		for _, centerInfo := range image.AICenterImage {
			imageCenters[centerInfo.AICenterID] = struct{}{}
		}
		break
	}
	if len(imageCenters) == 0 {
		return centerIds, nil
	}

	// Keep the specification's centers, in their original order, that also host the image.
	var matched []string
	for _, centerId := range centerIds {
		if _, ok := imageCenters[centerId]; ok {
			matched = append(matched, centerId)
		}
	}
	if len(matched) == 0 {
		return matched, response.NO_CENTER_MATCH
	}
	return matched, nil
}

func HandleNoJobIdAITasks() {
defer func() {


+ 16
- 0
web_src/vuepages/apis/modules/cloudbrain.js View File

@@ -15,6 +15,22 @@ export const getAiTaskPrepareInfo = (params) => {
});
}

// Query the images available for a selected resource specification.
// params: repoOwnerName, repoName, jobType, computeSource, clusterType, spec, hasInternet
// (sent as the query parameters job_type, compute_source, cluster_type, spec_id, has_internet)
export const getAiTaskImgesBySpec = (params) => {
  return service({
    url: `/api/v1/${params.repoOwnerName}/${params.repoName}/ai_task/creation/image_by_spec`,
    method: 'get',
    params: {
      job_type: params.jobType,
      compute_source: params.computeSource,
      cluster_type: params.clusterType,
      spec_id: params.spec,
      has_internet: params.hasInternet,
    },
  });
}

// 创建AI任务
export const createAiTask = (data) => {
return service({


+ 41
- 1
web_src/vuepages/components/cloudbrain/ImageSelectV2.vue View File

@@ -14,15 +14,21 @@

<script>

import { getAiTaskImgesBySpec } from '~/apis/modules/cloudbrain';
export default {
name: 'ImageSelectV2',
props: {
configs: { type: Object, required: true, },
value: { type: Object, required: true },
images: { type: Array, required: true, },
spec: { type: String, required: true, },
networkType: { type: String, required: true },
required: { type: Boolean, default: true },
},
data() {
return {
repoOwnerName: location.pathname.split('/')[1],
repoName: location.pathname.split('/')[2],
currentValue: '',
errStatus: false,
};
@@ -35,7 +41,41 @@ export default {
newVal = newVal === undefined ? {} : newVal;
this.currentValue = (newVal.image_id || '').toString();
}
}
},
spec: {
immediate: true,
handler(newVal) {
const imagev2Cfg = this.configs?.form?.imagev2;
if (imagev2Cfg && imagev2Cfg.relatedSpec) {
if (!newVal) {
this.$emit('changeImages', []);
} else {
let networkType = 0; // all
if (this.networkType == 'no_internet') {
networkType = 1;
} else if (this.networkType == 'has_internet') {
networkType = 2;
}
getAiTaskImgesBySpec({
repoOwnerName: this.repoOwnerName,
repoName: this.repoName,
jobType: this.configs.taskType,
computeSource: this.configs.computerResouce,
clusterType: this.configs.clusterType,
spec: newVal,
hasInternet: networkType,
}).then(res => {
const data = res.data;
if (data.code == 0) {
this.$emit('changeImages', data?.data?.images || []);
}
}).catch(err => {
console.log(err);
})
}
}
}
},
},
methods: {
check() {


+ 2
- 2
web_src/vuepages/pages/cloudbrain/configs.js View File

@@ -113,7 +113,7 @@ export const CreatePageConfigs = {
taskDescr: { required: false, },
branchName: { required: true, },
model: { required: false, multiple: true },
imagev2: { required: true },
imagev2: { required: true, relatedSpec: true },
dataset: { required: false, type: 1, useExceedSize: true },
networkType: { required: true },
spec: { required: true },
@@ -324,7 +324,7 @@ export const CreatePageConfigs = {
taskDescr: { required: false, },
branchName: { required: true, },
model: { required: false, multiple: false },
imagev2: { required: true },
imagev2: { required: true, relatedSpec: true },
bootFile: { required: true, sampleUrl: 'https://openi.pcl.ac.cn/OpenIOSSG/MNIST_Example/src/branch/master/train_for_c2net.py' },
dataset: { required: true, type: 1 },
runParameters: { required: false },


+ 24
- 6
web_src/vuepages/pages/cloudbrain/create/index.vue View File

@@ -26,6 +26,12 @@
<div class="main-title">{{ $t('cloudbrainObj.basicInfo') }}:</div>
<FormTop ref="formTopRef" :repoOwnerName="repoOwnerName" :repoName="repoName" :configs="pageCfg"
:queueNum="queueNum"></FormTop>
<NetworkType ref="networkTypeRef" v-if="formCfg.networkType" v-model="state.networkType"></NetworkType>
<SpecSelect ref="specRef" v-if="formCfg.spec" v-model="state.spec" :required="formCfg.spec.required"
:configs="specConfigs" :workServerNum="state.workServerNum" :networkType="state.networkType"></SpecSelect>
<WorkServerNum ref="workServerNumRef" v-if="formCfg.workServerNum && workServerNumList.length > 1"
v-model="state.workServerNum" :required="formCfg.workServerNum.required" :data="workServerNumList">
</WorkServerNum>
<TaskName ref="taskNameRef" v-if="formCfg.taskName" v-model="state.taskName" autofocus
:required="formCfg.taskName.required" :userName="repoOwnerName">
</TaskName>
@@ -49,6 +55,7 @@
:required="formCfg.imagev1.required" :type="formCfg.imagev1.type != undefined ? formCfg.imagev1.type : 0">
</ImageSelectV1>
<ImageSelectV2 ref="imagev2Ref" v-if="formCfg.imagev2" v-model="state.image" :images="imageList"
:configs="pageCfg" :spec="state.spec" :networkType="state.networkType" @changeImages="changeImages"
:required="formCfg.imagev2.required">
</ImageSelectV2>
<BootFile ref="bootFileRef" v-if="formCfg.bootFile" v-model="state.bootFile"
@@ -60,12 +67,6 @@
<RunParameters ref="runParametersRef" v-if="formCfg.runParameters" v-model="state.runParameters"
:required="formCfg.runParameters.required">
</RunParameters>
<NetworkType ref="networkTypeRef" v-if="formCfg.networkType" v-model="state.networkType"></NetworkType>
<SpecSelect ref="specRef" v-if="formCfg.spec" v-model="state.spec" :required="formCfg.spec.required"
:configs="specConfigs" :workServerNum="state.workServerNum" :networkType="state.networkType"></SpecSelect>
<WorkServerNum ref="workServerNumRef" v-if="formCfg.workServerNum && workServerNumList.length > 1"
v-model="state.workServerNum" :required="formCfg.workServerNum.required" :data="workServerNumList">
</WorkServerNum>
<div class="form-row" v-if="this.isModifyTask && (pageCfg.modify && pageCfg.modify.showIsContinue)">
<div class="title"></div>
<div class="content">
@@ -426,6 +427,23 @@ export default {
console.log(err);
});
},
changeImages(images) {
this.imageList = images || [];
let image = this.imageList[0];
if (this.isModifyTask && this.modeifyTaskId && this.state.image.image_id) {
const matchImage = this.imageList.filter(item => item.image_id == this.state.image.image_id)[0];
if (matchImage) {
image = matchImage;
}
}
if (image) {
this.state.image.image_id = image.image_id;
this.state.image.image_name = image.image_id;
} else {
this.state.image.image_id = '';
this.state.image.image_name = '';
}
},
transformTreeData(data) {
for (let i = 0, iLen = data.length; i < iLen; i++) {
const dataI = data[i];


+ 1
- 1
web_src/vuepages/pages/supercompute/create/index.vue View File

@@ -46,7 +46,7 @@
<ModelSelect ref="modelRef" v-if="formCfg.model" v-model="state.model" :required="formCfg.model.required"
:multiple="formCfg.model.multiple" :repoOwnerName="repoOwnerName" :repoName="repoName"></ModelSelect>
<ImageSelectV2 ref="imagev2Ref" v-if="formCfg.imagev2" v-model="state.image" :images="imageList"
:required="formCfg.imagev2.required">
:configs="pageCfg" :spec="state.spec" :networkType="state.networkType" :required="formCfg.imagev2.required">
</ImageSelectV2>
<DatasetSelect ref="datasetRef" v-if="formCfg.dataset" v-model="state.dataset"
:required="formCfg.dataset.required" :type="formCfg.dataset.type != undefined ? formCfg.dataset.type : -1"


Loading…
Cancel
Save