#2379 grampus-specialpool 智算网络专属资源池

Merged
lewis merged 5 commits from grampus-specialpool into V20220630 1 year ago
  1. +11
    -0
      models/cloudbrain.go
  2. +35
    -1
      modules/grampus/grampus.go
  3. +7
    -4
      modules/setting/setting.go
  4. +1
    -0
      options/locale/locale_en-US.ini
  5. +2
    -0
      options/locale/locale_zh-CN.ini
  6. +57
    -0
      routers/repo/grampus.go
  7. +6
    -6
      templates/repo/grampus/trainjob/gpu/new.tmpl
  8. +6
    -6
      templates/repo/grampus/trainjob/npu/new.tmpl

+ 11
- 0
models/cloudbrain.go View File

@@ -565,6 +565,17 @@ type FlavorInfo struct {
Desc string `json:"desc"`
}

type SpecialPools struct {
Pools []*SpecialPool `json:"pools"`
}
type SpecialPool struct {
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
}

type ImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}


+ 35
- 1
modules/grampus/grampus.go View File

@@ -1,12 +1,16 @@
package grampus

import (
"encoding/json"
"strings"

"code.gitea.io/gitea/modules/setting"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/timeutil"
"strings"
)

const (
@@ -28,6 +32,8 @@ var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts

SpecialPools *models.SpecialPools
)

type GenerateTrainJobReq struct {
@@ -63,6 +69,27 @@ type GenerateTrainJobReq struct {

func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
createTime := timeutil.TimeStampNow()

var CenterID []string
var CenterName []string

if SpecialPools != nil {
for _, pool := range SpecialPools.Pools {
if !pool.IsExclusive && strings.Contains(req.ComputeResource, pool.Type) {
org, _ := models.GetOrgByName(pool.Org)
if org != nil {
isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID)
if isOrgMember {
for _, info := range pool.Pool {
CenterID = append(CenterID, info.Queue)
CenterName = append(CenterName, info.Value)
}
}
}
}
}
}

jobResult, err := createJob(models.CreateGrampusJobRequest{
Name: req.JobName,
Tasks: []models.GrampusTasks{
@@ -72,6 +99,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
ResourceSpecId: req.ResourceSpecId,
ImageId: req.ImageId,
ImageUrl: req.ImageUrl,
CenterID: CenterID,
CenterName: CenterName,
ReplicaNum: 1,
},
},
@@ -136,3 +165,8 @@ func TransTrainJobStatus(status string) string {

return strings.ToUpper(status)
}
func InitSpecialPool() {
if SpecialPools == nil && setting.Grampus.SpecialPools != "" {
json.Unmarshal([]byte(setting.Grampus.SpecialPools), &SpecialPools)
}
}

+ 7
- 4
modules/setting/setting.go View File

@@ -530,10 +530,11 @@ var (

//grampus config
Grampus = struct {
Env string
Host string
UserName string
Password string
Env string
Host string
UserName string
Password string
SpecialPools string
}{}

//elk config
@@ -1412,6 +1413,8 @@ func GetGrampusConfig() {
Grampus.Host = sec.Key("SERVER_HOST").MustString("")
Grampus.UserName = sec.Key("USERNAME").MustString("")
Grampus.Password = sec.Key("PASSWORD").MustString("")
Grampus.SpecialPools = sec.Key("SPECIAL_POOL").MustString("")

}

func SetRadarMapConfig() {


+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -1178,6 +1178,7 @@ model.manage.model_accuracy = Model Accuracy

grampus.train_job.ai_center = AI Center
grampus.dataset_path_rule = The code is storaged in /cache/code;the dataset is storaged in /cache/dataset;and please put your model into /cache/output, then you can download it online。
grampus.no_operate_right = You have no right to do this operation.

template.items = Template Items
template.git_content = Git Content (Default Branch)


+ 2
- 0
options/locale/locale_zh-CN.ini View File

@@ -1193,6 +1193,8 @@ model.manage.model_accuracy = 模型精度
grampus.train_job.ai_center=智算中心
grampus.dataset_path_rule = 训练脚本存储在/cache/code中,数据集存储在/cache/dataset中,训练输出请存储在/cache/output中以供后续下载。

grampus.no_operate_right = 您没有权限创建这类任务。

template.items=模板选项
template.git_content=Git数据(默认分支)
template.git_hooks=Git 钩子


+ 57
- 0
routers/repo/grampus.go View File

@@ -71,6 +71,25 @@ func grampusTrainJobNewDataPrepare(ctx *context.Context, processType string) err
ctx.Data["images"] = images.Infos
}

grampus.InitSpecialPool()

ctx.Data["GPUEnabled"] = true
ctx.Data["NPUEnabled"] = true

if grampus.SpecialPools != nil {
for _, pool := range grampus.SpecialPools.Pools {
if pool.IsExclusive {
org, _ := models.GetOrgByName(pool.Org)
if org != nil {
isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID)
if !isOrgMember {
ctx.Data[pool.Type+"Enabled"] = false
}
}
}
}
}

//get valid resource specs
specs, err := grampus.GetResourceSpecs(processType)
if err != nil {
@@ -128,10 +147,18 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
image := strings.TrimSpace(form.Image)

if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form)
return
}

errStr := checkSpecialPool(ctx, "GPU")
if errStr != "" {
ychao_1983 marked this conversation as resolved
lewis commented 1 year ago
Review
这个错误是否需要return
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form)
return
}

//check count limit
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.GPUResource)
if err != nil {
@@ -263,6 +290,28 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

func checkSpecialPool(ctx *context.Context, resourceType string) string {
grampus.InitSpecialPool()
if grampus.SpecialPools != nil {
for _, pool := range grampus.SpecialPools.Pools {

if pool.IsExclusive && pool.Type == resourceType {

org, _ := models.GetOrgByName(pool.Org)
if org != nil {
isOrgMember, _ := models.IsOrganizationMember(org.ID, ctx.User.ID)
if !isOrgMember {
return ctx.Tr("repo.grampus.no_operate_right")
}
}
}

}

}
return ""
}

func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrainJobForm) {
displayJobName := form.DisplayJobName
jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
@@ -281,10 +330,18 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
engineName := form.EngineName

if !jobNamePattern.MatchString(displayJobName) {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobNPUNew, &form)
return
}

errStr := checkSpecialPool(ctx, "NPU")
ychao_1983 marked this conversation as resolved
lewis commented 1 year ago
Review
同上
if errStr != "" {
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr(errStr, tplGrampusTrainJobGPUNew, &form)
return
}

//check count limit
count, err := models.GetGrampusCountByUserID(ctx.User.ID, string(models.JobTypeTrain), models.NPUResource)
if err != nil {


+ 6
- 6
templates/repo/grampus/trainjob/gpu/new.tmpl View File

@@ -89,23 +89,23 @@
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/ {{if.NPUEnabled}}npu{{else}}gpu{{end}}/create">
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<a class="active item" href="{{.RepoLink}}/grampus/train-job/gpu/create">
<a {{if.GPUEnabled}}class="active item" href="{{.RepoLink}}/grampus/train-job/gpu/create"{{else}}href="javascript:return false;" class="item disabled" {{end}}>
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a>
<a class="item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<a {{if.NPUEnabled}}class="item" href="{{.RepoLink}}/grampus/train-job/npu/create"{{else}}href="javascript:return false;" class="item disabled" {{end}} >
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>


+ 6
- 6
templates/repo/grampus/trainjob/npu/new.tmpl View File

@@ -85,23 +85,23 @@
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_openi"}}
</a>
<a class="active item" href="{{.RepoLink}}/grampus/train-job/npu/create">
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
<a class="active item" href="{{.RepoLink}}/grampus/train-job/ {{if.NPUEnabled}}npu{{else}}gpu{{end}}/create">
<svg class="svg" sxmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16"><path fill="none" d="M0 0h24v24H0z"></path><path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-2.29-2.333A17.9 17.9 0 0 1 8.027 13H4.062a8.008 8.008 0 0 0 5.648 6.667zM10.03 13c.151 2.439.848 4.73 1.97 6.752A15.905 15.905 0 0 0 13.97 13h-3.94zm9.908 0h-3.965a17.9 17.9 0 0 1-1.683 6.667A8.008 8.008 0 0 0 19.938 13zM4.062 11h3.965A17.9 17.9 0 0 1 9.71 4.333 8.008 8.008 0 0 0 4.062 11zm5.969 0h3.938A15.905 15.905 0 0 0 12 4.248 15.905 15.905 0 0 0 10.03 11zm4.259-6.667A17.9 17.9 0 0 1 15.973 11h3.965a8.008 8.008 0 0 0-5.648-6.667z"></path></svg>
{{.i18n.Tr "cloudbrain.resource_cluster_c2net"}}(Beta)
</a>
</div>
</div>
<div class="required unite min_title inline field">
<label style="font-weight: normal;">{{.i18n.Tr "cloudbrain.compute_resource"}}</label>
<div class="ui blue mini menu compact selectcloudbrain">
<!--a class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create" type="hidden">
<a {{if.GPUEnabled}}class="item" href="{{.RepoLink}}/grampus/train-job/gpu/create"{{else}}href="javascript:return false;" class="item disabled" {{end}}>
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>
</svg>
CPU/GPU
</a-->
<a class="active item" href="{{.RepoLink}}/grampus/train-job/npu/create">
</a>
<a {{if.NPUEnabled}}class="active item" href="{{.RepoLink}}/grampus/train-job/npu/create"{{else}}href="javascript:return false;" class="item disabled" {{end}} >
<svg class="svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="16" height="16">
<path fill="none" d="M0 0h24v24H0z"/>
<path d="M3 2.992C3 2.444 3.445 2 3.993 2h16.014a1 1 0 0 1 .993.992v18.016a.993.993 0 0 1-.993.992H3.993A1 1 0 0 1 3 21.008V2.992zM19 11V4H5v7h14zm0 2H5v7h14v-7zM9 6h6v2H9V6zm0 9h6v2H9v-2z"/>


Loading…
Cancel
Save