#4773 fix-4698

Merged
zouap merged 3 commits from fix-4698 into V20231018 7 months ago
  1. +28
    -0
      models/cloudbrain.go
  2. +12
    -0
      modules/cron/tasks_basic.go
  3. +18
    -0
      modules/setting/setting.go
  4. +1
    -0
      options/locale/locale_en-US.ini
  5. +1
    -0
      options/locale/locale_zh-CN.ini
  6. +2
    -2
      routers/api/v1/api.go
  7. +1
    -0
      routers/response/response_list.go
  8. +1
    -0
      services/ai_task_service/task/cloudbrain_one_notebook_task.go
  9. +1
    -0
      services/ai_task_service/task/cloudbrain_two_notebook_task.go
  10. +1
    -0
      services/ai_task_service/task/grampus_notebook_task.go
  11. +14
    -0
      services/ai_task_service/task/opt_handler.go
  12. +43
    -0
      services/ai_task_service/task/task_service.go

+ 28
- 0
models/cloudbrain.go View File

@@ -3452,6 +3452,34 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) {
Find(&cloudbrains)
}

// CountPerUserID is one row of a per-user aggregate over the cloudbrain table:
// a user ID paired with the number of matching records for that user.
// It is the row type filled by GetNotebookCountGreaterThanN.
type CountPerUserID struct {
// Count receives the "count(*) AS count" column of the aggregate query.
Count int64
// UserID receives the "user_id" column the aggregate query groups by.
UserID int64
}

// GetNotebookCountGreaterThanN lists the users that own more than n debug
// ("DEBUG" job type) cloudbrain records that are not soft-deleted, together
// with each user's record count, ordered by that count descending.
//
// A record counts as not deleted when deleted_at is NULL or equals the zero
// timestamp "0001-01-01 00:00:00".
func GetNotebookCountGreaterThanN(n int) ([]CountPerUserID, error) {
	// n is an int, so concatenating it into the HAVING clause cannot carry
	// arbitrary SQL.
	havingClause := "count(*)>" + strconv.Itoa(n)

	session := x.Table("cloudbrain").
		Select("user_id, count(*) AS count").
		Where("job_type=? and (deleted_at=? or deleted_at is NULL)", "DEBUG", "0001-01-01 00:00:00").
		GroupBy("user_id").
		Having(havingClause).
		OrderBy("count(*) desc")

	// Start from an empty, non-nil slice so callers get [] rather than nil
	// when no user exceeds the threshold.
	result := []CountPerUserID{}
	err := session.Find(&result)
	return result, err
}
// GetNotebooksByUser returns the IDs of the given user's debug ("DEBUG")
// cloudbrain records that are not soft-deleted, highest ID first. The first
// offset rows of that ordering are skipped and at most 1000 IDs are returned.
func GetNotebooksByUser(uid int64, offset int) ([]int64, error) {
	// Upper bound on the number of IDs fetched per call.
	const pageSize = 1000

	var notebookIDs []int64
	query := x.Table("cloudbrain").
		Cols("id").
		Where("job_type=? and user_id=? and (deleted_at=? or deleted_at is NULL)", "DEBUG", uid, "0001-01-01 00:00:00").
		Desc("id").
		Limit(pageSize, offset)
	err := query.Find(&notebookIDs)
	return notebookIDs, err
}

func GetNotebooksCountByUser(uid int64) (int64, error) {
cloudbrain := new(Cloudbrain)
return x.Where("user_id=? and job_type=?", uid, "DEBUG").Count(cloudbrain)

}

func GetCloudbrainWithDeletedByIDs(ids []int64) ([]*Cloudbrain, error) {
cloudbrains := make([]*Cloudbrain, 0)
return cloudbrains, x.


+ 12
- 0
modules/cron/tasks_basic.go View File

@@ -209,6 +209,17 @@ func registerHandleClearCloudbrainResult() {
})
}

func registerHandleClearNotebook() {
RegisterTaskFatal("handle_notebook_clear", &BaseConfig{
Enabled: true,
RunAtStart: setting.NotebookStrategy.RunAtStart,
Schedule: setting.NotebookStrategy.Cron,
}, func(ctx context.Context, _ *models.User, _ Config) error {
task.ClearNotebook()
return nil
})
}

func registerHandleSummaryStatistic() {
RegisterTaskFatal("handle_summary_statistic", &BaseConfig{
Enabled: true,
@@ -379,6 +390,7 @@ func initBasicTasks() {
registerHandleRepoAndUserStatistic()
registerHandleSummaryStatistic()
registerHandleClearCloudbrainResult()
registerHandleClearNotebook()

registerSyncCloudbrainStatus()
registerHandleOrgStatistic()


+ 18
- 0
modules/setting/setting.go View File

@@ -652,6 +652,13 @@ var (
Cron string
RunAtStart bool
}{}
// NotebookStrategy holds the settings read from the [notebook_strategy]
// config section by getNotebookStrategy.
NotebookStrategy = struct {
// ClearEnabled is read from CLEAR_ENABLED (default false).
ClearEnabled bool
// ClearBatchSize is read from CLEAR_BATCH_SIZE (default 300).
ClearBatchSize int
// MaxNumberPerUser is read from MAX_NUMBER (default 5).
MaxNumberPerUser int
// Cron is read from CRON (default "0 0/0 2-8 * * ?").
Cron string
// RunAtStart is read from RUN_AT_START (default false).
RunAtStart bool
}{}

C2NetInfos *C2NetSqInfos
CenterInfos *AiCenterInfos
@@ -1741,6 +1748,7 @@ func NewContext() {
getModelSafetyConfig()
getModelAppConfig()
getClearStrategy()
getNotebookStrategy()
NewScreenMapConfig()
}

@@ -1850,6 +1858,16 @@ func getClearStrategy() {
ClearStrategy.RunAtStart = sec.Key("RUN_AT_START").MustBool(false)
}

// getNotebookStrategy loads the [notebook_strategy] config section into
// NotebookStrategy, falling back to a built-in default for every key that is
// missing or cannot be parsed.
func getNotebookStrategy() {
	section := Cfg.Section("notebook_strategy")
	strategy := &NotebookStrategy

	strategy.ClearEnabled = section.Key("CLEAR_ENABLED").MustBool(false)
	strategy.ClearBatchSize = section.Key("CLEAR_BATCH_SIZE").MustInt(300)
	strategy.MaxNumberPerUser = section.Key("MAX_NUMBER").MustInt(5)
	// NOTE(review): the minute field "0/0" of the default schedule uses a
	// step of zero — confirm the cron parser in use accepts it.
	strategy.Cron = section.Key("CRON").MustString("0 0/0 2-8 * * ?")
	strategy.RunAtStart = section.Key("RUN_AT_START").MustBool(false)
}

func GetGrampusConfig() {
sec := Cfg.Section("grampus")



+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -3466,6 +3466,7 @@ branch_not_exists = The branch does not exist. Please refresh and select again.
dataset_number_over_limit = The dataset count exceed the limit
result_cleared=The files of the task have been cleared, can not restart or retrain any more, please create a new task instead
model_not_exist=The model in the task does not exist or has been deleted
too_many_notebook=A user can have up to 5 debug tasks. Please delete some debug tasks and try again.

[common_error]
system_error = System error.Please try again later


+ 1
- 0
options/locale/locale_zh-CN.ini View File

@@ -3489,6 +3489,7 @@ branch_not_exists = 代码分支不存在,请刷新后重试
dataset_number_over_limit = 选择的数据集文件数量超出限制
result_cleared=源任务的文件已被清理,无法再次调试或复用训练结果,请新建任务。
model_not_exist=选择的预训练模型不存在或者已被删除
too_many_notebook=最多只能保留5个调试任务,请删除一些调试任务再重试。


[common_error]


+ 2
- 2
routers/api/v1/api.go View File

@@ -651,8 +651,8 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/:username/:reponame", func() {
m.Group("/ai_task", func() {
m.Post("/create", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), bind(entity.CreateReq{}), ai_task.CreateAITask)
m.Post("/stop", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.StopAITask)
m.Post("/del", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.DelAITask)
m.Post("/stop", reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.StopAITask)
m.Post("/del", reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.DelAITask)
m.Post("/restart", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrAITaskCreator(), ai_task.RestartAITask)
m.Get("/debug_url", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNotebookUrl)
m.Get("/creation/required", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetCreationRequiredInfo)


+ 1
- 0
routers/response/response_list.go View File

@@ -36,3 +36,4 @@ var LOAD_CODE_FAILED = &BizError{Code: 2019, DefaultMsg: "Fail to load code, ple
// Business errors 2020-2023. Each value carries a numeric Code, an English
// DefaultMsg and a TrCode naming the locale key for the translated message.
var BRANCH_NOT_EXISTS = &BizError{Code: 2020, DefaultMsg: "The branch does not exist", TrCode: "ai_task.branch_not_exists"}
var MODEL_NUM_OVER_LIMIT = &BizError{Code: 2021, DefaultMsg: "The number of models exceeds the limit of 30", TrCode: "repo.debug.manage.model_num_over_limit"}
var DATASET_NUMBER_OVER_LIMIT = &BizError{Code: 2022, DefaultMsg: "The dataset count exceed the limit", TrCode: "ai_task.dataset_number_over_limit"}

// NOTEBOOK_EXCEED_MAX_NUM is returned by CheckNotebookCount when a user's
// debug-task count has reached setting.NotebookStrategy.MaxNumberPerUser.
// NOTE(review): the message hard-codes "5", which is only the default of the
// configurable MAX_NUMBER setting; DefaultMsg also ends with a trailing space.
var NOTEBOOK_EXCEED_MAX_NUM = &BizError{Code: 2023, DefaultMsg: "You can have up to 5 Debug Tasks, please try again after delete some tasks. ", TrCode: "ai_task.too_many_notebook"}

+ 1
- 0
services/ai_task_service/task/cloudbrain_one_notebook_task.go View File

@@ -83,6 +83,7 @@ func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext)
c := &CreateOperator{}
err := c.Next(t.CheckParamFormat).
Next(t.CheckMultiRequest).
Next(t.CheckNotebookCount).
Next(t.CheckDisplayJobName).
Next(t.LoadSpec).
Next(t.CheckPointBalance).


+ 1
- 0
services/ai_task_service/task/cloudbrain_two_notebook_task.go View File

@@ -68,6 +68,7 @@ func (t CloudbrainTwoNotebookTaskTemplate) Create(ctx *context.CreationContext)
err := c.Next(t.CheckParamFormat).
Next(t.CheckMultiRequest).
Next(t.CheckDisplayJobName).
Next(t.CheckNotebookCount).
Next(t.LoadSpec).
Next(t.CheckPointBalance).
Next(t.CheckDatasets).


+ 1
- 0
services/ai_task_service/task/grampus_notebook_task.go View File

@@ -149,6 +149,7 @@ func (t GrampusNoteBookTaskTemplate) Create(ctx *context.CreationContext) (*enti
err := c.Next(t.CheckParamFormat).
Next(t.CheckMultiRequest).
Next(t.CheckDisplayJobName).
Next(t.CheckNotebookCount).
Next(t.LoadSpec).
Next(t.CheckPointBalance).
Next(t.CheckDatasets).


+ 14
- 0
services/ai_task_service/task/opt_handler.go View File

@@ -42,6 +42,7 @@ type CreationHandler interface {
CallRestartAPI(ctx *context.CreationContext) *response.BizError
NotifyCreation(ctx *context.CreationContext) *response.BizError
HandleErr4Async(ctx *context.CreationContext) *response.BizError
CheckNotebookCount(ctx *context.CreationContext) *response.BizError
}

//DefaultCreationHandler CreationHandler的默认实现,公共逻辑可以在此结构体中实现
@@ -685,3 +686,16 @@ func (DefaultCreationHandler) HandleErr4Async(ctx *context.CreationContext) *res
// NotifyCreation is the default implementation of the creation-notification
// step: it does nothing and always returns nil.
func (DefaultCreationHandler) NotifyCreation(ctx *context.CreationContext) *response.BizError {
	return nil
}

// CheckNotebookCount rejects the creation of a debug task when the requesting
// user already owns setting.NotebookStrategy.MaxNumberPerUser or more debug
// tasks, returning response.NOTEBOOK_EXCEED_MAX_NUM. Requests of any other job
// type pass unchecked and yield nil.
//
// The check is best-effort: if the count cannot be read, the failure is
// logged and the comparison proceeds with whatever count the query returned.
func (DefaultCreationHandler) CheckNotebookCount(ctx *context.CreationContext) *response.BizError {
	if ctx.Request.JobType != models.JobTypeDebug {
		return nil
	}

	count, err := models.GetNotebooksCountByUser(ctx.User.ID)
	if err != nil {
		// The logger is printf-style: without a verb the error would be
		// rendered as "%!(EXTRA ...)" noise instead of a readable message.
		log.Warn("can not get user notebook count: %v", err)
	}
	if count >= int64(setting.NotebookStrategy.MaxNumberPerUser) {
		return response.NOTEBOOK_EXCEED_MAX_NUM
	}
	return nil
}

+ 43
- 0
services/ai_task_service/task/task_service.go View File

@@ -805,3 +805,46 @@ func HandleNewAITaskDelete(cloudbrainId int64) (isHandled bool, err error) {
}
return true, nil
}

// ClearNotebook deletes surplus debug (notebook) tasks. It does nothing unless
// setting.NotebookStrategy.ClearEnabled is set.
//
// For every user owning more than MaxNumberPerUser debug tasks, the tasks
// returned by models.GetNotebooksByUser (highest ID first, skipping the first
// MaxNumberPerUser) are deleted through their task template. The run stops as
// soon as ClearBatchSize tasks have been deleted. Failures on a single user or
// task are logged and skipped; a panic anywhere in the run is recovered and
// logged so it cannot escape to the caller.
func ClearNotebook() {
	defer func() {
		if r := recover(); r != nil {
			// The logger is printf-style; a verb is needed for the
			// recovered value to be rendered properly.
			log.Error("panic occurred: %v", r)
		}
	}()

	if !setting.NotebookStrategy.ClearEnabled {
		return
	}

	maxPerUser := setting.NotebookStrategy.MaxNumberPerUser
	userCountInfo, err := models.GetNotebookCountGreaterThanN(maxPerUser)
	if err != nil {
		log.Error("can not get Notebook user count info: %v", err)
		return
	}

	deleteCount := 0
	for _, userCount := range userCountInfo {
		// maxPerUser is passed as the offset, so the first maxPerUser IDs
		// of the descending-ID listing are kept.
		ids, err := models.GetNotebooksByUser(userCount.UserID, maxPerUser)
		if err != nil {
			log.Error("can not get Notebook by user id %d: %v", userCount.UserID, err)
			continue
		}
		for _, id := range ids {
			t, tplErr := GetAITaskTemplateByCloudbrainId(id)
			if t == nil {
				// Include the lookup error so the failure can be diagnosed.
				log.Error("can not get task template for cloudbrain %d: %v", id, tplErr)
				continue
			}
			if err := t.Delete(id); err != nil {
				log.Error("Delete error.%v", err)
				continue
			}
			deleteCount++
			// Cap the work done in a single run.
			if deleteCount >= setting.NotebookStrategy.ClearBatchSize {
				return
			}
		}
	}
}

Loading…
Cancel
Save