@@ -172,6 +172,21 @@ | |||
> [attachment] | |||
> PATH = /data/gitea/attachments | |||
> | |||
> ENABLED = true | |||
> MAX_SIZE = 1048576 | |||
> ALLOWED_TYPES = */* | |||
> MAX_FILES = 10 | |||
> STORE_TYPE = minio | |||
> MINIO_ENDPOINT = | |||
> | |||
> MINIO_ACCESS_KEY_ID = | |||
> MINIO_SECRET_ACCESS_KEY = | |||
> MINIO_BUCKET = | |||
> MINIO_LOCATION = | |||
> MINIO_BASE_PATH = attachment/ | |||
> MINIO_USE_SSL = true | |||
> MINIO_REAL_PATH = | |||
> | |||
> [log] | |||
> MODE = file | |||
> LEVEL = info | |||
@@ -419,41 +419,40 @@ | |||
@media only screen and (min-width: 1920px) { | |||
} | |||
/* rotation3D */ | |||
#app{ | |||
position: relative; | |||
width: 800px; | |||
margin: 0 auto; | |||
z-index: 4; | |||
} | |||
.rotation3D-baseMap{ | |||
position: absolute; left: 0; right: 0; top: 104px; margin: auto; | |||
width: 800px; height: 516px; | |||
background: url("../rotation3D/img/baseMap.png") no-repeat; | |||
background-size: cover; | |||
} | |||
.rotation3D-baseMap::before{ | |||
position: absolute; | |||
margin: auto; z-index: 99; | |||
left:50%; top: -150px; | |||
transform:translate(-50%,0); | |||
width: 342px; height: 470px; display: block; content: ''; | |||
background: url("../rotation3D/img/baseLogo.svg"); | |||
/*animation: 10s bounceUpDown infinite;*/ | |||
} | |||
.rotation3D-baseMap::after{ | |||
position: absolute; | |||
margin: auto; z-index: 100; | |||
left:50%; top:0; | |||
transform:translate(-50%,0); | |||
width: 110px; height: 86px; display: block; content: ''; | |||
background: url("../rotation3D/img/brain.svg"); | |||
animation: 6s bounceUpDown infinite; | |||
mix-blend-mode: color-dodge; | |||
} | |||
@keyframes bounceUpDown{ | |||
0% {transform: translate(-50%, 0px);} | |||
50% {transform: translate(-50%, -15px);} | |||
100% {transform: translate(-50%, 0px);} | |||
/* rotation3D */ | |||
#app{ | |||
position: relative; | |||
width: 800px; | |||
margin: 0 auto; | |||
z-index: 4; | |||
} | |||
.rotation3D-baseMap{ | |||
position: absolute; left: 0; right: 0; top: 104px; margin: auto; | |||
width: 800px; height: 516px; | |||
background: url("../rotation3D/img/baseMap.png") no-repeat; | |||
background-size: cover; | |||
} | |||
.rotation3D-baseMap::before{ | |||
position: absolute; | |||
margin: auto; z-index: 99; | |||
left:50%; top: -150px; | |||
transform:translate(-50%,0); | |||
width: 342px; height: 470px; display: block; content: ''; | |||
background: url("../rotation3D/img/baseLogo.svg"); | |||
/*animation: 10s bounceUpDown infinite;*/ | |||
} | |||
.rotation3D-baseMap::after{ | |||
position: absolute; | |||
margin: auto; z-index: 100; | |||
left:50%; top:0; | |||
transform:translate(-50%,0); | |||
width: 110px; height: 86px; display: block; content: ''; | |||
background: url("../rotation3D/img/brain.svg"); | |||
animation: 6s bounceUpDown infinite; | |||
mix-blend-mode: color-dodge; | |||
} | |||
@keyframes bounceUpDown{ | |||
0% {transform: translate(-50%, 0px);} | |||
50% {transform: translate(-50%, -15px);} | |||
100% {transform: translate(-50%, 0px);} | |||
} |
@@ -0,0 +1,208 @@ | |||
package entity | |||
import ( | |||
"strings" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/git" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/structs" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
//todo 暂时保留之前各种云脑属性的定义 | |||
type CreateReq struct { | |||
JobType models.JobType `json:"job_type" binding:"Required"` | |||
DisplayJobName string `json:"display_job_name" binding:"Required"` | |||
JobName string `json:"job_name"` | |||
SpecId int64 `json:"spec_id" binding:"Required"` | |||
ComputeSourceStr string `json:"compute_source" binding:"Required"` | |||
Cluster ClusterType `json:"cluster" binding:"Required"` | |||
WorkServerNumber int `json:"work_server_number"` | |||
BranchName string `json:"branch_name"` | |||
PreTrainModelUrl string `json:"pretrain_model_url"` | |||
PretrainModelCkptName string `json:"pretrain_model_ckpt_name"` | |||
ImageUrl string `json:"image_url"` | |||
ImageID string `json:"image_id"` | |||
ImageName string `json:"image_name"` | |||
PretrainModelName string `json:"pretrain_model_name"` | |||
PretrainModelVersion string `json:"pretrain_model_version"` | |||
PretrainModelId string `json:"pretrain_model_id"` | |||
Description string `json:"description"` | |||
LabelName string `json:"label_names"` | |||
DatasetUUIDStr string `json:"dataset_uuid_str"` | |||
Params string `json:"run_para_list"` | |||
BootFile string `json:"boot_file"` | |||
ParamArray models.Parameters | |||
ComputeSource *models.ComputeSource | |||
ReqCommitID string | |||
IsFileNoteBookRequest bool | |||
FileRepository *models.Repository | |||
FileBranchName string | |||
IsRestartRequest bool | |||
DatasetNames string | |||
} | |||
type CreationResponse struct { | |||
Error error | |||
JobID string | |||
Status string //todo 考虑统一状态 | |||
CreateTime timeutil.TimeStamp | |||
} | |||
type QueryAITaskRes struct { | |||
Task *AITaskDetailInfo `json:"task"` | |||
EarlyVersionList []*AITaskDetailInfo `json:"early_version_list"` | |||
CanCreateVersion bool `json:"can_create_version"` | |||
} | |||
type AITaskDetailInfo struct { | |||
ID int64 `json:"id"` | |||
JobID string `json:"job_id"` | |||
Status string `json:"status"` | |||
JobType string `json:"job_type"` | |||
Cluster string `json:"cluster"` | |||
DisplayJobName string `json:"display_job_name"` | |||
FormattedDuration string `json:"formatted_duration"` | |||
ComputeSource string `json:"compute_source"` | |||
AICenter string `json:"ai_center"` | |||
BootFile string `json:"boot_file"` | |||
PreVersionName string `json:"pre_version_name"` | |||
CurrentVersionName string `json:"current_version_name"` | |||
WorkServerNumber int `json:"work_server_number"` | |||
Spec *structs.SpecificationShow `json:"spec"` | |||
DatasetList []*models.DatasetDownload `json:"dataset_list"` | |||
PretrainModelList []*models.ModelDownload `json:"pretrain_model_list"` | |||
Parameters *models.Parameters `json:"parameters"` | |||
CreatedUnix timeutil.TimeStamp `json:"created_unix"` | |||
CodePath string `json:"code_path"` | |||
DatasetPath string `json:"dataset_path"` | |||
PretrainModelPath string `json:"pretrain_model_path"` | |||
OutputPath string `json:"output_path"` | |||
CodeUrl string `json:"code_url"` | |||
PretrainModelName string `json:"pretrain_model_name"` | |||
PretrainModelVersion string `json:"pretrain_model_version"` | |||
PretrainCkptName string `json:"pretrain_model_ckpt_name"` | |||
StartTime timeutil.TimeStamp `json:"start_time"` | |||
EndTime timeutil.TimeStamp `json:"end_time"` | |||
Description string `json:"description"` | |||
CommitID string `json:"commit_id"` | |||
BranchName string `json:"branch_name"` | |||
ImageUrl string `json:"image_url"` | |||
ImageID string `json:"image_id"` | |||
ImageName string `json:"image_name"` | |||
CreatorName string `json:"creator_name"` | |||
EngineName string `json:"engine_name"` | |||
} | |||
func (a *AITaskDetailInfo) Tr(language string) { | |||
a.AICenter = getAiCenterShow(a.AICenter, language) | |||
} | |||
func (a *AITaskDetailInfo) RemoveDatasets() { | |||
a.DatasetList = []*models.DatasetDownload{} | |||
} | |||
func (a *AITaskDetailInfo) RemovePretrainModelList() { | |||
a.PretrainModelList = []*models.ModelDownload{} | |||
} | |||
func getAiCenterShow(aiCenter string, language string) string { | |||
aiCenterInfo := strings.Split(aiCenter, "+") | |||
if len(aiCenterInfo) == 2 { | |||
if setting.C2NetMapInfo != nil { | |||
if info, ok := setting.C2NetMapInfo[aiCenterInfo[0]]; ok { | |||
if language == defaultLanguage { | |||
return info.Content | |||
} else { | |||
return info.ContentEN | |||
} | |||
} else { | |||
return aiCenterInfo[1] | |||
} | |||
} else { | |||
return aiCenterInfo[1] | |||
} | |||
} | |||
return "" | |||
} | |||
var defaultLanguage = "zh-CN" | |||
type CreateTaskRes struct { | |||
ID int64 `json:"id"` | |||
Status string `json:"status"` | |||
} | |||
type GetAITaskCreationInfoReq struct { | |||
User *models.User | |||
JobType models.JobType | |||
ClusterType ClusterType | |||
ComputeSource *models.ComputeSource | |||
Repo *models.Repository | |||
GitRepo *git.Repository | |||
IsOnlineType bool | |||
} | |||
type AITaskBriefInfo struct { | |||
ID int64 `json:"id"` | |||
JobType string `json:"job_type"` | |||
Status string `json:"status"` | |||
DisplayJobName string `json:"display_job_name"` | |||
CreatedUnix timeutil.TimeStamp `json:"created_unix"` | |||
StartTime timeutil.TimeStamp `json:"start_time"` | |||
EndTime timeutil.TimeStamp `json:"end_time"` | |||
FormattedDuration string `json:"formatted_duration"` | |||
Cluster string `json:"cluster"` | |||
ComputeSource string `json:"compute_source"` | |||
AICenter string `json:"ai_center"` | |||
IsFileNotebook bool `json:"is_file_notebook"` | |||
} | |||
func (a *AITaskBriefInfo) Tr(language string) { | |||
a.AICenter = getAiCenterShow(a.AICenter, language) | |||
} | |||
type AITaskListRes struct { | |||
Tasks []*AITaskInfo4List `json:"tasks"` | |||
Total int64 `json:"total"` | |||
PageSize int `json:"page_size"` | |||
Page int `json:"page"` | |||
CanCreateTask bool `json:"can_create_task"` | |||
} | |||
type AITaskInfo4List struct { | |||
Task *AITaskBriefInfo `json:"task"` | |||
Creator UserBriefInfo `json:"creator"` | |||
CanModify bool `json:"can_modify"` | |||
CanDelete bool `json:"can_delete"` | |||
} | |||
func ConvertCloudbrainToAITaskBriefInfo(task *models.Cloudbrain) *AITaskBriefInfo { | |||
computeSource := "" | |||
c := models.GetComputeSourceInstance(task.ComputeResource) | |||
if c != nil { | |||
computeSource = c.Name | |||
} | |||
return &AITaskBriefInfo{ | |||
ID: task.ID, | |||
JobType: task.JobType, | |||
Status: task.Status, | |||
DisplayJobName: task.DisplayJobName, | |||
CreatedUnix: task.CreatedUnix, | |||
FormattedDuration: task.TrainJobDuration, | |||
Cluster: GetClusterTypeFromCloudbrainType(task.Type).GetParentCluster(), | |||
ComputeSource: computeSource, | |||
StartTime: task.StartTime, | |||
EndTime: task.EndTime, | |||
AICenter: task.AiCenter, | |||
IsFileNotebook: task.IsFileNoteBookTask(), | |||
} | |||
} | |||
type NotebookDataset struct { | |||
DatasetUrl string `json:"dataset_url"` | |||
} |
@@ -0,0 +1,35 @@ | |||
package entity | |||
type AITaskConfig struct { | |||
ContainerSteps map[ContainerDataType]*ContainerBuildOpts `json:"container_configs"` | |||
DatasetMaxSize int | |||
} | |||
type ContainerConfig struct { | |||
Enable bool | |||
ContainerPath string | |||
ReadOnly bool | |||
AcceptStorageType []StorageType | |||
} | |||
type GetAITaskConfigOpts struct { | |||
ComputeSource string | |||
IsFileNoteBookRequest bool | |||
} | |||
func (c *AITaskConfig) GetContainerConfig(containerDataType ContainerDataType) *ContainerBuildOpts { | |||
containerConfigs := c.ContainerSteps | |||
if containerConfigs != nil { | |||
return containerConfigs[containerDataType] | |||
} | |||
return nil | |||
} | |||
func (c *AITaskConfig) GetContainerPath(containerDataType ContainerDataType) string { | |||
config := c.GetContainerConfig(containerDataType) | |||
if config == nil { | |||
return "" | |||
} | |||
return config.ContainerPath | |||
} |
@@ -1,28 +0,0 @@ | |||
package ai_task_entity | |||
type TaskData struct { | |||
Code ContainerData | |||
Dataset []ContainerData | |||
PreTrainModel ContainerData | |||
OutPutPath ContainerData | |||
} | |||
type ContainerData struct { | |||
Name string `json:"name"` | |||
Bucket string `json:"bucket"` | |||
EndPoint string `json:"endPoint"` | |||
ObjectKey string `json:"objectKey"` | |||
ContainerPath string `json:"containerPath"` | |||
RealPath string `json:"realPath"` | |||
ReadOnly bool `json:"readOnly"` | |||
} | |||
type ContainerDataType string | |||
const ( | |||
ContainerCode ContainerDataType = "code" | |||
ContainerDataset ContainerDataType = "dataset" | |||
ContainerPreTrainModel ContainerDataType = "pre_train_model" | |||
ContainerOutPutPath ContainerDataType = "output" | |||
ContainerCloudbrainOneOutPutReadMe ContainerDataType = "cloudbrain_one_readme" | |||
) |
@@ -1 +0,0 @@ | |||
package ai_task_entity |
@@ -1,70 +0,0 @@ | |||
package ai_task_entity | |||
import ( | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/git" | |||
"code.gitea.io/gitea/modules/structs" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
//todo 暂时保留之前各种云脑属性的定义 | |||
type CreateReq struct { | |||
JobType models.JobType `json:"job_type" binding:"Required"` | |||
DisplayJobName string `json:"display_job_name" binding:"Required"` | |||
JobName string `json:"job_name"` | |||
SpecId int64 `json:"spec_id" binding:"Required"` | |||
ComputeSourceStr string `json:"compute_source" binding:"Required"` | |||
Cluster ClusterType `json:"cluster" binding:"Required"` | |||
WorkServerNumber int `json:"work_server_number"` | |||
BranchName string `json:"branch_name"` | |||
PreTrainModelUrl string `json:"pre_train_model_url"` | |||
CkptName string `json:"ckpt_name"` | |||
ImageUrl string `json:"image_url"` | |||
ImageID string `json:"image_id"` | |||
ImageName string `json:"image_name"` | |||
ModelName string `json:"model_name"` | |||
ModelVersion string `json:"model_version"` | |||
ModelId string `json:"model_id"` | |||
Description string `json:"description"` | |||
LabelName string `json:"label_names"` | |||
DatasetUUIDStr string `json:"dataset_uuid_str"` | |||
Params string `json:"run_para_list"` | |||
BootFile string `json:"boot_file"` | |||
ParamArray models.Parameters | |||
ComputeSource *models.ComputeSource | |||
} | |||
type CreationResponse struct { | |||
Error error | |||
JobID string | |||
Status string //todo 考虑统一状态 | |||
CreateTime timeutil.TimeStamp | |||
} | |||
type QueryTaskInfo struct { | |||
ID int64 `json:"id"` | |||
JobID string `json:"job_id"` | |||
Status string `json:"status"` | |||
JobType string `json:"job_type"` | |||
Cluster string `json:"cluster"` | |||
DisplayJobName string `json:"display_job_name"` | |||
Duration string `json:"duration"` | |||
ComputeSource string `json:"compute_source"` | |||
AiCenter string `json:"ai_center"` | |||
WorkServerNumber int `json:"work_server_number"` | |||
Spec *structs.SpecificationShow `json:"spec"` | |||
DatasetList []*models.DatasetDownload `json:"dataset_list"` | |||
} | |||
type CreateTaskRes struct { | |||
ID int64 `json:"id"` | |||
} | |||
type GetAITaskCreationInfoReq struct { | |||
User *models.User | |||
JobType models.JobType | |||
ClusterType ClusterType | |||
ComputeSource *models.ComputeSource | |||
Repo *models.Repository | |||
GitRepo *git.Repository | |||
} |
@@ -0,0 +1,12 @@ | |||
package entity | |||
import "code.gitea.io/gitea/models" | |||
type GetTaskListReq struct { | |||
models.ListOptions | |||
ComputeSource *models.ComputeSource | |||
JobTypes []string | |||
RepoID int64 | |||
Operator *models.User | |||
IsRepoOwner bool | |||
} |
@@ -1,26 +1,37 @@ | |||
package ai_task_entity | |||
package entity | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"strconv" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
type CreateNoteBookTaskRequest struct { | |||
Name string | |||
Tasks []NoteBookTask | |||
Name string | |||
Description string | |||
Tasks []NoteBookTask | |||
PrimitiveDatasetName string | |||
RepoName string | |||
} | |||
type NoteBookTask struct { | |||
AutoStopDuration int | |||
AutoStopDuration int64 | |||
Name string | |||
Capacity int | |||
CenterID []string | |||
Code []ContainerData | |||
Datasets []ContainerData | |||
PreTrainModel []ContainerData | |||
OutPut []ContainerData | |||
ImageId string | |||
ImageUrl string | |||
ResourceSpecId string | |||
BootFile string | |||
Spec *models.Specification | |||
} | |||
@@ -56,6 +67,10 @@ type QueryTaskResponse struct { | |||
Token string `json:"token"` | |||
CenterId string `json:"center_id"` | |||
CenterName string `json:"center_name"` | |||
CodeUrl string `json:"code_url"` | |||
DataUrl string `json:"data_url"` | |||
ContainerIP string `json:"container_ip"` | |||
ContainerID string `json:"container_id"` | |||
} | |||
func ConvertGrampusNotebookResponse(job models.GrampusNotebookInfo) *QueryTaskResponse { | |||
@@ -71,10 +86,13 @@ func ConvertGrampusNotebookResponse(job models.GrampusNotebookInfo) *QueryTaskRe | |||
if len(task.CenterName) > 0 { | |||
centerName = task.CenterName[0] | |||
} | |||
var url, token string | |||
var url, token, codeUrl, dataUrl string | |||
if len(job.Tasks) > 0 { | |||
url = job.Tasks[0].Url | |||
token = job.Tasks[0].Token | |||
t := job.Tasks[0] | |||
url = t.Url | |||
token = t.Token | |||
codeUrl = t.CodeUrl | |||
dataUrl = t.DataUrl | |||
} | |||
return &QueryTaskResponse{ | |||
StartedAt: timeutil.TimeStamp(job.StartedAt), | |||
@@ -85,6 +103,8 @@ func ConvertGrampusNotebookResponse(job models.GrampusNotebookInfo) *QueryTaskRe | |||
Url: url, | |||
Token: token, | |||
JobId: job.JobID, | |||
CodeUrl: codeUrl, | |||
DataUrl: dataUrl, | |||
} | |||
} | |||
func ConvertGrampusTrainResponse(job models.GrampusJobInfo) *QueryTaskResponse { | |||
@@ -122,16 +142,56 @@ func ConvertCloudbrainOneQueryNotebookByNameResponse(result models.JobResultInLi | |||
} | |||
} | |||
func ConvertCloudbrainOneNotebookResponse(result models.JobResultPayload) *QueryTaskResponse { | |||
if result.JobStatus.State == "" { | |||
return nil | |||
func ConvertCloudbrainOneNotebookResponse(input map[string]interface{}) (*QueryTaskResponse, error) { | |||
data, _ := json.Marshal(input) | |||
var jobResultPayload models.JobResultPayload | |||
err := json.Unmarshal(data, &jobResultPayload) | |||
if err != nil { | |||
log.Error("parse cloudbrain one result err,result=%+v err=%v", input, err) | |||
return nil, err | |||
} | |||
return &QueryTaskResponse{ | |||
StartedAt: timeutil.TimeStamp(result.JobStatus.CreatedTime / 1000), | |||
CompletedAt: timeutil.TimeStamp(result.JobStatus.CompletedTime / 1000), | |||
Status: result.JobStatus.State, | |||
JobId: result.ID, | |||
if jobResultPayload.JobStatus.State == "" { | |||
return nil, nil | |||
} | |||
startTime := jobResultPayload.JobStatus.AppLaunchedTime / 1000 | |||
var endTime int64 | |||
switch jobResultPayload.JobStatus.AppCompletedTime.(type) { | |||
case float64: | |||
f := jobResultPayload.JobStatus.AppCompletedTime.(float64) | |||
s := fmt.Sprintf("%.0f", f) | |||
i, err := strconv.ParseInt(s, 10, 64) | |||
if err == nil { | |||
endTime = i / 1000 | |||
} | |||
} | |||
if jobResultPayload.JobStatus.State == string(models.JobWaiting) { | |||
startTime = 0 | |||
endTime = 0 | |||
} | |||
var containerIP, containerID string | |||
taskRoles := jobResultPayload.TaskRoles | |||
if taskRoles != nil && len(taskRoles) > 0 { | |||
subTask := taskRoles[cloudbrain.SubTaskName] | |||
if subTask != nil { | |||
taskRes, _ := models.ConvertToTaskPod(taskRoles[cloudbrain.SubTaskName].(map[string]interface{})) | |||
if taskRes.TaskStatuses != nil && len(taskRes.TaskStatuses) > 0 { | |||
containerIP = taskRes.TaskStatuses[0].ContainerIP | |||
containerID = taskRes.TaskStatuses[0].ContainerID | |||
} | |||
} | |||
} | |||
res := &QueryTaskResponse{ | |||
StartedAt: timeutil.TimeStamp(startTime), | |||
CompletedAt: timeutil.TimeStamp(endTime), | |||
Status: jobResultPayload.JobStatus.State, | |||
JobId: jobResultPayload.ID, | |||
ContainerIP: containerIP, | |||
ContainerID: containerID, | |||
} | |||
return res, nil | |||
} | |||
type ClusterLog struct { | |||
@@ -205,6 +265,8 @@ func GetClusterTypeFromCloudbrainType(t int) ClusterType { | |||
return OpenICloudbrainTwo | |||
case models.TypeC2Net: | |||
return C2Net | |||
case models.TypeCDCenter: | |||
return OpenICloudbrainTwo | |||
} | |||
return "" | |||
} |
@@ -0,0 +1,45 @@ | |||
package entity | |||
type TaskData struct { | |||
Code ContainerData | |||
Dataset []ContainerData | |||
PreTrainModel ContainerData | |||
OutPutPath ContainerData | |||
} | |||
type ContainerData struct { | |||
Name string `json:"name"` | |||
Bucket string `json:"bucket"` | |||
EndPoint string `json:"endPoint"` | |||
ObjectKey string `json:"objectKey"` | |||
ContainerPath string `json:"containerPath"` | |||
RealPath string `json:"realPath"` | |||
ReadOnly bool `json:"readOnly"` | |||
} | |||
type ContainerDataType string | |||
const ( | |||
ContainerCode ContainerDataType = "code" | |||
ContainerDataset ContainerDataType = "dataset" | |||
ContainerPreTrainModel ContainerDataType = "pre_train_model" | |||
ContainerOutPutPath ContainerDataType = "output" | |||
ContainerFileNoteBookCode ContainerDataType = "file_note_book_code" | |||
) | |||
type ContainerBuildOpts struct { | |||
Disable bool | |||
ContainerPath string | |||
ReadOnly bool | |||
AcceptStorageType []StorageType | |||
NotArchive bool | |||
} | |||
func (opts ContainerBuildOpts) IsStorageTypeIn(storageType StorageType) bool { | |||
for _, s := range opts.AcceptStorageType { | |||
if string(s) == string(storageType) { | |||
return true | |||
} | |||
} | |||
return false | |||
} |
@@ -1,13 +1,10 @@ | |||
package ai_task_entity | |||
package entity | |||
import ( | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/structs" | |||
) | |||
type TaskBriefInfo struct { | |||
} | |||
type CreationRequiredInfo struct { | |||
//排队信息、代码分支信息、查询是否有正在运行的任务、查询镜像列表、查询资源规格(积分余额,开关) | |||
Specs []*structs.SpecificationShow `json:"specs"` | |||
@@ -20,6 +17,11 @@ type CreationRequiredInfo struct { | |||
DisplayJobName string `json:"display_job_name"` | |||
PointAccount *PointAccountInfo `json:"point_account"` | |||
PaySwitch bool `json:"pay_switch"` | |||
Config AITaskCreationConfig `json:"config"` | |||
} | |||
type AITaskCreationConfig struct { | |||
DatasetMaxSize int `json:"dataset_max_size"` | |||
} | |||
type SpecificationInfo struct { |
@@ -0,0 +1,13 @@ | |||
package entity | |||
import "fmt" | |||
type ErrCode struct { | |||
CodeVal string | |||
CodeMsg string | |||
CodeTrCode string | |||
} | |||
func (e *ErrCode) IsMatch(code interface{}) bool { | |||
return fmt.Sprint(code) == e.CodeVal | |||
} |
@@ -0,0 +1,3 @@ | |||
package entity | |||
var GrampusJobCanNotRestart = &ErrCode{CodeVal: "5005", CodeMsg: "Job can not restart", CodeTrCode: "ai_task.can_not_restart"} |
@@ -1,4 +1,4 @@ | |||
package ai_task_entity | |||
package entity | |||
import "code.gitea.io/gitea/models" | |||
@@ -0,0 +1,35 @@ | |||
package entity | |||
type OperationProfile struct { | |||
Events []ProfileEvent `json:"events"` | |||
} | |||
type ProfileEvent struct { | |||
Message string `json:"message"` | |||
Name string `json:"name"` | |||
Reason string `json:"reason"` | |||
Timestamp string `json:"timestamp"` | |||
Action string `json:"action"` | |||
} | |||
type CloudbrainOneAppExitDiagnostics struct { | |||
PodRoleName struct { | |||
Task10 string `json:"task1-0"` | |||
} `json:"podRoleName"` | |||
PodEvents struct { | |||
Task10 []struct { | |||
Uid string `json:"uid"` | |||
Reason string `json:"reason"` | |||
Message string `json:"message"` | |||
ReportingController string `json:"reportingController"` | |||
Action string `json:"action"` | |||
} `json:"task1-0"` | |||
} `json:"podEvents"` | |||
Extras []struct { | |||
Uid string `json:"uid"` | |||
Reason string `json:"reason"` | |||
Message string `json:"message"` | |||
ReportingController string `json:"reportingController"` | |||
Action string `json:"action"` | |||
} `json:"extras"` | |||
} |
@@ -0,0 +1,21 @@ | |||
package entity | |||
import "code.gitea.io/gitea/models" | |||
type StorageType string | |||
const ( | |||
MINIO StorageType = "MINIO" | |||
OBS StorageType = "OBS" | |||
) | |||
func GetStorageTypeFromCloudbrainType(cloudbrainType int) StorageType { | |||
switch cloudbrainType { | |||
case models.TypeCloudBrainOne: | |||
return MINIO | |||
case models.TypeCloudBrainTwo: | |||
return OBS | |||
} | |||
return "" | |||
} |
@@ -0,0 +1,44 @@ | |||
package entity | |||
import ( | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
type UserBriefInfo struct { | |||
ID int64 `json:"id"` | |||
LowerName string `json:"lower_name"` | |||
Name string `json:"name"` | |||
FullName string `json:"full_name"` | |||
Email string `json:"email"` | |||
Language string `json:"language"` | |||
Description string `json:"description"` | |||
RelAvatarLink string `json:"rel_avatar_link"` | |||
NumMembers int `json:"num_members"` | |||
CreatedUnix timeutil.TimeStamp `json:"created_unix"` | |||
UpdatedUnix timeutil.TimeStamp `json:"updated_unix"` | |||
} | |||
func ConvertUserToBrief(u *models.User) *UserBriefInfo { | |||
fullName := u.Name | |||
if u.FullName != "" { | |||
fullName = u.FullName | |||
} | |||
uf := &UserBriefInfo{ | |||
ID: u.ID, | |||
LowerName: u.LowerName, | |||
Name: u.Name, | |||
FullName: fullName, | |||
Email: u.Email, | |||
Language: u.Language, | |||
Description: u.Description, | |||
CreatedUnix: u.CreatedUnix, | |||
UpdatedUnix: u.UpdatedUnix, | |||
NumMembers: u.NumMembers, | |||
} | |||
if !u.KeepEmailPrivate { | |||
uf.Email = u.Email | |||
} | |||
uf.RelAvatarLink = u.RelAvatarLink() | |||
return uf | |||
} |
@@ -0,0 +1,1527 @@ | |||
package cloudbrain_two | |||
import ( | |||
"crypto/tls" | |||
"encoding/json" | |||
"fmt" | |||
"net/http" | |||
"strconv" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"github.com/go-resty/resty/v2" | |||
) | |||
var ( | |||
restyClient *resty.Client | |||
HOST string | |||
TOKEN string | |||
AutoStopDurationMs = 4 * 60 * 60 * 1000 | |||
) | |||
const ( | |||
methodPassword = "password" | |||
urlGetToken = "/v3/auth/tokens" | |||
urlNotebook = "/demanager/instances" | |||
urlTrainJob = "/training-jobs" | |||
urlResourceSpecs = "/job/resource-specs" | |||
urlTrainJobConfig = "/training-job-configs" | |||
errorCodeExceedLimit = "ModelArts.0118" | |||
//notebook 2.0 | |||
urlNotebook2 = "/notebooks" | |||
//error code | |||
modelartsIllegalToken = "ModelArts.6401" | |||
NotebookNotFound = "ModelArts.6404" | |||
NotebookNoPermission = "ModelArts.6407" | |||
NotebookInvalid = "ModelArts.6400" | |||
UnknownErrorPrefix = "UNKNOWN:" | |||
ModelArtsJobInTargetState = "ModelArts.6357" | |||
ModelArtsJobNotExists = "ModelArts.0102" | |||
ModelArtsJobInternalError = "ModelArts.0010" | |||
) | |||
func getRestyClient() *resty.Client { | |||
if restyClient == nil { | |||
restyClient = resty.New() | |||
restyClient.SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true}) | |||
} | |||
return restyClient | |||
} | |||
func checkSetting() { | |||
if len(HOST) != 0 && len(TOKEN) != 0 && restyClient != nil { | |||
return | |||
} | |||
err := getToken() | |||
if err != nil { | |||
log.Error("getToken failed:%v", err) | |||
} | |||
} | |||
func getToken() error { | |||
HOST = setting.ModelArtsHost | |||
client := getRestyClient() | |||
params := models.GetTokenParams{ | |||
Auth: models.Auth{ | |||
Identity: models.Identity{ | |||
Methods: []string{methodPassword}, | |||
Password: models.Password{ | |||
User: models.NotebookUser{ | |||
Name: setting.ModelArtsUsername, | |||
Password: setting.ModelArtsPassword, | |||
Domain: models.Domain{ | |||
Name: setting.ModelArtsDomain, | |||
}, | |||
}, | |||
}, | |||
}, | |||
Scope: models.Scope{ | |||
Project: models.Project{ | |||
Name: setting.ProjectName, | |||
}, | |||
}, | |||
}, | |||
} | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetBody(params). | |||
Post(setting.IamHost + urlGetToken) | |||
if err != nil { | |||
return fmt.Errorf("resty getToken: %v", err) | |||
} | |||
if res.StatusCode() != http.StatusCreated { | |||
return fmt.Errorf("getToken failed:%s", res.String()) | |||
} | |||
TOKEN = res.Header().Get("X-Subject-Token") | |||
return nil | |||
} | |||
func CreateJob(createJobParams models.CreateNotebookParams) (*models.CreateNotebookResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateNotebookResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(createJobParams). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlNotebook) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty create notebook: %s", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("createNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
if response.ErrorCode == errorCodeExceedLimit { | |||
response.ErrorMsg = "所选规格使用数量已超过最大配额限制。" | |||
} | |||
return &result, fmt.Errorf("createNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetJob(jobID string) (*models.GetNotebookResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetNotebookResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetNotebook2(jobID string) (*models.GetNotebook2Result, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetNotebook2Result | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
if response.ErrorCode == modelartsIllegalToken && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NotebookActionResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetBody(param). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID + "/action") | |||
if err != nil { | |||
return &result, fmt.Errorf("resty StopJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
return &result, fmt.Errorf("ManageNotebook failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func ManageNotebook2(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NotebookActionResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID + "/" + param.Action + "?duration=" + strconv.Itoa(AutoStopDurationMs)) | |||
if err != nil { | |||
return &result, fmt.Errorf("resty ManageNotebook2: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if res.StatusCode() == http.StatusBadGateway { | |||
return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
if response.ErrorCode == modelartsIllegalToken && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
return &result, fmt.Errorf("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func DelNotebook(jobID string) (*models.NotebookDelResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NotebookDelResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID) | |||
if err != nil { | |||
return &result, fmt.Errorf("resty DelJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
return &result, fmt.Errorf("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func DelNotebook2(jobID string) (*models.NotebookDelResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NotebookDelResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID) | |||
if err != nil { | |||
return &result, fmt.Errorf("resty DelJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
if response.ErrorCode == modelartsIllegalToken && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if response.ErrorCode == ModelArtsJobNotExists || response.ErrorCode == ModelArtsJobInTargetState { | |||
//任务不存在或者已经处于被删除的状态,此时认为删除成功 | |||
return &models.NotebookDelResult{}, nil | |||
} | |||
if result.ErrorCode == ModelArtsJobInternalError { | |||
log.Error("ModelArt internal error when del job,jobId=%s", jobID) | |||
return &models.NotebookDelResult{}, nil | |||
} | |||
return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func DelJob(jobID string) (*models.NotebookDelResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NotebookDelResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID) | |||
if err != nil { | |||
return &result, fmt.Errorf("resty DelJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
return &result, fmt.Errorf("DelJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetJobToken(jobID string) (*models.NotebookGetJobTokenResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NotebookGetJobTokenResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlNotebook + "/" + jobID + "/token") | |||
if err != nil { | |||
return &result, fmt.Errorf("resty GetJobToken: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("GetJobToken failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
return &result, fmt.Errorf("GetJobToken failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func createTrainJobUserImage(createJobParams models.CreateUserImageTrainJobParams) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(createJobParams). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty create train-job: %s", err) | |||
} | |||
req, _ := json.Marshal(createJobParams) | |||
log.Info("postapi json: %s", req) | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||
dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||
if temp.ErrorMsg == bootFileErrorMsg { | |||
log.Error("启动文件错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("启动文件错误!") | |||
} | |||
if temp.ErrorMsg == dataSetErrorMsg { | |||
log.Error("数据集错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("数据集错误!") | |||
} | |||
if res.StatusCode() == http.StatusBadGateway { | |||
return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} else { | |||
return &result, fmt.Errorf("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
} | |||
if !result.IsSuccess { | |||
log.Error("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func createTrainJob(createJobParams models.CreateTrainJobParams) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
retry := 0 | |||
req, _ := json.Marshal(createJobParams) | |||
log.Info("postapi json: %s", req) | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(createJobParams). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty create train-job: %s", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||
dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||
if temp.ErrorMsg == bootFileErrorMsg { | |||
log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("启动文件错误!") | |||
} | |||
if temp.ErrorMsg == dataSetErrorMsg { | |||
log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("数据集错误!") | |||
} | |||
if res.StatusCode() == http.StatusBadGateway { | |||
return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} else { | |||
return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
} | |||
if !result.IsSuccess { | |||
log.Error("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func createTrainJobVersion(createJobVersionParams models.CreateTrainJobVersionParams, jobID string) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(createJobVersionParams). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty create train-job version: %s", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'." | |||
dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'." | |||
if temp.ErrorMsg == bootFileErrorMsg { | |||
log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("启动文件错误!") | |||
} | |||
if temp.ErrorMsg == dataSetErrorMsg { | |||
log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("数据集错误!") | |||
} | |||
if res.StatusCode() == http.StatusBadGateway { | |||
return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} else { | |||
return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
} | |||
if !result.IsSuccess { | |||
log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func createTrainJobVersionUserImage(createJobVersionParams models.CreateTrainJobVersionUserImageParams, jobID string) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(createJobVersionParams). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty create train-job version: %s", err) | |||
} | |||
req, _ := json.Marshal(createJobVersionParams) | |||
log.Info("%s", req) | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'." | |||
DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'." | |||
if temp.ErrorMsg == BootFileErrorMsg { | |||
log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("启动文件错误!") | |||
} | |||
if temp.ErrorMsg == DataSetErrorMsg { | |||
log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("数据集错误!") | |||
} | |||
return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("createTrainJobVersion failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetResourceSpecs() (*models.GetResourceSpecsResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetResourceSpecsResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetResourceSpecs: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetResourceSpecs failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("GetResourceSpecs failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetResourceSpecs failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("GetResourceSpecs failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func CreateTrainJobConfig(req models.CreateConfigParams) (*models.CreateTrainJobConfigResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobConfigResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(req). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty CreateTrainJobConfig: %s", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("CreateTrainJobConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("CreateTrainJobConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("CreateTrainJobConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("CreateTrainJobConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetConfigList(perPage, page int, sortBy, order, searchContent, configType string) (*models.GetConfigListResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetConfigListResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetQueryParams(map[string]string{ | |||
"per_page": strconv.Itoa(perPage), | |||
"page": strconv.Itoa(page), | |||
"sortBy": sortBy, | |||
"order": order, | |||
"search_content": searchContent, | |||
"config_type": configType, | |||
}). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetConfigList: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetConfigList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("获取参数配置列表失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetConfigList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("获取参数配置列表失败(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetParaConfig(configName, configType string) (models.GetConfigResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetConfigResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetQueryParams(map[string]string{ | |||
"config_type": configType, | |||
}). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJobConfig + "/" + configName) | |||
if err != nil { | |||
return result, fmt.Errorf("resty GetParaConfig: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetParaConfig failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return result, fmt.Errorf("获取参数配置详情失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetParaConfig failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return result, fmt.Errorf("获取参数配置详情失败(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return result, nil | |||
} | |||
func GetTrainJob(jobID, versionID string) (*models.GetTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("获取作业详情失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetTrainJob(%s) failed", jobID) | |||
return &result, fmt.Errorf("获取作业详情失败") | |||
} | |||
return &result, nil | |||
} | |||
func GetTrainJobLog(jobID, versionID, baseLine, logFile, order string, lines int) (*models.GetTrainJobLogResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobLogResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetQueryParams(map[string]string{ | |||
"base_line": baseLine, | |||
"lines": strconv.Itoa(lines), | |||
"log_file": logFile, | |||
"order": order, | |||
}). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/aom-log") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobLog: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetTrainJobLog failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("获取作业日志失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetTrainJobLog(%s) failed", jobID) | |||
return &result, fmt.Errorf("获取作业日志失败:%s", result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetTrainJobLogFileNames(jobID, versionID string) (*models.GetTrainJobLogFileNamesResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobLogFileNamesResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/log/file-names") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobLogFileNames: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("GetTrainJobLogFileNames failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetTrainJobLogFileNames(%s) failed", jobID) | |||
return &result, fmt.Errorf("获取作业日志文件失败:%s", result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func DelTrainJob(jobID string) (*models.TrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.TrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID) | |||
if err != nil { | |||
return &result, fmt.Errorf("resty DelTrainJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
if temp.ErrorCode == ModelArtsJobNotExists || temp.ErrorCode == ModelArtsJobInTargetState { | |||
//任务不存在或者已经处于被删除的状态,此时认为删除成功 | |||
return &models.TrainJobResult{IsSuccess: true}, nil | |||
} | |||
if result.ErrorCode == ModelArtsJobInternalError { | |||
log.Error("ModelArt internal error when del job,jobId=%s", jobID) | |||
return &models.TrainJobResult{IsSuccess: true}, nil | |||
} | |||
return &result, fmt.Errorf("删除训练作业失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("DelTrainJob(%s) failed", jobID) | |||
if result.ErrorCode == ModelArtsJobNotExists || result.ErrorCode == ModelArtsJobInTargetState { | |||
//任务不存在或者已经处于被删除的状态,此时认为删除成功 | |||
return &models.TrainJobResult{IsSuccess: true}, nil | |||
} | |||
if result.ErrorCode == ModelArtsJobInternalError { | |||
log.Error("ModelArt internal error when del job,jobId=%s", jobID) | |||
return &models.TrainJobResult{IsSuccess: true}, nil | |||
} | |||
return &result, fmt.Errorf("删除训练作业失败:%s", result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func StopTrainJob(jobID, versionID string) (*models.TrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.TrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/stop") | |||
if err != nil { | |||
return &result, fmt.Errorf("resty StopTrainJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("StopTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("停止训练作业失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("StopTrainJob(%s) failed", jobID) | |||
return &result, fmt.Errorf("停止训练作业失败:%s", result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func DelTrainJobVersion(jobID string, versionID string) (*models.TrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.TrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Delete(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID) | |||
if err != nil { | |||
return &result, fmt.Errorf("resty DelTrainJobVersion: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
if temp.ErrorCode == ModelArtsJobNotExists || temp.ErrorCode == ModelArtsJobInTargetState { | |||
//任务不存在或者已经处于被删除的状态,此时认为删除成功 | |||
return &models.TrainJobResult{IsSuccess: true}, nil | |||
} | |||
if result.ErrorCode == ModelArtsJobInternalError { | |||
log.Error("ModelArt internal error when del job,jobId=%s", jobID) | |||
return &models.TrainJobResult{IsSuccess: true}, nil | |||
} | |||
log.Error("DelTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("删除训练作业版本失败(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("DelTrainJob(%s) failed", jobID) | |||
return &result, fmt.Errorf("删除训练作业版本失败:%s", result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func createInferenceJob(createJobParams models.CreateInferenceJobParams) (*models.CreateTrainJobResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.CreateTrainJobResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetBody(createJobParams). | |||
SetResult(&result). | |||
Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty create inference-job: %s", err) | |||
} | |||
req, _ := json.Marshal(createJobParams) | |||
log.Info("%s", req) | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
BootFileErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.BootFileUrl + "'." | |||
DataSetErrorMsg := "Invalid OBS path '" + createJobParams.InfConfig.DataUrl + "'." | |||
if temp.ErrorMsg == BootFileErrorMsg { | |||
log.Error("启动文件错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("启动文件错误!") | |||
} | |||
if temp.ErrorMsg == DataSetErrorMsg { | |||
log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("数据集错误!") | |||
} | |||
if res.StatusCode() == http.StatusBadGateway { | |||
return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} else { | |||
return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
} | |||
if !result.IsSuccess { | |||
log.Error("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("createInferenceJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
// createInferenceJobUserImage submits an inference job that runs a
// user-provided image, via the ModelArts training-job endpoint.
// On HTTP 401 the token is refreshed once and the request retried.
// "Invalid OBS path" errors for the boot file and dataset are mapped to
// user-facing Chinese messages; 502 responses are prefixed with
// UnknownErrorPrefix so callers can detect gateway-level failures.
func createInferenceJobUserImage(createJobParams models.CreateInfUserImageParams) (*models.CreateTrainJobResult, error) {
	checkSetting()
	client := getRestyClient()
	var result models.CreateTrainJobResult
	retry := 0
sendjob:
	res, err := client.R().
		SetHeader("Content-Type", "application/json").
		SetAuthToken(TOKEN).
		SetBody(createJobParams).
		SetResult(&result).
		Post(HOST + "/v1/" + setting.ProjectID + urlTrainJob)
	if err != nil {
		return nil, fmt.Errorf("resty create train-job: %s", err)
	}
	// Log the submitted payload for troubleshooting (marshal error ignored).
	req, _ := json.Marshal(createJobParams)
	log.Info("%s", req)
	// A 401 normally means the cached token expired; refresh once and retry.
	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		_ = getToken()
		goto sendjob
	}
	if res.StatusCode() != http.StatusOK {
		var temp models.ErrorResult
		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
		}
		log.Error("createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
		// ModelArts echoes invalid OBS paths verbatim; compare to classify them.
		bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
		dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
		if temp.ErrorMsg == bootFileErrorMsg {
			log.Error("启动文件错误!createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
			return &result, fmt.Errorf("启动文件错误!")
		}
		if temp.ErrorMsg == dataSetErrorMsg {
			log.Error("数据集错误!createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
			return &result, fmt.Errorf("数据集错误!")
		}
		if res.StatusCode() == http.StatusBadGateway {
			return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
		} else {
			return &result, fmt.Errorf("createInferenceJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
		}
	}
	// An HTTP 200 can still carry a business-level failure flag.
	if !result.IsSuccess {
		log.Error("createInferenceJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("createInferenceJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg)
	}
	return &result, nil
}
// CreateNotebook2 creates a ModelArts notebook (v2 API) instance.
// The raw body is decoded into a separate NotebookResult regardless of
// status code because this endpoint reports errors in the payload. Either
// an HTTP 401 or an illegal-token business error code triggers a single
// token refresh + retry, and a quota-exceeded code is rewritten to a
// user-facing message before being returned.
func CreateNotebook2(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) {
	checkSetting()
	client := getRestyClient()
	var result models.CreateNotebookResult
	retry := 0
sendjob:
	res, err := client.R().
		SetHeader("Content-Type", "application/json").
		SetAuthToken(TOKEN).
		SetBody(createJobParams).
		SetResult(&result).
		Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2)
	if err != nil {
		return nil, fmt.Errorf("resty create notebook2: %s", err)
	}
	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		_ = getToken()
		goto sendjob
	}
	// Decode the body separately: error details live alongside the result.
	var response models.NotebookResult
	err = json.Unmarshal(res.Body(), &response)
	if err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}
	if res.StatusCode() == http.StatusBadGateway {
		return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
	}
	if len(response.ErrorCode) != 0 {
		log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
		if response.ErrorCode == errorCodeExceedLimit {
			response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
		}
		// Some deployments report token expiry via error code instead of 401.
		if response.ErrorCode == modelartsIllegalToken && retry < 1 {
			retry++
			_ = getToken()
			goto sendjob
		}
		return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
	}
	return &result, nil
}
func GetTrainJobMetricStatistic(jobID, versionID, podName string) (*models.GetTrainJobMetricStatisticResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobMetricStatisticResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions/" + versionID + "/pod/" + podName + "/metric-statistic?statistic_type=each") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobMetricStatistic: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetTrainJobMetricStatistic failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf("GetTrainJobMetricStatistic failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetTrainJobMetricStatistic(%s) failed", jobID) | |||
return &result, fmt.Errorf("获取任务资源占用情况失败:%s", result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobListResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetQueryParams(map[string]string{ | |||
"per_page": strconv.Itoa(perPage), | |||
"page": strconv.Itoa(page), | |||
"sortBy": sortBy, | |||
"order": order, | |||
"search_content": searchContent, | |||
}). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobList: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf(temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf(result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetTrainJobVersionListResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetQueryParams(map[string]string{ | |||
"per_page": strconv.Itoa(perPage), | |||
"page": strconv.Itoa(page), | |||
}). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf(temp.ErrorMsg) | |||
} | |||
if !result.IsSuccess { | |||
log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf(result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetNotebookListResult | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetQueryParams(map[string]string{ | |||
"limit": strconv.Itoa(limit), | |||
"offset": strconv.Itoa(offset), | |||
"name": searchContent, | |||
"sort_key": sortBy, | |||
"sort_dir": order, | |||
}). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetNotebookList: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
retry++ | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
var temp models.ErrorResult | |||
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||
} | |||
log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||
return &result, fmt.Errorf(temp.ErrorMsg) | |||
} | |||
return &result, nil | |||
} |
@@ -0,0 +1,233 @@ | |||
package cloudbrain_two_cd | |||
import ( | |||
"bytes" | |||
"code.gitea.io/gitea/modules/modelarts_gateway/core" | |||
"crypto/tls" | |||
"encoding/json" | |||
"fmt" | |||
"io/ioutil" | |||
"net/http" | |||
"strconv" | |||
"time" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
) | |||
var ( | |||
httpClient *http.Client | |||
HOST string | |||
TOKEN string | |||
autoStopDurationMs = 4 * 60 * 60 * 1000 | |||
) | |||
const ( | |||
errorCodeExceedLimit = "ModelArts.0118" | |||
//notebook 2.0 | |||
urlNotebook2 = "/notebooks" | |||
//error code | |||
modelartsIllegalToken = "ModelArts.6401" | |||
NotebookNotFound = "ModelArts.6404" | |||
NotebookNoPermission = "ModelArts.6407" | |||
NotebookInvalid = "ModelArts.6400" | |||
UnknownErrorPrefix = "UNKNOWN:" | |||
ModelArtsJobNotExists = "ModelArts.0102" | |||
ModelArtsJobInTargetState = "ModelArts.6357" | |||
ModelArtsJobInternalError = "ModelArts.0010" | |||
) | |||
// getHttpClient lazily builds the shared HTTP client used for the
// ModelArts(CD) gateway: 30s total request timeout, with TLS certificate
// verification disabled (InsecureSkipVerify) for the endpoint.
// NOTE(review): the lazy init is not goroutine-safe — concurrent first
// calls may race on the package-level httpClient; confirm callers are
// serialized or guard this with sync.Once.
func getHttpClient() *http.Client {
	if httpClient == nil {
		httpClient = &http.Client{
			Timeout: 30 * time.Second,
			Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}},
		}
	}
	return httpClient
}
func GetNotebook(jobID string) (*models.GetNotebook2Result, error) { | |||
var result models.GetNotebook2Result | |||
client := getHttpClient() | |||
s := core.Signer{ | |||
Key: setting.ModelartsCD.AccessKey, | |||
Secret: setting.ModelartsCD.SecretKey, | |||
} | |||
r, _ := http.NewRequest(http.MethodGet, | |||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID, | |||
nil) | |||
r.Header.Add("content-type", "application/json") | |||
s.Sign(r) | |||
resp, err := client.Do(r) | |||
if err != nil { | |||
log.Error("client.Do failed: %s", err.Error()) | |||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||
} | |||
defer resp.Body.Close() | |||
body, err := ioutil.ReadAll(resp.Body) | |||
if err != nil { | |||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||
} | |||
err = json.Unmarshal(body, &result) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(result.ErrorCode) != 0 { | |||
log.Error("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||
var result models.NotebookActionResult | |||
client := getHttpClient() | |||
s := core.Signer{ | |||
Key: setting.ModelartsCD.AccessKey, | |||
Secret: setting.ModelartsCD.SecretKey, | |||
} | |||
r, _ := http.NewRequest(http.MethodPost, | |||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID+"/"+param.Action+"?duration="+strconv.Itoa(autoStopDurationMs), | |||
nil) | |||
r.Header.Add("content-type", "application/json") | |||
s.Sign(r) | |||
resp, err := client.Do(r) | |||
if err != nil { | |||
log.Error("client.Do failed: %s", err.Error()) | |||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||
} | |||
defer resp.Body.Close() | |||
body, err := ioutil.ReadAll(resp.Body) | |||
if err != nil { | |||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||
} | |||
err = json.Unmarshal(body, &result) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(result.ErrorCode) != 0 { | |||
log.Error("ManageNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("ManageNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func DelNotebook(jobID string) (*models.NotebookDelResult, error) { | |||
var result models.NotebookDelResult | |||
client := getHttpClient() | |||
s := core.Signer{ | |||
Key: setting.ModelartsCD.AccessKey, | |||
Secret: setting.ModelartsCD.SecretKey, | |||
} | |||
r, _ := http.NewRequest(http.MethodDelete, | |||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID, | |||
nil) | |||
r.Header.Add("content-type", "application/json") | |||
s.Sign(r) | |||
resp, err := client.Do(r) | |||
if err != nil { | |||
log.Error("client.Do failed: %s", err.Error()) | |||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||
} | |||
defer resp.Body.Close() | |||
body, err := ioutil.ReadAll(resp.Body) | |||
if err != nil { | |||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||
} | |||
err = json.Unmarshal(body, &result) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(result.ErrorCode) != 0 { | |||
log.Error("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
if result.ErrorCode == ModelArtsJobNotExists || result.ErrorCode == ModelArtsJobInTargetState { | |||
//任务不存在或者已经处于被删除的状态,此时认为删除成功 | |||
return &models.NotebookDelResult{}, nil | |||
} | |||
if result.ErrorCode == ModelArtsJobInternalError { | |||
log.Error("ModelArt internal error when del job,jobId=%s", jobID) | |||
return &models.NotebookDelResult{}, nil | |||
} | |||
return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func CreateNotebook(createJobParams models.CreateNotebookWithoutPoolParams) (*models.CreateNotebookResult, error) { | |||
var result models.CreateNotebookResult | |||
client := getHttpClient() | |||
s := core.Signer{ | |||
Key: setting.ModelartsCD.AccessKey, | |||
Secret: setting.ModelartsCD.SecretKey, | |||
} | |||
req, _ := json.Marshal(createJobParams) | |||
r, _ := http.NewRequest(http.MethodPost, | |||
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2, | |||
ioutil.NopCloser(bytes.NewBuffer(req))) | |||
r.Header.Add("content-type", "application/json") | |||
s.Sign(r) | |||
resp, err := client.Do(r) | |||
if err != nil { | |||
log.Error("client.Do failed: %s", err.Error()) | |||
return &result, fmt.Errorf("client.Do failed: %s", err.Error()) | |||
} | |||
defer resp.Body.Close() | |||
body, err := ioutil.ReadAll(resp.Body) | |||
if err != nil { | |||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||
} | |||
err = json.Unmarshal(body, &result) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(result.ErrorCode) != 0 { | |||
log.Error("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
if result.ErrorCode == errorCodeExceedLimit { | |||
result.ErrorMsg = "所选规格使用数量已超过最大配额限制。" | |||
} | |||
return &result, fmt.Errorf("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} |
@@ -118,7 +118,7 @@ sendjob: | |||
log.Error("CreateNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("CreateNotebookJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
log.Info("CreateNotebookJob success.req.JobName = %s ,result=%+v", req.Name, result) | |||
return &result, nil | |||
} | |||
@@ -466,6 +466,70 @@ sendjob: | |||
log.Error("resty grampus restart note book job failed(%s): %v", res.String(), err.Error()) | |||
return nil, fmt.Errorf("resty grampus restart note book job failed: %v", err) | |||
} | |||
log.Info("RestartNotebookJob success.jobId = %s ,result=%+v", jobID, restartResponse) | |||
return restartResponse, nil | |||
} | |||
func GetDebugJobEvents(jobID string) (*models.GetGrampusDebugJobEventsResponse, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetGrampusDebugJobEventsResponse | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + urlNotebookJob + "/" + jobID + "/events") | |||
log.Info("res=%v", res) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetDebugJobEvents: %v", err) | |||
} | |||
if result.ErrorCode == errorIllegalToken && retry < 1 { | |||
retry++ | |||
log.Info("retry get token") | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if result.ErrorCode != 0 { | |||
log.Error("GetDebugJobEvents failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
return nil, fmt.Errorf("GetDebugJobEvents failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} | |||
func GetTrainJobEvents(jobID string) (*models.GetGrampusJobEventsResponse, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetGrampusJobEventsResponse | |||
retry := 0 | |||
sendjob: | |||
_, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + urlTrainJob + "/" + jobID + "/events") | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetTrainJobEvents: %v", err) | |||
} | |||
if result.ErrorCode == errorIllegalToken && retry < 1 { | |||
retry++ | |||
log.Info("retry get token") | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
if result.ErrorCode != 0 { | |||
log.Error("GetTrainJobEvents failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
return nil, fmt.Errorf("GetTrainJobEvents failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
} |
@@ -50,26 +50,28 @@ const ( | |||
ActionRejectPullRequest // 22 | |||
ActionCommentPull // 23 | |||
ActionUploadAttachment //24 | |||
ActionCreateDebugGPUTask //25 | |||
ActionCreateDebugNPUTask //26 | |||
ActionCreateTrainTask //27 | |||
ActionCreateInferenceTask // 28 | |||
ActionCreateBenchMarkTask //29 | |||
ActionCreateNewModelTask //30 | |||
ActionCreateGPUTrainTask //31 | |||
ActionCreateGrampusNPUTrainTask //32 | |||
ActionCreateGrampusGPUTrainTask //33 | |||
ActionBindWechat //34 | |||
ActionDatasetRecommended //35 | |||
ActionCreateImage //36 | |||
ActionImageRecommend //37 | |||
ActionChangeUserAvatar //38 | |||
ActionCreateGrampusNPUDebugTask //39 | |||
ActionCreateGrampusGPUDebugTask //40 | |||
ActionCreateGrampusGCUDebugTask //41 | |||
ActionCreateGrampusGCUTrainTask //42 | |||
ActionCreateGrampusMLUDebugTask //43 | |||
ActionUploadAttachment //24 | |||
ActionCreateDebugGPUTask //25 | |||
ActionCreateDebugNPUTask //26 | |||
ActionCreateTrainTask //27 | |||
ActionCreateInferenceTask // 28 | |||
ActionCreateBenchMarkTask //29 | |||
ActionCreateNewModelTask //30 | |||
ActionCreateGPUTrainTask //31 | |||
ActionCreateGrampusNPUTrainTask //32 | |||
ActionCreateGrampusGPUTrainTask //33 | |||
ActionBindWechat //34 | |||
ActionDatasetRecommended //35 | |||
ActionCreateImage //36 | |||
ActionImageRecommend //37 | |||
ActionChangeUserAvatar //38 | |||
ActionCreateGrampusNPUDebugTask //39 | |||
ActionCreateGrampusGPUDebugTask //40 | |||
ActionCreateGrampusGCUDebugTask //41 | |||
ActionCreateGrampusGCUTrainTask //42 | |||
ActionCreateGrampusMLUDebugTask //43 | |||
ActionCreateGrampusMLUTrainTask //44 | |||
ActionCreateGrampusGPUOnlineInferTask //45 | |||
) | |||
// Action represents user operation type and other information to | |||
@@ -126,6 +128,20 @@ func (a *Action) loadActUser() { | |||
} | |||
} | |||
// FilterCloudbrainInfo strips the attached cloudbrain record down to what
// the activity feed exposes: for a live (not soft-deleted) task only the
// ID is kept; a soft-deleted task is removed from the action entirely.
func (a *Action) FilterCloudbrainInfo() {
	if a.Cloudbrain == nil {
		return
	}
	// DeletedAt is zero for records that have not been soft-deleted.
	if a.Cloudbrain.DeletedAt.IsZero() {
		newCloudbrain := &Cloudbrain{}
		newCloudbrain.ID = a.Cloudbrain.ID
		a.Cloudbrain = newCloudbrain
	} else {
		a.Cloudbrain = nil
	}
}
func (a *Action) loadRepo() { | |||
if a.Repo != nil { | |||
return | |||
@@ -136,6 +152,26 @@ func (a *Action) loadRepo() { | |||
log.Error("GetRepositoryByID(%d): %v", a.RepoID, err) | |||
} | |||
} | |||
// loadCloudbrain attaches the cloudbrain task referenced by this action.
// Soft-deleted rows are included (Unscoped), and the hit is only accepted
// when its display/job name matches the action's RefName.
func (a *Action) loadCloudbrain() {
	if !a.IsCloudbrainAction() {
		return
	}
	cloudbrain := &Cloudbrain{}
	// A non-numeric Content leaves cloudbrainId at 0, which matches no id.
	cloudbrainId, _ := strconv.ParseInt(a.Content, 10, 64)
	jobId := a.Content
	// When an action is published, the meaning of the Content field differs
	// by cloudbrain task type: some store the numeric ID, others the jobId.
	// So the matching cloudbrain record is looked up on both columns.
	if has, err := x.
		Where(builder.Or(builder.Eq{"id": cloudbrainId}).Or(builder.Eq{"job_id": jobId})).Unscoped().
		Get(cloudbrain); err != nil || !has {
		return
	}
	if cloudbrain.DisplayJobName == a.RefName || cloudbrain.JobName == a.RefName {
		a.Cloudbrain = cloudbrain
	}
}
// GetActFullName gets the action's user full name. | |||
func (a *Action) GetActFullName() string { | |||
@@ -381,6 +417,7 @@ func (a *Action) IsCloudbrainAction() bool { | |||
ActionCreateBenchMarkTask, | |||
ActionCreateGPUTrainTask, | |||
ActionCreateGrampusGPUDebugTask, | |||
ActionCreateGrampusGPUOnlineInferTask, | |||
ActionCreateGrampusNPUDebugTask, | |||
ActionCreateGrampusNPUTrainTask, | |||
ActionCreateGrampusGPUTrainTask, | |||
@@ -463,7 +500,7 @@ func GetFeeds(opts GetFeedsOptions) ([]*Action, error) { | |||
return nil, fmt.Errorf("Find: %v", err) | |||
} | |||
if err := ActionList(actions).LoadAttributes(); err != nil { | |||
if err := ActionList(actions).LoadAllAttributes(); err != nil { | |||
return nil, fmt.Errorf("LoadAttributes: %v", err) | |||
} | |||
@@ -483,7 +520,7 @@ func GetLast20PublicFeeds(opTypes []int) ([]*Action, error) { | |||
return nil, fmt.Errorf("Find: %v", err) | |||
} | |||
if err := ActionList(actions).LoadAttributes(); err != nil { | |||
if err := ActionList(actions).LoadAllAttributes(); err != nil { | |||
return nil, fmt.Errorf("LoadAttributes: %v", err) | |||
} | |||
@@ -819,3 +819,17 @@ func QueryModelForSearch(opts *AiModelQueryOptions) ([]*AiModelManage, int64, er | |||
return aiModelManages, count, nil | |||
} | |||
func QueryModelRepoByModelID(modelId string) (*Repository, error) { | |||
r := &Repository{} | |||
has, err := x.Where(builder.NewCond(). | |||
And(builder.Eq{"id": builder.Select("repo_id"). | |||
From("ai_model_manage"). | |||
Where(builder.Eq{"id": modelId})})).Get(r) | |||
if err != nil { | |||
return nil, err | |||
} else if !has { | |||
return nil, &ErrRecordNotExist{} | |||
} | |||
return r, nil | |||
} |
@@ -329,7 +329,7 @@ func DeleteAttachments(attachments []*Attachment, remove bool) (int, error) { | |||
log.Info("Message:%s\n", obsError.Message) | |||
} | |||
} | |||
DeleteFileChunkById(a.UUID) | |||
//rf := path.Join(a.UUID[0:1], a.UUID[1:2]) | |||
/* | |||
files, err := repo.GetDatasetDirs(a.UUID, "") | |||
@@ -68,15 +68,16 @@ const ( | |||
ModelSafetyTesting CloudbrainStatus = "TESTING" | |||
JobTypeDebug JobType = "DEBUG" | |||
JobTypeBenchmark JobType = "BENCHMARK" | |||
JobTypeModelSafety JobType = "MODELSAFETY" | |||
JobTypeSnn4imagenet JobType = "SNN4IMAGENET" | |||
JobTypeBrainScore JobType = "BRAINSCORE" | |||
JobTypeSnn4Ecoset JobType = "SNN4ECOSET" | |||
JobTypeSim2BrainSNN JobType = "SIM2BRAIN_SNN" | |||
JobTypeTrain JobType = "TRAIN" | |||
JobTypeInference JobType = "INFERENCE" | |||
JobTypeDebug JobType = "DEBUG" | |||
JobTypeBenchmark JobType = "BENCHMARK" | |||
JobTypeModelSafety JobType = "MODELSAFETY" | |||
JobTypeSnn4imagenet JobType = "SNN4IMAGENET" | |||
JobTypeBrainScore JobType = "BRAINSCORE" | |||
JobTypeSnn4Ecoset JobType = "SNN4ECOSET" | |||
JobTypeSim2BrainSNN JobType = "SIM2BRAIN_SNN" | |||
JobTypeTrain JobType = "TRAIN" | |||
JobTypeInference JobType = "INFERENCE" | |||
JobTypeOnlineInference JobType = "ONLINEINFERENCE" | |||
//notebook | |||
ModelArtsCreateQueue ModelArtsJobStatus = "CREATE_QUEUING" //免费资源创建排队中 | |||
@@ -237,6 +238,7 @@ type Cloudbrain struct { | |||
EngineID int64 //引擎id | |||
ImageID string //grampus image_id | |||
AiCenter string //grampus ai center: center_id+center_name | |||
FailedReason string `xorm:"text"` | |||
TrainUrl string //输出模型的obs路径 | |||
BranchName string `xorm:"varchar(2550)"` //分支名称 | |||
@@ -344,15 +346,56 @@ func (task *Cloudbrain) CorrectCreateUnix() { | |||
task.CreatedUnix = task.StartTime | |||
} | |||
} | |||
func (task *Cloudbrain) GetAiCenter() string { | |||
if task.Type == TypeCloudBrainOne { | |||
return AICenterOfCloudBrainOne | |||
} else if task.Type == TypeCloudBrainTwo { | |||
return AICenterOfCloudBrainTwo | |||
} else if task.Type == TypeCDCenter { | |||
return AICenterOfChengdu | |||
} else { | |||
return strings.Split(task.AiCenter, "+")[0] | |||
} | |||
} | |||
//是否为在线notebook文件任务 | |||
func (task *Cloudbrain) IsFileNoteBookTask() bool { | |||
return task.JobType == string(JobTypeDebug) && task.BootFile != "" | |||
} | |||
func (task *Cloudbrain) CanUserModify(user *User) bool { | |||
if user == nil { | |||
return false | |||
} | |||
return user.IsAdmin || user.ID == task.UserID | |||
} | |||
func (task *Cloudbrain) CanUserDelete(user *User, isRepoOwner bool) bool { | |||
if user == nil { | |||
return false | |||
} | |||
return isRepoOwner || user.IsAdmin || user.ID == task.UserID | |||
} | |||
func AllTerminalStatus() []string { | |||
return []string{string(ModelArtsTrainJobCompleted), string(ModelArtsTrainJobFailed), | |||
string(ModelArtsTrainJobKilled), string(ModelArtsStopped), | |||
string(JobStopped), string(JobFailed), | |||
string(ModelArtsTrainJobKilled), string(ModelArtsStopped), string(ModelArtsCreateFailed), | |||
string(ModelArtsStartFailed), string(JobStopped), string(JobFailed), | |||
string(JobSucceeded), GrampusStatusFailed, | |||
GrampusStatusSucceeded, GrampusStatusStopped, LocalStatusFailed} | |||
} | |||
func IsCloudbrainTerminalStatus(status string) bool { | |||
for _, s := range AllTerminalStatus() { | |||
if strings.ToUpper(status) == strings.ToUpper(s) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func AllStoppingStatus() []string { | |||
return []string{string(ModelArtsStopping), string(ModelArtsDeleting), | |||
string(ModelArtsTrainJobKilling), GrampusStatusStopping} | |||
@@ -388,12 +431,7 @@ func AllStoppingAndTerminalStatus() []string { | |||
func (task *Cloudbrain) IsTerminal() bool { | |||
status := task.Status | |||
for _, s := range AllTerminalStatus() { | |||
if status == s { | |||
return true | |||
} | |||
} | |||
return false | |||
return IsCloudbrainTerminalStatus(status) | |||
} | |||
func (task *Cloudbrain) IsPreparing() bool { | |||
return task.Status == LocalStatusPreparing | |||
@@ -405,6 +443,15 @@ func (task *Cloudbrain) NeedActiveStop() bool { | |||
return task.IsCreating() || (task.IsPreparing() && int64(task.CreatedUnix) < time.Now().Add(-1*setting.PREPARING_MAX_WAIT_DURATION).Unix()) | |||
} | |||
//是否允许创建多版本 | |||
//目前只有启智NPU可以 | |||
func (task *Cloudbrain) IsAllowedToCreateMultipleVersions() bool { | |||
if task.Type == TypeCloudBrainTwo && task.ComputeResource == NPUResource { | |||
return true | |||
} | |||
return false | |||
} | |||
func (task *Cloudbrain) IsNewAITask() bool { | |||
for k, v := range setting.AI_TASK_RANGE { | |||
if k == task.JobType+"_"+fmt.Sprint(task.Type) { | |||
@@ -1225,6 +1272,11 @@ type GetNotebook2Result struct { | |||
Ownership string `json:"ownership"` | |||
Status string `json:"status"` | |||
} `json:"volume"` | |||
ActionProgress []struct { | |||
Step int `json:"step"` | |||
Status string `json:"status"` | |||
Description string `json:"description"` | |||
} `json:"action_progress"` | |||
} | |||
type GetTokenParams struct { | |||
@@ -1694,6 +1746,11 @@ type NotebookList struct { | |||
JobName string `json:"name"` | |||
JobID string `json:"id"` | |||
Status string `json:"status"` | |||
Lease struct { | |||
CreateTime int64 `json:"create_at"` //实例创建的时间,UTC毫秒 | |||
Duration int64 `json:"duration"` //实例运行时长,以创建时间为起点计算,即“创建时间+duration > 当前时刻”时,系统会自动停止实例 | |||
UpdateTime int64 `json:"update_at"` //实例最后更新(不包括保活心跳)的时间,UTC毫秒 | |||
} `json:"lease"` //实例自动停止的倒计时信息 | |||
} | |||
type GetNotebookListResult struct { | |||
@@ -1886,7 +1943,7 @@ type GrampusTasks struct { | |||
WorkServerNumber int `json:"nodeCount"` | |||
} | |||
type GrampusNotebookTask struct { | |||
AutoStopDuration int `json:"autoStopDuration"` | |||
AutoStopDuration int64 `json:"autoStopDuration"` | |||
Name string `json:"name"` | |||
Capacity int `json:"capacity"` | |||
CenterID []string `json:"centerID"` | |||
@@ -2227,6 +2284,22 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e | |||
return cloudbrains, int(count), nil | |||
} | |||
func GetCloudbrainEarlyVersionList(task *Cloudbrain) ([]*Cloudbrain, error) { | |||
cloudbrains := make([]*Cloudbrain, 0) | |||
if err := x.Where(builder.NewCond(). | |||
And(builder.Eq{"cloudbrain.repo_id": task.RepoID}). | |||
And(builder.Eq{"cloudbrain.type": task.Type}). | |||
And(builder.Eq{"cloudbrain.job_id": task.JobID}). | |||
And(builder.Eq{"cloudbrain.job_type": task.JobType}). | |||
And(builder.Lt{"cloudbrain.created_unix": task.CreatedUnix})). | |||
OrderBy("cloudbrain.created_unix DESC"). | |||
Find(&cloudbrains); err != nil { | |||
return nil, fmt.Errorf("Find: %v", err) | |||
} | |||
return cloudbrains, nil | |||
} | |||
func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) { | |||
session := x.NewSession() | |||
defer session.Close() | |||
@@ -2302,6 +2375,26 @@ func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) { | |||
return getRepoCloudBrain(cb) | |||
} | |||
func GetCloudbrainListByJobID(jobID string) ([]*Cloudbrain, error) { | |||
r := make([]*Cloudbrain, 0) | |||
if err := x.Where("job_id = ?", jobID).OrderBy("id desc").Find(&r); err != nil { | |||
return nil, err | |||
} | |||
return r, nil | |||
} | |||
func GetNewestCloudbrainByJobId(jobID string) (*Cloudbrain, error) { | |||
r := &Cloudbrain{} | |||
if has, err := x.Where("job_id = ?", jobID).OrderBy("id desc").Limit(1).Get(r); err != nil { | |||
return nil, err | |||
} else if !has { | |||
return nil, ErrRecordNotExist{} | |||
} | |||
return r, nil | |||
} | |||
func GetCloudbrainByJobIDWithDeleted(jobID string) (*Cloudbrain, error) { | |||
cb := &Cloudbrain{JobID: jobID} | |||
return getRepoCloudBrainWithDeleted(cb) | |||
@@ -2663,7 +2756,7 @@ func GetModelSafetyCountByUserID(userID int64) (int, error) { | |||
} | |||
func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTypes ...JobType) (int64, error) { | |||
sess := x.Where("status=? and type=?", JobWaiting, cloudbrainType) | |||
sess := x.Where(builder.NewCond().And(builder.In("status", JobWaiting, LocalStatusPreparing, LocalStatusCreating)).And(builder.Eq{"type": cloudbrainType})) | |||
if len(jobTypes) > 0 { | |||
sess.In("job_type", jobTypes) | |||
} | |||
@@ -2947,6 +3040,15 @@ func CloudbrainAllStatic(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, er | |||
return cloudbrains, count, nil | |||
} | |||
func GetLastestNCloudbrain(n int) ([]*Cloudbrain, error) { | |||
r := make([]*Cloudbrain, 0) | |||
err := x.Where("ai_center!='' or type!=2").Desc("id").Limit(n).Unscoped().Find(&r) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return r, nil | |||
} | |||
func CloudbrainAllKanBan(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
sess := x.NewSession() | |||
defer sess.Close() | |||
@@ -3224,3 +3326,5 @@ func GetCloudBrainByRepoIdAndModelName(repoId int64, modelName string) ([]*Cloud | |||
err := x.AllCols().Where("model_name=? and repo_id=?", modelName, repoId).OrderBy("created_unix asc").Find(&cloudBrains) | |||
return cloudBrains, err | |||
} | |||
var SubTaskName = "task1" |
@@ -134,3 +134,27 @@ func GetCloudbrainTaskUnitPrice(task Cloudbrain) (int, error) { | |||
} | |||
return s.UnitPrice * n, nil | |||
} | |||
func UpdateCloudbrainSpec(cloudbrainId int64, s *Specification) (int64, error) { | |||
new := CloudbrainSpec{ | |||
CloudbrainID: cloudbrainId, | |||
SpecId: s.ID, | |||
SourceSpecId: s.SourceSpecId, | |||
AccCardsNum: s.AccCardsNum, | |||
AccCardType: s.AccCardType, | |||
CpuCores: s.CpuCores, | |||
MemGiB: s.MemGiB, | |||
GPUMemGiB: s.GPUMemGiB, | |||
ShareMemGiB: s.ShareMemGiB, | |||
ComputeResource: s.ComputeResource, | |||
UnitPrice: s.UnitPrice, | |||
QueueId: s.QueueId, | |||
QueueCode: s.QueueCode, | |||
Cluster: s.Cluster, | |||
AiCenterCode: s.AiCenterCode, | |||
AiCenterName: s.AiCenterName, | |||
IsExclusive: s.IsExclusive, | |||
ExclusiveOrg: s.ExclusiveOrg, | |||
} | |||
return x.Where("cloudbrain_id = ?", cloudbrainId).Update(&new) | |||
} |
@@ -144,6 +144,23 @@ func GetCloudbrainStatusCount() ([]map[string]string, error) { | |||
return x.QueryString(countSql) | |||
} | |||
func GetCloudbrainCardTimeAndCountGroupByAICenter() ([]map[string]string, error) { | |||
countSql := `select ai_center,SUM( | |||
COALESCE(a.duration * | |||
CASE | |||
WHEN a.work_server_number = 0 THEN 1 | |||
ELSE COALESCE(a.work_server_number, 1) | |||
END * | |||
COALESCE(cloudbrain_spec.acc_cards_num, 1), 0) | |||
) as card_duration,count(*) num from | |||
(select id,duration,work_server_number,case when type=0 then 'OpenIOne' when type=1 then 'OpenITwo' when type=3 then 'OpenIChengdu' else split_part(ai_center, '+',1) | |||
end ai_center | |||
FROM public.cloudbrain ) a Left JOIN cloudbrain_spec on a.id = cloudbrain_spec.cloudbrain_id | |||
where ai_center!='' group by a.ai_center order by card_duration desc` | |||
return x.QueryString(countSql) | |||
} | |||
func GetCloudbrainTpyeDurationSum() ([]map[string]string, error) { | |||
countSql := "SELECT type,sum(duration) FROM public.cloudbrain group by type order by sum(duration) desc" | |||
return x.QueryString(countSql) | |||
@@ -5,6 +5,7 @@ import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"xorm.io/builder" | |||
"xorm.io/xorm" | |||
) | |||
@@ -92,6 +93,27 @@ func getFileChunkByMD5AndUser(e Engine, md5 string, userID int64, typeCloudBrain | |||
return fileChunk, nil | |||
} | |||
// GetAttachmentByID returns attachment by given id | |||
func GetFileChunksByUserId(userId int64, lastTime int64, isUploadFinished bool) ([]*FileChunk, error) { | |||
return getFileChunksByUserId(x, userId, lastTime, isUploadFinished) | |||
} | |||
func getFileChunksByUserId(e Engine, userId int64, lastTime int64, isUploadFinished bool) ([]*FileChunk, error) { | |||
fileChunks := make([]*FileChunk, 0) | |||
cond := builder.NewCond() | |||
cond = cond.And(builder.Eq{"user_id": userId}) | |||
if lastTime > 0 { | |||
cond = cond.And(builder.Gte{"created_unix": lastTime}) | |||
} | |||
if !isUploadFinished { | |||
cond = cond.And(builder.Eq{"is_uploaded": 0}) | |||
} | |||
if err := e.Where(cond).Find(&fileChunks); err != nil { | |||
return nil, err | |||
} | |||
return fileChunks, nil | |||
} | |||
// GetAttachmentByID returns attachment by given id | |||
func GetFileChunkByUUID(uuid string) (*FileChunk, error) { | |||
return getFileChunkByUUID(x, uuid) | |||
@@ -0,0 +1,30 @@ | |||
package models | |||
type IPLocation struct { | |||
ID int64 `xorm:"pk autoincr"` | |||
IpAddr string `xorm:"unique"` | |||
Longitude string | |||
Latitude string | |||
} | |||
func CreateIPLocation(ipLocation *IPLocation) (err error) { | |||
_, err = x.Insert(ipLocation) | |||
return err | |||
} | |||
func GetIpLocation(ip string) (*IPLocation, error) { | |||
ipLocation := &IPLocation{IpAddr: ip} | |||
has, err := x.Get(ipLocation) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if has { | |||
return ipLocation, nil | |||
} else { | |||
return nil, ErrRecordNotExist{} | |||
} | |||
} |
@@ -176,8 +176,9 @@ func UpdateModelMigrateRecordByStep(record *ModelMigrateRecord) error { | |||
func GetUnfinishedModelMigrateRecords() ([]*ModelMigrateRecord, error) { | |||
records := make([]*ModelMigrateRecord, 0, 10) | |||
return records, x. | |||
Where(builder.NewCond().And(builder.In("current_step", UnFinishedMigrateSteps))). | |||
return records, x.Cols("model_migrate_record.id", "model_migrate_record.cloudbrain_id", "model_migrate_record.dest_bucket", "model_migrate_record.dest_endpoint", "model_migrate_record.dest_object_key", "model_migrate_record.dest_proxy", "model_migrate_record.src_bucket", "model_migrate_record.src_endpoint", "model_migrate_record.src_object_key", "model_migrate_record.status", "model_migrate_record.current_step", "model_migrate_record.retry_count", "model_migrate_record.created_unix", "model_migrate_record.updated_unix", "model_migrate_record.deleted_at", "model_migrate_record.remark").Table("model_migrate_record"). | |||
Join("inner", "cloudbrain", "cloudbrain.id = model_migrate_record.cloudbrain_id"). | |||
Where(builder.NewCond().And(builder.In("model_migrate_record.current_step", UnFinishedMigrateSteps)).And(builder.Eq{"cloudbrain.deleted_at": "0001-01-01 00:00:00"}.Or(builder.IsNull{"cloudbrain.deleted_at"}))). | |||
Limit(100). | |||
Find(&records) | |||
} | |||
@@ -241,3 +241,14 @@ func DeployStatusConvert(status string) string { | |||
return statusConvert | |||
} | |||
} | |||
func GetModelartsDeployFinishTimebyJobID(jobID string) (finishTime timeutil.TimeStamp, err error) { | |||
finishTime = timeutil.TimeStamp(0) | |||
deploy, err := GetModelartsDeployByJobID(jobID) | |||
if err != nil || deploy.CompleteUnix == timeutil.TimeStamp(0) { | |||
return finishTime, err | |||
} else { | |||
finishTime = deploy.CompleteUnix.Add(int64(30 * 60)) | |||
return finishTime, nil | |||
} | |||
} |
@@ -173,6 +173,7 @@ func init() { | |||
new(AiModelCollect), | |||
new(AiModelFile), | |||
new(ModelMigrateRecord), | |||
new(IPLocation), | |||
new(ModelartsDeploy), | |||
new(ModelartsDeployQueue), | |||
) | |||
@@ -332,6 +332,8 @@ func NotifyWatchers(actions ...*Action) error { | |||
func producer(actions ...*Action) { | |||
for _, action := range actions { | |||
if !action.IsPrivate { | |||
action.loadCloudbrain() | |||
action.FilterCloudbrainInfo() | |||
ActionChan <- action | |||
} | |||
} | |||
@@ -42,6 +42,7 @@ func GetTaskTypeFromAction(a ActionType) TaskType { | |||
ActionCreateGrampusGCUDebugTask, | |||
ActionCreateGrampusGCUTrainTask, | |||
ActionCreateGrampusMLUDebugTask, | |||
ActionCreateGrampusGPUOnlineInferTask, | |||
ActionCreateGrampusGPUTrainTask: | |||
return TaskCreateCloudbrainTask | |||
case ActionCreateRepo: | |||
@@ -13,6 +13,15 @@ type UserLoginLog struct { | |||
CreatedUnix timeutil.TimeStamp `xorm:"created"` | |||
} | |||
func GetIpByUID(uid int64) string { | |||
userLoginLog := new(UserLoginLog) | |||
has, err := xStatistic.Where("u_id=?", uid).Desc("id").Limit(1).Get(userLoginLog) | |||
if err != nil || !has { | |||
return "" | |||
} | |||
return userLoginLog.IpAddr | |||
} | |||
func SaveLoginInfoToDb(r *http.Request, u *User) { | |||
statictisSess := xStatistic.NewSession() | |||
defer statictisSess.Close() | |||
@@ -1,11 +1,12 @@ | |||
package wechat | |||
import ( | |||
"fmt" | |||
"time" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"fmt" | |||
"time" | |||
) | |||
type JobOperateType string | |||
@@ -157,6 +158,8 @@ func getJobTypeDisplayName(jobType string) string { | |||
switch jobType { | |||
case string(models.JobTypeDebug): | |||
return "调试任务" | |||
case string(models.JobTypeOnlineInference): | |||
return "在线推理" | |||
case string(models.JobTypeBenchmark): | |||
return "评测任务" | |||
case string(models.JobTypeTrain): | |||
@@ -378,7 +378,6 @@ func RepoAssignment() macaron.Handler { | |||
owner *models.User | |||
err error | |||
) | |||
userName := ctx.Params(":username") | |||
repoName := ctx.Params(":reponame") | |||
@@ -431,7 +430,6 @@ func RepoAssignment() macaron.Handler { | |||
if ctx.Written() { | |||
return | |||
} | |||
ctx.Repo.RepoLink = repo.Link() | |||
ctx.Data["RepoLink"] = ctx.Repo.RepoLink | |||
ctx.Data["RepoRelPath"] = ctx.Repo.Owner.Name + "/" + ctx.Repo.Repository.Name | |||
@@ -464,7 +462,6 @@ func RepoAssignment() macaron.Handler { | |||
ctx.ServerError("CanUserFork", err) | |||
return | |||
} | |||
ctx.Data["DisableSSH"] = setting.SSH.Disabled | |||
ctx.Data["ExposeAnonSSH"] = setting.SSH.ExposeAnonymous | |||
ctx.Data["DisableHTTP"] = setting.Repository.DisableHTTPGit | |||
@@ -581,7 +578,6 @@ func RepoAssignment() macaron.Handler { | |||
} | |||
ctx.Data["CanCompareOrPull"] = canCompare | |||
ctx.Data["PullRequestCtx"] = ctx.Repo.PullRequest | |||
if ctx.Query("go-get") == "1" { | |||
ctx.Data["GoGetImport"] = ComposeGoGetImport(owner.Name, repo.Name) | |||
prefix := setting.AppURL + path.Join(owner.Name, repo.Name, "src", "branch", ctx.Repo.BranchName) | |||
@@ -696,7 +692,6 @@ func RepoRefByType(refType RepoRefType) macaron.Handler { | |||
if ctx.Repo.Repository.IsEmpty { | |||
return | |||
} | |||
var ( | |||
refName string | |||
err error | |||
@@ -718,7 +713,6 @@ func RepoRefByType(refType RepoRefType) macaron.Handler { | |||
} | |||
}() | |||
} | |||
// Get default branch. | |||
if len(ctx.Params("*")) == 0 { | |||
refName = ctx.Repo.Repository.DefaultBranch | |||
@@ -789,7 +783,6 @@ func RepoRefByType(refType RepoRefType) macaron.Handler { | |||
return | |||
} | |||
} | |||
ctx.Data["BranchName"] = ctx.Repo.BranchName | |||
ctx.Data["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL() | |||
ctx.Data["CommitID"] = ctx.Repo.CommitID | |||
@@ -805,7 +798,6 @@ func RepoRefByType(refType RepoRefType) macaron.Handler { | |||
return | |||
} | |||
ctx.Data["CommitsCount"] = ctx.Repo.CommitsCount | |||
ctx.Next() | |||
} | |||
} | |||
@@ -133,10 +133,12 @@ func getDatasetGrampus(datasetInfos map[string]models.DatasetInfo) []models.Gram | |||
endPoint := getEndPoint() | |||
for _, datasetInfo := range datasetInfos { | |||
datasetGrampus = append(datasetGrampus, models.GrampusDataset{ | |||
Name: datasetInfo.FullName, | |||
Bucket: setting.Bucket, | |||
EndPoint: endPoint, | |||
ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, | |||
Name: datasetInfo.FullName, | |||
Bucket: setting.Bucket, | |||
EndPoint: endPoint, | |||
ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, | |||
ReadOnly: true, | |||
ContainerPath: "/tmp/dataset/" + datasetInfo.FullName, | |||
}) | |||
} | |||
@@ -352,21 +354,26 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobId str | |||
for i, ckptName := range req.CkptNames { | |||
if len(req.CkptNames) != 0 { | |||
ckptGrampus = models.GrampusDataset{ | |||
Name: ckptName, | |||
Bucket: setting.Bucket, | |||
EndPoint: getEndPoint(), | |||
ObjectKey: req.PreTrainModelPaths[i], | |||
Name: ckptName, | |||
Bucket: setting.Bucket, | |||
EndPoint: getEndPoint(), | |||
ObjectKey: req.PreTrainModelPaths[i], | |||
ContainerPath: "/tmp/pretrainmodel/" + req.CkptName, | |||
ReadOnly: true, | |||
} | |||
} | |||
modelGrampus = append(modelGrampus, ckptGrampus) | |||
} | |||
codeGrampus = models.GrampusDataset{ | |||
Name: req.CodeName, | |||
Bucket: setting.Bucket, | |||
EndPoint: getEndPoint(), | |||
ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip", | |||
Name: req.CodeName, | |||
Bucket: setting.Bucket, | |||
EndPoint: getEndPoint(), | |||
ObjectKey: req.CodeObsPath + cloudbrain.DefaultBranchName + ".zip", | |||
ReadOnly: false, | |||
ContainerPath: "/tmp/code/" + cloudbrain.DefaultBranchName + ".zip", | |||
} | |||
outputGrampus = models.GrampusDataset{ | |||
ContainerPath: "/tmp/output", | |||
GetBackEndpoint: getEndPoint(), | |||
} | |||
} else if ProcessorTypeGPU == req.ProcessType { | |||
@@ -164,6 +164,10 @@ sendjob: | |||
} | |||
func GetNotebookJob(jobID string) (*models.GrampusNotebookResponse, error) { | |||
if jobID == "" { | |||
return nil, fmt.Errorf("jobID is emmpty") | |||
} | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GrampusNotebookResponse | |||
@@ -295,15 +299,20 @@ sendjob: | |||
return &result, nil | |||
} | |||
func GetTrainJobLog(jobID string) (string, error) { | |||
func GetTrainJobLog(jobID string, nodeId ...int) (string, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var logContent string | |||
url := HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log" | |||
if len(nodeId) > 0 { | |||
url = HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log/node/" + strconv.Itoa(nodeId[0]) | |||
} | |||
res, err := client.R(). | |||
SetAuthToken(TOKEN). | |||
SetResult(&logContent). | |||
Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/log") | |||
Get(url) | |||
if err != nil { | |||
return logContent, fmt.Errorf("resty GetTrainJobLog: %v", err) | |||
@@ -324,11 +333,14 @@ func GetTrainJobLog(jobID string) (string, error) { | |||
return logContent, nil | |||
} | |||
func GetGrampusMetrics(jobID string, startTime int64, endTime int64) (models.NewModelArtsMetricStatisticResult, error) { | |||
func GetGrampusMetrics(jobID string, startTime int64, endTime int64, nodeId ...int) (models.NewModelArtsMetricStatisticResult, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.NewModelArtsMetricStatisticResult | |||
url := HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics" | |||
if len(nodeId) > 0 { | |||
url = HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics/node/" + strconv.Itoa(nodeId[0]) | |||
} | |||
if startTime > 0 { | |||
var step int64 = 60 | |||
@@ -0,0 +1,44 @@ | |||
package ipinfo | |||
import ( | |||
"crypto/tls" | |||
"fmt" | |||
"net/http" | |||
"code.gitea.io/gitea/modules/setting" | |||
"github.com/go-resty/resty/v2" | |||
) | |||
var restyClient *resty.Client | |||
type IpInfoResponse struct { | |||
Ip string `json:"ip"` | |||
Loc string `json:"loc"` | |||
Bogon bool `json:"bogon"` | |||
} | |||
func getRestyClient() *resty.Client { | |||
if restyClient == nil { | |||
restyClient = resty.New() | |||
restyClient.SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true}) | |||
} | |||
return restyClient | |||
} | |||
func GetLocationByIp(ip string) (*IpInfoResponse, error) { | |||
client := getRestyClient() | |||
var result IpInfoResponse | |||
res, err := client.R(). | |||
SetHeader("Accept", "application/json"). | |||
SetAuthToken(setting.IPInfo.Token). | |||
SetResult(&result). | |||
Get(setting.IPInfo.Host + "/" + ip) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if res.StatusCode() != http.StatusOK { | |||
return nil, fmt.Errorf("http status is %d", res.StatusCode()) | |||
} | |||
return &result, nil | |||
} |
@@ -40,7 +40,7 @@ const maxSinglePutObjectSize = 1024 * 1024 * 1024 * 5 | |||
// maxMultipartPutObjectSize - maximum size 5TiB of object for | |||
// Multipart operation. | |||
const MaxMultipartPutObjectSize = 1024 * 1024 * 1024 * 1024 * 5 | |||
const MaxMultipartPutObjectSize = 1024 * 1024 * 1024 * 200 | |||
// unsignedPayload - value to be set to X-Amz-Content-Sha256 header when | |||
// we don't want to sign the request payload | |||
@@ -45,7 +45,7 @@ func consumerOrder(in <-chan *models.ModelApp, url string) { | |||
continue | |||
} | |||
log.Info("goroutine id=" + fmt.Sprint(goroutine_id) + " wenxin text=" + modelApp.Desc) | |||
result, err := modelarts.CreateWenXinJob(modelApp, url) | |||
result, err := modelarts.CreateWenXinJobToCD(modelApp, url) | |||
if err == nil { | |||
if !modelarts.SendPictureReivew(result.Result) { | |||
modelApp.Status = -1 | |||
@@ -1036,14 +1036,16 @@ func DelTrainJob(jobID string) (*models.TrainJobResult, error) { | |||
var result models.TrainJobResult | |||
//get cloudbrain job by jobid | |||
finetuneJob, _ := models.GetCloudbrainByJobID(jobID) | |||
log.Info("调试:%s", finetuneJob.FineTune) | |||
if finetuneJob.FineTune { | |||
err := ServiceDelete(jobID) | |||
if err != nil { | |||
log.Error("盘古微调部署: Delete Deploy failed:%s %v", jobID, err.Error()) | |||
return &result, err | |||
if finetuneJob, err := models.GetCloudbrainByJobID(jobID); finetuneJob != nil && err == nil { | |||
if finetuneJob.FineTune { | |||
err := ServiceDelete(jobID) | |||
if err != nil { | |||
log.Error("panguService: Delete Deploy failed:%s %v", jobID, err.Error()) | |||
return nil, err | |||
} | |||
} | |||
} else if err != nil { | |||
log.Warn("DelTrainJob GetCloudbrainByJobID from DB failed:%s %v", jobID, err.Error()) | |||
} | |||
retry := 0 | |||
@@ -1145,14 +1147,16 @@ func DelTrainJobVersion(jobID string, versionID string) (*models.TrainJobResult, | |||
var result models.TrainJobResult | |||
//get cloudbrain job by jobid | |||
finetuneJob, _ := models.GetCloudbrainByJobID(jobID) | |||
log.Info("调试:%s", finetuneJob.FineTune) | |||
if finetuneJob.FineTune { | |||
err := ServiceDelete(jobID) | |||
if err != nil { | |||
log.Error("盘古微调部署: Delete Deploy failed:%s %v", jobID, err.Error()) | |||
return &result, err | |||
if finetuneJob, err := models.GetCloudbrainByJobID(jobID); finetuneJob != nil && err == nil { | |||
if finetuneJob.FineTune { | |||
err := ServiceDelete(jobID) | |||
if err != nil { | |||
log.Error("panguService: Delete Deploy failed:%s %v", jobID, err.Error()) | |||
return nil, err | |||
} | |||
} | |||
} else if err != nil { | |||
log.Warn("DelTrainJobVersion GetCloudbrainByJobID failed, cannnot get job from DB:%s %v", jobID, err.Error()) | |||
} | |||
retry := 0 | |||
@@ -1859,28 +1863,30 @@ sendjob: | |||
func ServiceDelete(jobID string) error { | |||
if deploy, _ := models.GetModelartsDeployByJobID(jobID); deploy != nil { | |||
if deploy.Status == "STOP" || deploy.Status == "FAILED" { | |||
if deploy.ServiceID != "" { | |||
err := DeleteDeployService(deploy.ServiceID) | |||
if err != nil { | |||
if err := DeleteDeployService(deploy.ServiceID); err != nil { | |||
log.Error("panguService: Delete DeployService API failed:%s %v", jobID, err.Error()) | |||
return err | |||
} else { | |||
log.Info("panguService: deploy service delete success %s", jobID) | |||
} | |||
log.Info("panguService: deploy service delete success %s", jobID) | |||
} | |||
if deploy.ModelID != "" { | |||
err := DeleteDeployModel(deploy.ModelID) | |||
if err != nil { | |||
if err := DeleteDeployModel(deploy.ModelID); err != nil { | |||
log.Error("panguService: Delete DeployModel API failed:%s %v", jobID, err.Error()) | |||
return err | |||
} else { | |||
log.Info("panguService: deploy model delete success %s", jobID) | |||
} | |||
log.Info("panguService: deploy model delete success %s", jobID) | |||
} | |||
err := models.DeleteModelartsDeploy(jobID) | |||
if err != nil { | |||
if err := models.DeleteModelartsDeploy(jobID); err != nil { | |||
log.Error("panguService: Delete ModelartsDeploy from DB failed:%s %v", jobID, err.Error()) | |||
return err | |||
} else { | |||
log.Info("panguService: deploy DB record delete success %s", jobID) | |||
} | |||
log.Info("panguService: deploy DB record delete success %s", jobID) | |||
} else { | |||
log.Error("the job(%s) is a deploying finetune job, can be not deleted", jobID) | |||
return fmt.Errorf("1") | |||
@@ -1,6 +1,8 @@ | |||
package modelarts | |||
import ( | |||
"bytes" | |||
"crypto/tls" | |||
"encoding/base64" | |||
"encoding/json" | |||
"fmt" | |||
@@ -13,6 +15,7 @@ import ( | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/modelarts_gateway/core" | |||
"code.gitea.io/gitea/modules/setting" | |||
) | |||
@@ -29,6 +32,66 @@ type WenXinResult struct { | |||
Result string `json:"result"` | |||
} | |||
var ( | |||
cdHttpClient *http.Client | |||
) | |||
func getCDHttpClient() *http.Client { | |||
if cdHttpClient == nil { | |||
cdHttpClient = &http.Client{ | |||
Timeout: 30 * time.Second, | |||
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}, | |||
} | |||
} | |||
return cdHttpClient | |||
} | |||
func CreateWenXinJobToCD(modelapp *models.ModelApp, url string) (*WenXinResult, error) { | |||
createJobParams := &CreateWenXinParams{ | |||
Data: WenXinText{ | |||
Prompt: modelapp.Desc, | |||
}, | |||
Parameters: make(map[string]string), | |||
} | |||
var result WenXinResult | |||
client := getCDHttpClient() | |||
s := core.Signer{ | |||
Key: setting.ModelartsCD.AccessKey, | |||
Secret: setting.ModelartsCD.SecretKey, | |||
} | |||
req, _ := json.Marshal(createJobParams) | |||
r, _ := http.NewRequest(http.MethodPost, url, ioutil.NopCloser(bytes.NewBuffer(req))) | |||
log.Info("send to cd modelarts") | |||
r.Header.Add("content-type", "application/json") | |||
s.Sign(r) | |||
res, err := client.Do(r) | |||
if err == nil { | |||
if res.StatusCode == 200 { | |||
defer res.Body.Close() | |||
body, err := ioutil.ReadAll(res.Body) | |||
if err != nil { | |||
log.Error("ioutil.ReadAll failed: %s", err.Error()) | |||
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error()) | |||
} | |||
err = json.Unmarshal(body, &result) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
return &result, nil | |||
} else { | |||
log.Info("res.status=" + fmt.Sprint(res.StatusCode)) | |||
return nil, fmt.Errorf("Service unavailable") | |||
} | |||
} else { | |||
log.Info("error =" + err.Error()) | |||
return nil, fmt.Errorf("Service unavailable") | |||
} | |||
} | |||
func CreateWenXinJob(modelapp *models.ModelApp, url string) (*WenXinResult, error) { | |||
createJobParams := &CreateWenXinParams{ | |||
Data: WenXinText{ | |||
@@ -0,0 +1,24 @@ | |||
package setting | |||
var ScreenMap = struct { | |||
ShowData bool | |||
MinValue int | |||
MaxValue int | |||
}{} | |||
var IPInfo = struct { | |||
Host string | |||
Token string | |||
}{} | |||
func NewScreenMapConfig() { | |||
sec := Cfg.Section("Screen") | |||
ScreenMap.ShowData = sec.Key("ShowData").MustBool(false) | |||
ScreenMap.MinValue = sec.Key("MinValue").MustInt(130) | |||
ScreenMap.MaxValue = sec.Key("MaxValue").MustInt(190) | |||
sec = Cfg.Section("IPInfo") | |||
IPInfo.Host = sec.Key("Host").MustString("https://ipinfo.io") | |||
IPInfo.Token = sec.Key("Token").MustString("df2b002afe582a") | |||
} |
@@ -70,6 +70,8 @@ type C2NetSequenceInfo struct { | |||
Name string `json:"name"` | |||
Content string `json:"content"` | |||
ContentEN string `json:"content_en"` | |||
Loc string `json:"loc"` | |||
Type string `json:"type"` | |||
} | |||
type C2NetSqInfos struct { | |||
@@ -624,20 +626,21 @@ var ( | |||
//grampus config | |||
Grampus = struct { | |||
Env string | |||
Host string | |||
UserName string | |||
Password string | |||
SpecialPools string | |||
C2NetSequence string | |||
SyncScriptProject string | |||
LocalCenterID string | |||
GPULocalCenterID string | |||
AiCenterInfo string | |||
AiCenterCodeAndNameInfo string | |||
UsageRateBeginTime string | |||
GPUImageCommonName string | |||
MultiNode string | |||
Env string | |||
Host string | |||
UserName string | |||
Password string | |||
SpecialPools string | |||
C2NetSequence string | |||
SyncScriptProject string | |||
LocalCenterID string | |||
GPULocalCenterID string | |||
AiCenterInfo string | |||
AiCenterCodeAndNameInfo string | |||
AiCenterCodeAndNameAndLocInfo string | |||
UsageRateBeginTime string | |||
GPUImageCommonName string | |||
MultiNode string | |||
}{} | |||
ClearStrategy = struct { | |||
@@ -655,6 +658,8 @@ var ( | |||
C2NetMapInfo map[string]*C2NetSequenceInfo | |||
AiCenterCodeAndNameMapInfo map[string]*C2NetSequenceInfo | |||
AiCenterCodeAndNameAndLocMapInfo map[string]*C2NetSequenceInfo | |||
//elk config | |||
ElkUrl string | |||
ElkUser string | |||
@@ -842,6 +847,13 @@ var ( | |||
ModelApp = struct { | |||
DesensitizationUrl string | |||
}{} | |||
FLOW_CONTROL = struct { | |||
ATTACHEMENT_NUM_A_USER_LAST24HOUR int | |||
ATTACHEMENT_NUM_A_USER_LAST10M int | |||
ATTACHEMENT_SIZE_A_USER int64 //G | |||
ALL_ATTACHEMENT_NUM_SDK int | |||
}{} | |||
) | |||
// DateLang transforms standard language locale name to corresponding value in datetime plugin. | |||
@@ -1623,30 +1635,7 @@ func NewContext() { | |||
UserBasePath = sec.Key("BASE_PATH_USER").MustString("users/") | |||
PROXYURL = sec.Key("PROXY_URL").MustString("") | |||
sec = Cfg.Section("modelarts") | |||
ModelArtsHost = sec.Key("ENDPOINT").MustString("") | |||
IamHost = sec.Key("IAMHOST").MustString("") | |||
ProjectID = sec.Key("PROJECT_ID").MustString("") | |||
ProjectName = sec.Key("PROJECT_NAME").MustString("") | |||
ModelArtsUsername = sec.Key("USERNAME").MustString("") | |||
ModelArtsPassword = sec.Key("PASSWORD").MustString("") | |||
ModelArtsDomain = sec.Key("DOMAIN").MustString("") | |||
AllowedOrg = sec.Key("ORGANIZATION").MustString("") | |||
ProfileID = sec.Key("PROFILE_ID").MustString("") | |||
PoolInfos = sec.Key("POOL_INFOS").MustString("") | |||
ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | |||
Capacity = sec.Key("CAPACITY").MustInt(100) | |||
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30) | |||
ResourcePools = sec.Key("Resource_Pools").MustString("") | |||
Engines = sec.Key("Engines").MustString("") | |||
EngineVersions = sec.Key("Engine_Versions").MustString("") | |||
FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("") | |||
TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("") | |||
ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("") | |||
ModelArtsMultiNode = sec.Key("MULTI_NODE").MustString("") | |||
ModelArtsShareAddr = sec.Key("ModelArts_Share_Addr").MustString("192.168.0.30:/") | |||
ModelArtsMountPath = sec.Key("ModelArts_Mount_Path").MustString("/cache/sfs") | |||
ModelArtsNasType = sec.Key("ModelArts_Nas_Type").MustString("nfs") | |||
GetModelartsConfig() | |||
sec = Cfg.Section("elk") | |||
ElkUrl = sec.Key("ELKURL").MustString("") | |||
@@ -1742,13 +1731,43 @@ func NewContext() { | |||
BaiduWenXin.RUN_WORKERS = sec.Key("RUN_WORKERS").MustInt(1) | |||
BaiduWenXin.MODEL_SERVERS = sec.Key("MODEL_SERVERS").MustInt(1) | |||
getGrampusConfig() | |||
getModelartsCDConfig() | |||
GetGrampusConfig() | |||
GetModelartsCDConfig() | |||
getModelConvertConfig() | |||
getModelSafetyConfig() | |||
getModelAppConfig() | |||
getClearStrategy() | |||
NewScreenMapConfig() | |||
} | |||
func GetModelartsConfig() { | |||
sec := Cfg.Section("modelarts") | |||
ModelArtsHost = sec.Key("ENDPOINT").MustString("") | |||
IamHost = sec.Key("IAMHOST").MustString("") | |||
ProjectID = sec.Key("PROJECT_ID").MustString("") | |||
ProjectName = sec.Key("PROJECT_NAME").MustString("") | |||
ModelArtsUsername = sec.Key("USERNAME").MustString("") | |||
ModelArtsPassword = sec.Key("PASSWORD").MustString("") | |||
ModelArtsDomain = sec.Key("DOMAIN").MustString("") | |||
AllowedOrg = sec.Key("ORGANIZATION").MustString("") | |||
ProfileID = sec.Key("PROFILE_ID").MustString("") | |||
PoolInfos = sec.Key("POOL_INFOS").MustString("") | |||
ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | |||
Capacity = sec.Key("CAPACITY").MustInt(100) | |||
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30) | |||
ResourcePools = sec.Key("Resource_Pools").MustString("") | |||
Engines = sec.Key("Engines").MustString("") | |||
EngineVersions = sec.Key("Engine_Versions").MustString("") | |||
FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("") | |||
TrainJobFLAVORINFOS = sec.Key("TrainJob_FLAVOR_INFOS").MustString("") | |||
ModelArtsSpecialPools = sec.Key("SPECIAL_POOL").MustString("") | |||
ModelArtsMultiNode = sec.Key("MULTI_NODE").MustString("") | |||
ModelArtsShareAddr = sec.Key("ModelArts_Share_Addr").MustString("192.168.0.30:/") | |||
ModelArtsMountPath = sec.Key("ModelArts_Mount_Path").MustString("/cache/sfs") | |||
ModelArtsNasType = sec.Key("ModelArts_Nas_Type").MustString("nfs") | |||
getFineTuneConfig() | |||
getFlowControlConfig() | |||
} | |||
func getModelSafetyConfig() { | |||
@@ -1787,14 +1806,20 @@ func getModelConvertConfig() { | |||
ModelConvert.PaddleOnnxBootFile = sec.Key("PaddleOnnxBootFile").MustString("convert_paddle.py") | |||
ModelConvert.MXnetOnnxBootFile = sec.Key("MXnetOnnxBootFile").MustString("convert_mxnet.py") | |||
} | |||
func getFlowControlConfig() { | |||
sec := Cfg.Section("flow_control") | |||
FLOW_CONTROL.ALL_ATTACHEMENT_NUM_SDK = sec.Key("ALL_ATTACHEMENT_NUM_SDK").MustInt(100) | |||
FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST24HOUR = sec.Key("ATTACHEMENT_NUM_A_USER_LAST24HOUR").MustInt(1000) | |||
FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST10M = sec.Key("ATTACHEMENT_NUM_A_USER_LAST10M").MustInt(10) | |||
FLOW_CONTROL.ATTACHEMENT_SIZE_A_USER = sec.Key("ATTACHEMENT_SIZE_A_USER").MustInt64(500) | |||
} | |||
func getModelAppConfig() { | |||
sec := Cfg.Section("model_app") | |||
ModelApp.DesensitizationUrl = sec.Key("desensitization_url").MustString("") | |||
} | |||
func getModelartsCDConfig() { | |||
func GetModelartsCDConfig() { | |||
sec := Cfg.Section("modelarts-cd") | |||
ModelartsCD.Enabled = sec.Key("ENABLED").MustBool(false) | |||
@@ -1821,7 +1846,7 @@ func getClearStrategy() { | |||
ClearStrategy.RunAtStart = sec.Key("RUN_AT_START").MustBool(false) | |||
} | |||
func getGrampusConfig() { | |||
func GetGrampusConfig() { | |||
sec := Cfg.Section("grampus") | |||
Grampus.Env = sec.Key("ENV").MustString("TEST") | |||
@@ -1831,6 +1856,8 @@ func getGrampusConfig() { | |||
Grampus.SpecialPools = sec.Key("SPECIAL_POOL").MustString("") | |||
Grampus.C2NetSequence = sec.Key("C2NET_SEQUENCE").MustString("{\"sequence\":[{\"id\":1,\"name\":\"cloudbrain_one\",\"content\":\"鹏城云脑一号\",\"content_en\":\"Pencheng Cloudbrain Ⅰ\"},{\"id\":2,\"name\":\"cloudbrain_two\",\"content\":\"鹏城云脑二号\",\"content_en\":\"Pencheng Cloudbrain Ⅱ\"},{\"id\":3,\"name\":\"beida\",\"content\":\"北大人工智能集群系统\",\"content_en\":\"Peking University AI Center\"},{\"id\":4,\"name\":\"hefei\",\"content\":\"合肥类脑智能开放平台\",\"content_en\":\"Hefei AI Center\"},{\"id\":5,\"name\":\"wuhan\",\"content\":\"武汉人工智能计算中心\",\"content_en\":\"Wuhan AI Center\"},{\"id\":6,\"name\":\"xian\",\"content\":\"西安未来人工智能计算中心\",\"content_en\":\"Xi'an AI Center\"},{\"id\":7,\"pclcci\":\"more\",\"content\":\"鹏城云计算所\",\"content_en\":\"Pengcheng Cloud Computing Institute\"},{\"id\":8,\"name\":\"xuchang\",\"content\":\"中原人工智能计算中心\",\"content_en\":\"Zhongyuan AI Center\"},{\"id\":9,\"name\":\"chengdu\",\"content\":\"成都人工智能计算中心\",\"content_en\":\"Chengdu AI Center\"},{\"id\":10,\"name\":\"more\",\"content\":\"横琴先进智能计算中心\",\"content_en\":\"Hengqin AI Center\"},{\"id\":11,\"name\":\"more\",\"content\":\"国家超级计算济南中心\",\"content_en\":\"HPC & AI Center\"}]}") | |||
Grampus.AiCenterCodeAndNameInfo = sec.Key("AI_CENTER_CODE_AND_NAME").MustString("{\"sequence\":[{\"id\":1,\"name\":\"cloudbrain_one\",\"content\":\"鹏城云脑一号\",\"content_en\":\"Pencheng Cloudbrain Ⅰ\"},{\"id\":2,\"name\":\"cloudbrain_two\",\"content\":\"鹏城云脑二号\",\"content_en\":\"Pencheng Cloudbrain Ⅱ\"},{\"id\":3,\"name\":\"beida\",\"content\":\"北大人工智能集群系统\",\"content_en\":\"Peking University AI Center\"},{\"id\":4,\"name\":\"hefei\",\"content\":\"合肥类脑智能开放平台\",\"content_en\":\"Hefei AI Center\"},{\"id\":5,\"name\":\"wuhan\",\"content\":\"武汉人工智能计算中心\",\"content_en\":\"Wuhan AI Center\"},{\"id\":6,\"name\":\"xian\",\"content\":\"西安未来人工智能计算中心\",\"content_en\":\"Xi'an AI Center\"},{\"id\":7,\"pclcci\":\"more\",\"content\":\"鹏城云计算所\",\"content_en\":\"Pengcheng Cloud Computing Institute\"},{\"id\":8,\"name\":\"xuchang\",\"content\":\"中原人工智能计算中心\",\"content_en\":\"Zhongyuan AI Center\"},{\"id\":9,\"name\":\"chengdu\",\"content\":\"成都人工智能计算中心\",\"content_en\":\"Chengdu AI Center\"},{\"id\":10,\"name\":\"more\",\"content\":\"横琴先进智能计算中心\",\"content_en\":\"Hengqin AI Center\"},{\"id\":11,\"name\":\"more\",\"content\":\"国家超级计算济南中心\",\"content_en\":\"HPC & AI Center\"}]}") | |||
Grampus.AiCenterCodeAndNameAndLocInfo = sec.Key("AI_CENTER_CODE_AND_NAME_AND_LOC").MustString("{\"sequence\":[{\"id\":1,\"name\":\"cloudbrain_one\",\"content\":\"鹏城云脑一号\",\"content_en\":\"Pencheng Cloudbrain Ⅰ\"},{\"id\":2,\"name\":\"cloudbrain_two\",\"content\":\"鹏城云脑二号\",\"content_en\":\"Pencheng Cloudbrain Ⅱ\"},{\"id\":3,\"name\":\"beida\",\"content\":\"北大人工智能集群系统\",\"content_en\":\"Peking University AI Center\"},{\"id\":4,\"name\":\"hefei\",\"content\":\"合肥类脑智能开放平台\",\"content_en\":\"Hefei AI Center\"},{\"id\":5,\"name\":\"wuhan\",\"content\":\"武汉人工智能计算中心\",\"content_en\":\"Wuhan AI Center\"},{\"id\":6,\"name\":\"xian\",\"content\":\"西安未来人工智能计算中心\",\"content_en\":\"Xi'an AI Center\"},{\"id\":7,\"pclcci\":\"more\",\"content\":\"鹏城云计算所\",\"content_en\":\"Pengcheng Cloud Computing Institute\"},{\"id\":8,\"name\":\"xuchang\",\"content\":\"中原人工智能计算中心\",\"content_en\":\"Zhongyuan AI Center\"},{\"id\":9,\"name\":\"chengdu\",\"content\":\"成都人工智能计算中心\",\"content_en\":\"Chengdu AI Center\"},{\"id\":10,\"name\":\"more\",\"content\":\"横琴先进智能计算中心\",\"content_en\":\"Hengqin AI Center\"},{\"id\":11,\"name\":\"more\",\"content\":\"国家超级计算济南中心\",\"content_en\":\"HPC & AI Center\"}]}") | |||
Grampus.UsageRateBeginTime = sec.Key("USAGE_RATE_BEGIN_TIME").MustString("2021-01-01 00:00:00") | |||
Grampus.GPUImageCommonName = sec.Key("GPU_IMAGE_COMMON_NAME").MustString("image") | |||
if Grampus.C2NetSequence != "" { | |||
@@ -1842,6 +1869,15 @@ func getGrampusConfig() { | |||
C2NetMapInfo[value.Name] = value | |||
} | |||
} | |||
if Grampus.AiCenterCodeAndNameAndLocInfo != "" { | |||
if err := json.Unmarshal([]byte(Grampus.AiCenterCodeAndNameAndLocInfo), &C2NetInfos); err != nil { | |||
log.Error("Unmarshal(AiCenterCodeAndNameLocInfo) failed:%v", err) | |||
} | |||
AiCenterCodeAndNameAndLocMapInfo = make(map[string]*C2NetSequenceInfo) | |||
for _, value := range C2NetInfos.C2NetSqInfo { | |||
AiCenterCodeAndNameAndLocMapInfo[value.Name] = value | |||
} | |||
} | |||
if Grampus.AiCenterCodeAndNameInfo != "" { | |||
if err := json.Unmarshal([]byte(Grampus.AiCenterCodeAndNameInfo), &C2NetInfos); err != nil { | |||
log.Error("Unmarshal(AiCenterCodeAndNameInfo) failed:%v", err) | |||
@@ -1851,6 +1887,7 @@ func getGrampusConfig() { | |||
AiCenterCodeAndNameMapInfo[value.Name] = value | |||
} | |||
} | |||
Grampus.SyncScriptProject = sec.Key("SYNC_SCRIPT_PROJECT").MustString("script_for_grampus") | |||
Grampus.LocalCenterID = sec.Key("LOCAL_CENTER_ID").MustString("cloudbrain2") | |||
Grampus.GPULocalCenterID = sec.Key("GPU_LOCAL_CENTER_ID").MustString("openi") | |||
@@ -1984,22 +2021,20 @@ func ensureLFSDirectory() { | |||
} | |||
func getNotebookImageInfos() { | |||
if StImageInfos == nil { | |||
if ModelartsCD.Enabled { | |||
json.Unmarshal([]byte(ModelartsCD.ImageInfos), &StImageInfos) | |||
} else { | |||
json.Unmarshal([]byte(ImageInfos), &StImageInfos) | |||
} | |||
if ModelartsCD.Enabled { | |||
json.Unmarshal([]byte(ModelartsCD.ImageInfos), &StImageInfos) | |||
} else { | |||
json.Unmarshal([]byte(ImageInfos), &StImageInfos) | |||
} | |||
} | |||
func getNotebookFlavorInfos() { | |||
if StFlavorInfo == nil { | |||
if ModelartsCD.Enabled { | |||
json.Unmarshal([]byte(ModelartsCD.FlavorInfos), &StFlavorInfo) | |||
} else { | |||
json.Unmarshal([]byte(FlavorInfos), &StFlavorInfo) | |||
} | |||
if ModelartsCD.Enabled { | |||
json.Unmarshal([]byte(ModelartsCD.FlavorInfos), &StFlavorInfo) | |||
} else { | |||
json.Unmarshal([]byte(FlavorInfos), &StFlavorInfo) | |||
} | |||
} | |||
@@ -99,6 +99,7 @@ type CreateFileNotebookJobOption struct { | |||
OwnerName string `json:"owner_name" binding:"Required"` | |||
ProjectName string `json:"project_name" binding:"Required"` | |||
JobId string `json:"job_id"` | |||
ID int64 `json:"id"` | |||
} | |||
type Cloudbrain struct { | |||
@@ -794,7 +794,7 @@ func licenses() []string { | |||
// Dataset tasks | |||
func tasks() []string { | |||
return []string{"machine_translation", "question_answering_system", "information_retrieval", "knowledge_graph", "text_annotation", "text_categorization", "emotion_analysis", "language_modeling", "speech_recognition", "automatic_digest", "information_extraction", "description_generation", "image_classification", "face_recognition", "image_search", "target_detection", "image_description_generation", "vehicle_license_plate_recognition", "medical_image_analysis", "unmanned", "unmanned_security", "drone", "vr_ar", "2_d_vision", "2.5_d_vision", "3_d_reconstruction", "image_processing", "video_processing", "visual_input_system", "speech_coding", "speech_enhancement", "speech_synthesis","ROS_hmci"} | |||
return []string{"machine_translation", "question_answering_system", "information_retrieval", "knowledge_graph", "text_annotation", "text_categorization", "emotion_analysis", "language_modeling", "speech_recognition", "automatic_digest", "information_extraction", "description_generation", "image_classification", "face_recognition", "image_search", "target_detection", "image_description_generation", "vehicle_license_plate_recognition", "medical_image_analysis", "unmanned", "unmanned_security", "drone", "vr_ar", "2_d_vision", "2.5_d_vision", "3_d_reconstruction", "image_processing", "video_processing", "visual_input_system", "speech_coding", "speech_enhancement", "speech_synthesis", "ros_hmci_datasets"} | |||
} | |||
func GetRefType(ref string) string { | |||
@@ -933,7 +933,7 @@ task.speech_coding= speech coding | |||
task.speech_enhancement= speech enhancement | |||
task.speech_recognition= speech recognition | |||
task.speech_synthesis= speech synthesis | |||
task.ROS_hmci=ROS-hmci Community | |||
task.ros_hmci_datasets=ROS-hmci datasets | |||
category.computer_vision= computer vision | |||
category.natural_language_processing= natural language processing | |||
category.speech_processing= speech processing | |||
@@ -968,7 +968,8 @@ download = Download | |||
modify_description = Modify Description | |||
set_public = Set Public | |||
set_private = Set Private | |||
annotation = Annotation | |||
annotation = Image Annotation | |||
more_annotation = More Annotation | |||
upload_dataset_file = Upload Dataset File | |||
file_description = File Description | |||
data_upload = Dataset Upload | |||
@@ -1091,6 +1092,7 @@ repo_mirror_add=Mirror Project Increment | |||
repo_self_add=Custom Project Increment | |||
debug=Debug | |||
online_debug = Start | |||
debug_again=Restart | |||
stop=Stop | |||
delete=Delete | |||
@@ -1267,6 +1269,7 @@ cloudbrain.morethanonejob=You already have a running or waiting task, create it | |||
cloudbrain.morethanonejob1=You have created an <span style="color:rgba(242, 113, 28, 1);"> equivalent task </span> that is waiting or running, please wait for the task to finish before creating it. | |||
cloudbrain.morethanonejob2=You can view all your Cloud Brain tasks in <a href="/cloudbrains" target="_blank"> Home > Cloudbrain Task </a>. | |||
modelarts.online_infer = Online Inference | |||
modelarts.infer_job_model = Model | |||
modelarts.infer_job_model_file = Model File | |||
modelarts.infer_job = Inference Job | |||
@@ -3176,6 +3179,7 @@ task_c2ent_gcudebugjob=`created GCU type debugging task <a href="%s/grampus/trai | |||
task_c2ent_gcutrainjob=`created GCU type train task <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_c2ent_mludebugjob=`created MLU type debugging task <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2ent_mlutrainjob=`created MLU type train task <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_c2ent_onlineinferjob=`created GPU type online inference task <a href="%s/grampus/onlineinfer/%s">%s</a>` | |||
task_nputrainjob=`created NPU training task <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_inferencejob=`created reasoning task <a href="%s/modelarts/inference-job/%s">%s</a>` | |||
task_benchmark=`created profiling task <a href="%s/cloudbrain/benchmark/%s">%s</a>` | |||
@@ -3344,6 +3348,7 @@ SIM2BRAIN_SNN = BENCHMARK | |||
TRAIN = TRAIN | |||
INFERENCE = INFERENCE | |||
BENCHMARK = BENCHMARK | |||
ONLINEINFERENCE = ONLINEINFERENCE | |||
brain_area = Brain Area | |||
Delete_failed=Fail to delete the job, please try again later. | |||
@@ -3363,7 +3368,7 @@ new_debug_gpu_tooltips1 = The code is storaged in <strong style="color:#010101"> | |||
new_train_npu_tooltips = The code is storaged in <strong style="color:#010101">%s</strong>, the pre-trained model is storaged in the run parameter <strong style="color:#010101">%s</strong>, and please put your model into <strong style="color:#010101">%s</strong> then you can download it online | |||
new_infer_gpu_tooltips = The dataset is stored in <strong style="color:#010101">%s</strong>, the model file is stored in <strong style="color:#010101">%s</strong>, please store the inference output in <strong style="color:#010101">%s</strong> for subsequent downloads. | |||
code_obs_address = Code OBS address | |||
task_save_most_time = <p><span>*</span>The platform only retains the results of debugge, train, inference and evaluation tasks for nearly<span> 30 </span> days <span>Tasks over 30 days will not be able to download results and view logs, and cannot be debugged or trained again</span></p> | |||
task_save_most_time = <p><span>*</span>The platform only retains the results of debug, train, inference and evaluation tasks for nearly<span> 30 </span> days. <span>Tasks over 30 days will not be able to download results and view logs, and cannot be debugged or trained again</span></p> | |||
query_finetune_fail=Fail to query fine tuning job, please try again later. | |||
finetune_max=The number of fine tuning job you created exceed the limit. please delete some first. | |||
dataset_same_fail=The name of dataset file is used by the fine tune job, please select other dataset file. | |||
@@ -3422,12 +3427,19 @@ multi_task = You have already a running or waiting task, can not create more | |||
job_name_already_used = The job name did already exist | |||
insufficient_point_balance = Insufficient point balance | |||
create_failed = Create AI task failed | |||
restart_failed = Restart AI task failed | |||
restart_failed = Restart AI task failed, please try again later. | |||
stop_failed = Fail to stop the job, please try again later. | |||
can_not_restart = The task was not scheduled successfully before, so it cannot be restart. | |||
dataset_size_over_limit = The size of dataset exceeds limitation (%dGB) | |||
boot_file_must_python = The boot file must be a python file | |||
boot_file_not_exist= The boot file is not exists. | |||
branch_not_exists= The branch does not exist. Please refresh and select again. | |||
[common_error] | |||
system_error = System error.Please try again later | |||
insufficient_permission = Insufficient permissions | |||
param_error = The parameter you submitted is incorrect | |||
wechat_not_bind = Please scan the code and bind to wechat first | |||
[deployment] | |||
deploy_max = The maximum deployment is %v per user | |||
@@ -3437,4 +3449,4 @@ model_copy_failed = Failed to copy the model files | |||
builidng_fail = Failed to build AI Model, please try again later | |||
deletion_notice_repo = There is a deploying or running service related to this repository, please stop the service before deletion. | |||
deletion_notice_trainjob = There is a deploying or running service related to this task, please stop the service before deletion. | |||
stop_service_failed = Failed to stop deploy service | |||
stop_service_failed = Failed to stop deploy service |
@@ -938,7 +938,7 @@ task.speech_coding=语音编码 | |||
task.speech_enhancement=语音增强 | |||
task.speech_recognition=语音识别 | |||
task.speech_synthesis=语音合成 | |||
task.ROS_hmci=开源开放社区 | |||
task.ros_hmci_datasets=开源开放社区数据集 | |||
category.computer_vision=计算机视觉 | |||
category.natural_language_processing=自然语言处理 | |||
category.speech_processing=语音处理 | |||
@@ -973,7 +973,8 @@ download = 下载 | |||
modify_description = 修改描述 | |||
set_public = 设为公开 | |||
set_private = 设为私有 | |||
annotation = 标注 | |||
annotation = 图片标注 | |||
more_annotation = 更多标注 | |||
upload_dataset_file = 上传数据集文件 | |||
file_description = 文件描述 | |||
data_upload = 数据上传 | |||
@@ -1090,6 +1091,7 @@ repo_mirror_add=新增镜像项目 | |||
repo_self_add=新增自建项目 | |||
debug=调试 | |||
online_debug = 在线推理 | |||
debug_again=再次调试 | |||
stop=停止 | |||
delete=删除 | |||
@@ -1279,6 +1281,7 @@ cloudbrain.morethanonejob=您已经创建了一个正在等待或运行中的同 | |||
cloudbrain.morethanonejob1=您已经有 <span style="color:rgba(242, 113, 28, 1);">同类任务</span> 正在等待或运行中,请等待任务结束再创建; | |||
cloudbrain.morethanonejob2=可以在 “<a href="/cloudbrains" target="_blank" >个人中心 > 云脑任务</a>” 查看您所有的云脑任务。 | |||
modelarts.online_infer = 在线推理 | |||
modelarts.infer_job_model = 模型名称 | |||
modelarts.infer_job_model_file = 模型文件 | |||
modelarts.infer_job = 推理任务 | |||
@@ -3194,6 +3197,7 @@ task_c2ent_gcudebugjob=`创建了GCU类型调试任务 <a href="%s/grampus/noteb | |||
task_c2ent_gcutrainjob=`创建了GCU类型训练任务 <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2ent_mludebugjob=`创建了MLU类型调试任务 <a href="%s/grampus/notebook/%s">%s</a>` | |||
task_c2ent_mlutrainjob=`创建了MLU类型训练任务 <a href="%s/grampus/train-job/%s">%s</a>` | |||
task_c2ent_onlineinferjob=`创建了GPU类型在线推理任务 <a href="%s/grampus/onlineinfer/%s">%s</a>` | |||
task_nputrainjob=`创建了NPU类型训练任务 <a href="%s/modelarts/train-job/%s">%s</a>` | |||
task_inferencejob=`创建了推理任务 <a href="%s/modelarts/inference-job/%s">%s</a>` | |||
task_benchmark=`创建了评测任务 <a href="%s/cloudbrain/benchmark/%s">%s</a>` | |||
@@ -3365,6 +3369,7 @@ SIM2BRAIN_SNN = 评测任务 | |||
TRAIN = 训练任务 | |||
INFERENCE = 推理任务 | |||
BENCHMARK = 评测任务 | |||
ONLINEINFERENCE = 在线推理 | |||
brain_area = 脑区 | |||
Delete_failed=任务删除失败,请稍后再试。 | |||
@@ -3444,12 +3449,19 @@ multi_task = 您已经有一个正在等待或运行中的任务,请结束该 | |||
job_name_already_used = 任务名已被使用,请换一个名称 | |||
insufficient_point_balance = 积分余额不足 | |||
create_failed = 创建AI任务失败 | |||
restart_failed = 再次调试AI任务失败 | |||
restart_failed = 再次调试失败,请稍后再试 | |||
stop_failed = 任务停止失败,请稍后再试 | |||
can_not_restart = 这个任务之前没有调度成功,不能再次调试。 | |||
dataset_size_over_limit = 数据集大小超过限制(%dGB) | |||
boot_file_must_python = 启动文件必须是python文件 | |||
boot_file_not_exist =启动文件不存在 | |||
branch_not_exists= 代码分支不存在,请刷新后重试 | |||
[common_error] | |||
system_error = 当前服务不可用,请稍后再试 | |||
insufficient_permission = 权限不足 | |||
param_error = 提交的参数有误 | |||
wechat_not_bind = 请先扫码绑定微信 | |||
[deployment] | |||
deploy_max = 每个用户只能同时创建 %v 个部署任务 | |||
@@ -3459,4 +3471,4 @@ model_copy_failed = 模型拷贝失败,请重新部署 | |||
builidng_fail = AI应用创建失败 | |||
deletion_notice_repo = 此项目有正在部署或正在体验的服务,请先停止服务,然后再删除。 | |||
deletion_notice_trainjob = 此任务有正在部署或正在体验的服务,请先停止服务,然后再删除。 | |||
stop_service_failed = 停止部署服务失败 | |||
stop_service_failed = 停止部署服务失败 |
@@ -1,5 +1,5 @@ | |||
{ | |||
"name": "aiforge", | |||
"name": "aiforge1", | |||
"lockfileVersion": 2, | |||
"requires": true, | |||
"packages": { | |||
@@ -21,7 +21,7 @@ | |||
"dayjs": "1.10.7", | |||
"domino": "2.1.5", | |||
"dropzone": "5.7.2", | |||
"echarts": "3.8.5", | |||
"echarts": "5.4.2", | |||
"element-ui": "2.15.5", | |||
"esdk-obs-browserjs": "3.22.3", | |||
"esdk-obs-nodejs": "3.20.11", | |||
@@ -5448,13 +5448,19 @@ | |||
} | |||
}, | |||
"node_modules/echarts": { | |||
"version": "3.8.5", | |||
"resolved": "https://registry.npmmirror.com/echarts/download/echarts-3.8.5.tgz", | |||
"integrity": "sha1-WOSlHSdDxvt1JXsNwKnPn1N4rA4=", | |||
"version": "5.4.2", | |||
"resolved": "https://registry.npmmirror.com/echarts/-/echarts-5.4.2.tgz", | |||
"integrity": "sha512-2W3vw3oI2tWJdyAz+b8DuWS0nfXtSDqlDmqgin/lfzbkB01cuMEN66KWBlmur3YMp5nEDEEt5s23pllnAzB4EA==", | |||
"dependencies": { | |||
"zrender": "3.7.4" | |||
"tslib": "2.3.0", | |||
"zrender": "5.4.3" | |||
} | |||
}, | |||
"node_modules/echarts/node_modules/tslib": { | |||
"version": "2.3.0", | |||
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz", | |||
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==" | |||
}, | |||
"node_modules/editions": { | |||
"version": "1.3.4", | |||
"resolved": "https://registry.npmjs.org/editions/-/editions-1.3.4.tgz", | |||
@@ -20940,9 +20946,17 @@ | |||
"integrity": "sha1-6NV3TRwHOKR7z6hynzcS4t7d6yU=" | |||
}, | |||
"node_modules/zrender": { | |||
"version": "3.7.4", | |||
"resolved": "https://registry.nlark.com/zrender/download/zrender-3.7.4.tgz", | |||
"integrity": "sha1-+EfVOUhIHvbUKQbR6prux6y+/fI=" | |||
"version": "5.4.3", | |||
"resolved": "https://registry.npmmirror.com/zrender/-/zrender-5.4.3.tgz", | |||
"integrity": "sha512-DRUM4ZLnoaT0PBVvGBDO9oWIDBKFdAVieNWxWwK0niYzJCMwGchRk21/hsE+RKkIveH3XHCyvXcJDkgLVvfizQ==", | |||
"dependencies": { | |||
"tslib": "2.3.0" | |||
} | |||
}, | |||
"node_modules/zrender/node_modules/tslib": { | |||
"version": "2.3.0", | |||
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz", | |||
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==" | |||
} | |||
}, | |||
"dependencies": { | |||
@@ -25312,11 +25326,19 @@ | |||
} | |||
}, | |||
"echarts": { | |||
"version": "3.8.5", | |||
"resolved": "https://registry.npmmirror.com/echarts/download/echarts-3.8.5.tgz", | |||
"integrity": "sha1-WOSlHSdDxvt1JXsNwKnPn1N4rA4=", | |||
"version": "5.4.2", | |||
"resolved": "https://registry.npmmirror.com/echarts/-/echarts-5.4.2.tgz", | |||
"integrity": "sha512-2W3vw3oI2tWJdyAz+b8DuWS0nfXtSDqlDmqgin/lfzbkB01cuMEN66KWBlmur3YMp5nEDEEt5s23pllnAzB4EA==", | |||
"requires": { | |||
"zrender": "3.7.4" | |||
"tslib": "2.3.0", | |||
"zrender": "5.4.3" | |||
}, | |||
"dependencies": { | |||
"tslib": { | |||
"version": "2.3.0", | |||
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz", | |||
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==" | |||
} | |||
} | |||
}, | |||
"editions": { | |||
@@ -37726,9 +37748,19 @@ | |||
"integrity": "sha1-6NV3TRwHOKR7z6hynzcS4t7d6yU=" | |||
}, | |||
"zrender": { | |||
"version": "3.7.4", | |||
"resolved": "https://registry.nlark.com/zrender/download/zrender-3.7.4.tgz", | |||
"integrity": "sha1-+EfVOUhIHvbUKQbR6prux6y+/fI=" | |||
"version": "5.4.3", | |||
"resolved": "https://registry.npmmirror.com/zrender/-/zrender-5.4.3.tgz", | |||
"integrity": "sha512-DRUM4ZLnoaT0PBVvGBDO9oWIDBKFdAVieNWxWwK0niYzJCMwGchRk21/hsE+RKkIveH3XHCyvXcJDkgLVvfizQ==", | |||
"requires": { | |||
"tslib": "2.3.0" | |||
}, | |||
"dependencies": { | |||
"tslib": { | |||
"version": "2.3.0", | |||
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz", | |||
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==" | |||
} | |||
} | |||
} | |||
} | |||
} |
@@ -20,7 +20,7 @@ | |||
"dayjs": "1.10.7", | |||
"domino": "2.1.5", | |||
"dropzone": "5.7.2", | |||
"echarts": "3.8.5", | |||
"echarts": "5.4.2", | |||
"element-ui": "2.15.5", | |||
"esdk-obs-browserjs": "3.22.3", | |||
"esdk-obs-nodejs": "3.20.11", | |||
@@ -246,11 +246,21 @@ document.onreadystatechange = function () { | |||
else if(record.OpType == "24" || record.OpType == "26" || record.OpType == "27" || record.OpType == "28" || record.OpType == "30" | |||
|| record.OpType == "31" || record.OpType == "32" || record.OpType == "33" || record.OpType == "42" || record.OpType == "44"){ | |||
html += recordPrefix + actionName; | |||
html += " <a href=\"" + getTaskLink(record) + "\" rel=\"nofollow\">" + record.RefName + "</a>" | |||
const taskLink = getTaskLink(record); | |||
if (taskLink) { | |||
html += " <a href=\"" + taskLink + "\" rel=\"nofollow\">" + record.RefName + "</a>" | |||
} else { | |||
html += " <span style=\"color: rgba(0,0,0,0.3)\">" + record.RefName + "</span>" | |||
} | |||
} | |||
else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41" || record.OpType == "43"){ | |||
else if(record.OpType == "25" || record.OpType == "29" || record.OpType == "39" || record.OpType == "40" || record.OpType == "41" || record.OpType == "43"|| record.OpType == "44"|| record.OpType == "45"){ | |||
html += recordPrefix + actionName; | |||
html += " <a href=\"" + getTaskLink(record) + "\" rel=\"nofollow\">" + record.RefName + "</a>" | |||
const taskLink = getTaskLink(record); | |||
if (taskLink) { | |||
html += " <a href=\"" + taskLink + "\" rel=\"nofollow\">" + record.RefName + "</a>" | |||
} else { | |||
html += " <span style=\"color: rgba(0,0,0,0.3)\">" + record.RefName + "</span>" | |||
} | |||
} | |||
else if(record.OpType == "35"){ | |||
var datasetLink = "<a href=\"" + getRepoLink(record) + "/datasets" + "\" rel=\"nofollow\">" + record.Content.split('|')[1] + "</a>"; | |||
@@ -280,9 +290,17 @@ function getTaskLink(record){ | |||
if(record.OpType == 24){ | |||
re = re + "/datasets"; | |||
}else if(record.OpType == 25){ | |||
re = re + "/cloudbrain/" + record.Content; | |||
if (record.Cloudbrain) { | |||
re = re + "/cloudbrain/" + record.Cloudbrain.ID; | |||
} else { | |||
re = ''; | |||
} | |||
}else if(record.OpType == 26){ | |||
re = re + "/modelarts/notebook/" + record.Content; | |||
if (record.Cloudbrain) { | |||
re = re + "/modelarts/notebook/" + record.Content; | |||
} else { | |||
re = ''; | |||
} | |||
}else if(record.OpType == 27){ | |||
re = re + "/modelarts/train-job/" + record.Content; | |||
}else if(record.OpType == 28){ | |||
@@ -296,9 +314,14 @@ function getTaskLink(record){ | |||
}else if(record.OpType == 32 || record.OpType == 33 || record.OpType == 42 || record.OpType == 44){ | |||
re = re + "/grampus/train-job/" + record.Content; | |||
}else if(record.OpType == 39 || record.OpType == 40 || record.OpType == 41 || record.OpType == 43){ | |||
re = re + "/grampus/notebook/" + record.Content; | |||
if (record.Cloudbrain) { | |||
re = re + "/grampus/notebook/" + record.Cloudbrain.ID; | |||
} else { | |||
re = ''; | |||
} | |||
} else if(record.OpType == 45){ | |||
re = re + "/grampus/onlineinfer/" + record.Content; | |||
} | |||
re = encodeURI(re); | |||
return re; | |||
} | |||
@@ -455,12 +478,13 @@ var actionNameZH={ | |||
"35":"创建的数据集 {dataset} 被设置为推荐数据集", | |||
"36":"提交了镜像 {image}", | |||
"37":"提交的镜像 {image} 被设置为推荐镜像", | |||
"39":"创建了CPU/GPU类型调试任务", | |||
"40":"创建了NPU类型调试任务", | |||
"39":"创建了NPU类型调试任务", | |||
"40":"创建了CPU/GPU类型调试任务", | |||
"41":"创建了GCU类型调试任务", | |||
"42":"创建了GCU类型训练任务", | |||
"43":"创建了MLU类型调试任务", | |||
"44":"创建了MLU类型训练任务", | |||
"45":"创建了GPU在线推理任务", | |||
}; | |||
var actionNameEN={ | |||
@@ -492,12 +516,13 @@ var actionNameEN={ | |||
"35":" created dataset {dataset} was set as recommended dataset", | |||
"36":"committed image {image}", | |||
"37":"committed image {image} was set as recommended image", | |||
"39":" created CPU/GPU type debugging task ", | |||
"40":" created NPU type debugging task ", | |||
"39":" created NPU type debugging task ", | |||
"40":" created CPU/GPU type debugging task ", | |||
"41":" created GCU type debugging task ", | |||
"42":" created GCU type training task ", | |||
"43":" created MLU type debugging task ", | |||
"44":" created MLU type training task ", | |||
"45":" created GPU type online inference task ", | |||
}; | |||
var repoAndOrgZH={ | |||
@@ -390,6 +390,7 @@ var taskDesc = { | |||
speech_enhancement: "语音增强", | |||
speech_recognition: "语音识别", | |||
speech_synthesis: "语音合成", | |||
ros_hmci_datasets: "开源开放社区", | |||
}; | |||
var taskENDesc = { | |||
@@ -426,6 +427,7 @@ var taskENDesc = { | |||
speech_enhancement: "speech enhancement", | |||
speech_recognition: "speech recognition", | |||
speech_synthesis: "speech synthesis", | |||
ros_hmci_datasets: "ROS-hmci datasets", | |||
}; | |||
function getCategoryDesc(isZh, key) { | |||
@@ -1,46 +1,22 @@ | |||
package ai_task | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"net/http" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/context" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/util" | |||
"code.gitea.io/gitea/routers/response" | |||
creation_context "code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/task" | |||
cloudbrainService "code.gitea.io/gitea/services/cloudbrain" | |||
"code.gitea.io/gitea/services/lock" | |||
"net/http" | |||
) | |||
func CreateAITask(ctx *context.Context, form ai_task_entity.CreateReq) { | |||
func CreateAITask(ctx *context.Context, form entity.CreateReq) { | |||
handCreateReq(&form) | |||
t, err := task.GetAITask(form.JobType, form.Cluster) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
lockOperator, errMsg := cloudbrainService.Lock4CloudbrainCreation(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: form.DisplayJobName, JobType: string(form.JobType)}, User: ctx.User}) | |||
defer func() { | |||
if lockOperator != nil { | |||
lockOperator.Unlock() | |||
} | |||
}() | |||
if errMsg != "" { | |||
log.Error("lock processed failed:%s", errMsg) | |||
ctx.JSON(http.StatusOK, response.OuterServerError(ctx.Tr(errMsg))) | |||
return | |||
} | |||
res, err := t.Create(&creation_context.CreationContext{ | |||
Request: form, | |||
GitRepo: ctx.Repo.GitRepo, | |||
Repository: ctx.Repo.Repository, | |||
User: ctx.User, | |||
}) | |||
res, err := task.CreateAITask(form, ctx.Repo.GitRepo, ctx.Repo.Repository, ctx.User) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
@@ -49,7 +25,7 @@ func CreateAITask(ctx *context.Context, form ai_task_entity.CreateReq) { | |||
} | |||
func DelAITask(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, _ := task.GetAITaskByCloudbrainId(id) | |||
t, _ := task.GetAITaskTemplateByCloudbrainId(id) | |||
if t == nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx)) | |||
@@ -65,7 +41,7 @@ func DelAITask(ctx *context.Context) { | |||
} | |||
func StopAITask(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskByCloudbrainId(id) | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx)) | |||
@@ -81,36 +57,7 @@ func StopAITask(ctx *context.Context) { | |||
} | |||
func RestartAITask(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
cloudbrain, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
log.Error("RestartAITask GetCloudbrainByJobID err.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, bizErr := task.GetAITaskFromCloudbrain(cloudbrain) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.PARAM_ERROR, ctx)) | |||
return | |||
} | |||
lockOperator, errMsg := cloudbrainService.Lock4CloudbrainRestart(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{DisplayJobName: cloudbrain.DisplayJobName, JobType: cloudbrain.JobType}, User: ctx.User}) | |||
defer func() { | |||
if lockOperator != nil { | |||
lockOperator.Unlock() | |||
} | |||
}() | |||
if errMsg != "" { | |||
log.Error("lock processed failed:%s", errMsg) | |||
ctx.JSON(http.StatusOK, response.OuterServerError(ctx.Tr(errMsg))) | |||
return | |||
} | |||
res, bizErr := t.Restart(&creation_context.CreationContext{ | |||
GitRepo: ctx.Repo.GitRepo, | |||
Repository: ctx.Repo.Repository, | |||
User: ctx.User, | |||
SourceCloudbrain: cloudbrain, | |||
}) | |||
res, bizErr := task.RestartAITask(id, ctx.Repo.GitRepo, ctx.Repo.Repository, ctx.User) | |||
if bizErr != nil { | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(bizErr, ctx)) | |||
return | |||
@@ -121,7 +68,7 @@ func RestartAITask(ctx *context.Context) { | |||
func GetAITaskLog(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskByCloudbrainId(id) | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
@@ -133,18 +80,63 @@ func GetAITaskLog(ctx *context.Context) { | |||
func GetAITaskInfo(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskByCloudbrainId(id) | |||
cloudbrain, bizErr := models.GetCloudbrainByCloudbrainID(id) | |||
if bizErr != nil { | |||
log.Error("GetAITaskInfo GetCloudbrainByCloudbrainID err.%v", bizErr) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.AI_TASK_NOT_EXISTS, ctx)) | |||
return | |||
} | |||
t, err := task.GetAITaskTemplateFromCloudbrain(cloudbrain) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
resultTask, err := t.Query(id) | |||
if err != nil { | |||
log.Error("Query error.id=%d err=%v", id, err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
//国际化 | |||
resultTask.Tr(ctx.Language()) | |||
//根据权限去掉数据集和模型信息 | |||
var operatorId int64 | |||
if ctx.User != nil { | |||
operatorId = ctx.User.ID | |||
} | |||
if operatorId == 0 || cloudbrain.UserID != operatorId { | |||
resultTask.RemoveDatasets() | |||
resultTask.RemovePretrainModelList() | |||
} | |||
//加载关联版本 | |||
earlyVersionList, bizErr := task.QueryTaskEarlyVersionList(id, operatorId) | |||
if bizErr != nil { | |||
log.Error("QueryTaskEarlyVersionList err.id=%d err=%v", id, err) | |||
ctx.JSON(http.StatusOK, response.OuterResponseError(bizErr)) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(&entity.QueryAITaskRes{ | |||
Task: resultTask, | |||
EarlyVersionList: earlyVersionList, | |||
CanCreateVersion: cloudbrain.CanUserModify(ctx.User), | |||
})) | |||
} | |||
func GetAITaskBriefInfo(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res, err := t.Query(id) | |||
res, err := t.BriefQuery(id) | |||
if err != nil { | |||
log.Error("Query error.%v", err) | |||
log.Error("BriefQuery error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
res.Tr(ctx.Language()) | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(res)) | |||
} | |||
@@ -154,35 +146,44 @@ func GetAITaskOutput(ctx *context.Context) { | |||
func GetNotebookUrl(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskByCloudbrainId(id) | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
url, err := t.GetDebugUrl(id) | |||
fileName := ctx.QueryTrim("file") | |||
url, err := t.GetDebugUrl(id, fileName) | |||
if err != nil { | |||
log.Error("GetNotebookUrl error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
m := map[string]interface{}{"url": url} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(m)) | |||
} | |||
func GetCreationRequiredInfo(ctx *context.Context) { | |||
jobType := ctx.Query("job_type") | |||
var isOnlineType bool | |||
if models.JobType(jobType) == (models.JobTypeOnlineInference) { | |||
isOnlineType = true | |||
jobType = string(models.JobTypeDebug) | |||
} | |||
log.Info("required jobType=" + jobType) | |||
computeSourceName := ctx.Query("compute_source") | |||
clusterType := ctx.Query("cluster_type") | |||
computeSource := models.GetComputeSourceInstance(computeSourceName) | |||
result, err := task.GetAITaskCreationInfo(ai_task_entity.GetAITaskCreationInfoReq{ | |||
result, err := task.GetAITaskCreationInfo(entity.GetAITaskCreationInfoReq{ | |||
User: ctx.User, | |||
JobType: models.JobType(jobType), | |||
ClusterType: ai_task_entity.ClusterType(clusterType), | |||
ClusterType: entity.ClusterType(clusterType), | |||
ComputeSource: computeSource, | |||
Repo: ctx.Repo.Repository, | |||
GitRepo: ctx.Repo.GitRepo, | |||
IsOnlineType: isOnlineType, | |||
}) | |||
if err != nil { | |||
log.Error("GetAITaskCreationInfo error,err=%v", err) | |||
@@ -192,7 +193,56 @@ func GetCreationRequiredInfo(ctx *context.Context) { | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(result)) | |||
} | |||
func handCreateReq(req *ai_task_entity.CreateReq) { | |||
func GetAITaskList(ctx *context.Context) { | |||
jobType := ctx.Query("job_type") | |||
computeSourceName := ctx.Query("compute_source") | |||
page := ctx.QueryInt("page") | |||
computeSource := models.GetComputeSourceInstance(computeSourceName) | |||
if page <= 0 { | |||
page = 1 | |||
} | |||
jobTypes := make([]string, 0) | |||
if jobType != "" { | |||
jobTypes = append(jobTypes, jobType) | |||
} | |||
result, err := task.GetAITaskList(entity.GetTaskListReq{ | |||
ListOptions: models.ListOptions{ | |||
PageSize: setting.UI.IssuePagingNum, | |||
Page: page, | |||
}, | |||
ComputeSource: computeSource, | |||
JobTypes: jobTypes, | |||
RepoID: ctx.Repo.Repository.ID, | |||
Operator: ctx.User, | |||
IsRepoOwner: ctx.Repo.IsOwner(), | |||
}) | |||
if err != nil { | |||
log.Error("GetAITaskList error,err=%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
result.CanCreateTask = cloudbrain.CanCreateOrDebugJob(ctx) | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(result)) | |||
} | |||
func GetAITaskOperationProfile(ctx *context.Context) { | |||
id := ctx.QueryInt64("id") | |||
t, err := task.GetAITaskTemplateByCloudbrainId(id) | |||
if err != nil { | |||
log.Error("param error") | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
r, err := t.GetOperationProfile(id) | |||
if err != nil { | |||
log.Error("GetOperationProfile error.%v", err) | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(err, ctx)) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, response.OuterSuccessWithData(r)) | |||
} | |||
func handCreateReq(req *entity.CreateReq) { | |||
req.JobName = util.ConvertDisplayJobNameToJobName(req.DisplayJobName) | |||
if req.WorkServerNumber == 0 { | |||
req.WorkServerNumber = 1 | |||
@@ -0,0 +1 @@ | |||
package ai_task |
@@ -59,10 +59,15 @@ | |||
package v1 | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/routers/response" | |||
"net/http" | |||
"strings" | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
cloudbrainService "code.gitea.io/gitea/services/cloudbrain" | |||
"code.gitea.io/gitea/routers/ai_task" | |||
"code.gitea.io/gitea/routers/api/v1/finetune" | |||
@@ -123,6 +128,29 @@ func sudo() macaron.Handler { | |||
} | |||
} | |||
func reqAITaskInRepo() macaron.Handler { | |||
return func(ctx *context.APIContext) { | |||
if ctx.Repo == nil { | |||
ctx.Context.Error(http.StatusUnauthorized) | |||
return | |||
} | |||
id := ctx.QueryInt64("id") | |||
if id <= 0 { | |||
ctx.Context.Error(http.StatusUnauthorized) | |||
return | |||
} | |||
t, err := models.GetCloudbrainByCloudbrainID(id) | |||
if err != nil { | |||
ctx.Context.Error(http.StatusUnauthorized) | |||
return | |||
} | |||
if t.RepoID != ctx.Repo.Repository.ID { | |||
ctx.Context.Error(http.StatusUnauthorized) | |||
return | |||
} | |||
} | |||
} | |||
func repoAssignment() macaron.Handler { | |||
return func(ctx *context.APIContext) { | |||
userName := ctx.Params(":username") | |||
@@ -341,6 +369,15 @@ func reqWeChat() macaron.Handler { | |||
} | |||
} | |||
func reqWeChatStandard() macaron.Handler { | |||
return func(ctx *context.Context) { | |||
if setting.WechatAuthSwitch && ctx.User.WechatOpenId == "" { | |||
ctx.JSON(http.StatusOK, response.OuterTrBizError(response.WECHAT_NOT_BIND, ctx)) | |||
return | |||
} | |||
} | |||
} | |||
// reqAnyRepoReader user should have any permission to read repository or permissions of site admin | |||
func reqAnyRepoReader() macaron.Handler { | |||
return func(ctx *context.Context) { | |||
@@ -610,17 +647,22 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Group("/:username/:reponame", func() { | |||
m.Group("/ai_task", func() { | |||
m.Post("/create", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), bind(ai_task_entity.CreateReq{}), ai_task.CreateAITask) | |||
m.Get("", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetAITaskInfo) | |||
m.Post("/stop", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), reqAdminOrOwnerAITaskCreator(), ai_task.StopAITask) | |||
m.Post("/del", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), reqAdminOrOwnerAITaskCreator(), ai_task.DelAITask) | |||
m.Post("/restart", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), reqAdminOrAITaskCreator(), ai_task.RestartAITask) | |||
m.Get("/log", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetAITaskLog) | |||
m.Get("/output", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetAITaskOutput) | |||
m.Get("/debug_url", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetNotebookUrl) | |||
m.Get("/creation/required", reqWeChat(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetCreationRequiredInfo) | |||
}, context.RepoRef()) | |||
}, reqToken(), repoAssignment()) | |||
m.Post("/create", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), bind(entity.CreateReq{}), ai_task.CreateAITask) | |||
m.Post("/stop", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.StopAITask) | |||
m.Post("/del", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrOwnerAITaskCreator(), ai_task.DelAITask) | |||
m.Post("/restart", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), reqAdminOrAITaskCreator(), ai_task.RestartAITask) | |||
m.Get("/log", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskLog) | |||
m.Get("/output", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOutput) | |||
m.Get("/debug_url", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetNotebookUrl) | |||
m.Get("/creation/required", reqWeChatStandard(), reqRepoWriter(models.UnitTypeCloudBrain), ai_task.GetCreationRequiredInfo) | |||
}, reqToken(), context.RepoRef()) | |||
m.Group("/ai_task", func() { | |||
m.Get("", reqRepoReader(models.UnitTypeCloudBrain), ai_task.GetAITaskInfo) | |||
m.Get("/brief", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskBriefInfo) | |||
m.Get("/list", reqRepoReader(models.UnitTypeCloudBrain), ai_task.GetAITaskList) | |||
m.Get("/operation_profile", reqRepoReader(models.UnitTypeCloudBrain), reqAITaskInRepo(), ai_task.GetAITaskOperationProfile) | |||
}) | |||
}, repoAssignment()) | |||
// Miscellaneous | |||
if setting.API.EnableSwagger { | |||
m.Get("/swagger", misc.Swagger) | |||
@@ -670,6 +712,12 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/complete_multipart", repo.CompleteMultipart) | |||
}, reqToken()) | |||
m.Group("/attachments/model", func() { | |||
m.Get("/get_chunks", repo.GetModelChunks) | |||
m.Get("/new_multipart", repo.NewModelMultipart) | |||
m.Get("/get_multipart_url", repo.GetModelMultipartUploadUrl) | |||
m.Post("/complete_multipart", repo.CompleteModelMultipart) | |||
}) | |||
m.Group("/pipeline", func() { | |||
m.Post("/notification", bind(api.PipelineNotification{}), notify.PipelineNotify) | |||
@@ -748,6 +796,9 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
//cloudbrain board | |||
m.Get("/cloudbrainboard/cloudbrain/resource_queues", repo.GetResourceQueues) | |||
m.Get("/cloudbrainboard/ai_center_overview", repo.GetCloubrainOverviewGroupByAiCenter) | |||
m.Get("/cloudbrainboard/location", cloudbrainService.GetCloudbrainLocationInfo) | |||
m.Group("/cloudbrainboard", func() { | |||
m.Get("/downloadAll", repo.DownloadCloudBrainBoard) | |||
m.Group("/cloudbrain", func() { | |||
@@ -870,11 +921,16 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Get("/my_datasets", repo.MyDatasetsMultiple) | |||
m.Get("/public_datasets", repo.PublicDatasetMultiple) | |||
m.Get("/my_favorite", repo.MyFavoriteDatasetMultiple) | |||
m.Group("/model", func() { | |||
m.Get("/getmodelfile", repo.GetDataSetSelectItemByJobId) | |||
m.Get("/getprogress", repo.GetExportDataSetByMsgId) | |||
m.Post("/export_exist_dataset", repo.ExportModelToExistDataSet) | |||
}) | |||
}, reqToken(), repoAssignment()) | |||
m.Group("/file_notebook", func() { | |||
m.Get("", repo.GetFileNoteBookInfo) | |||
m.Post("/create", reqToken(), reqWeChat(), bind(api.CreateFileNotebookJobOption{}), repo.CreateFileNoteBook) | |||
m.Post("/create", reqToken(), reqWeChatStandard(), bind(api.CreateFileNotebookJobOption{}), repo.CreateFileNoteBook) | |||
m.Post("/status", reqToken(), bind(api.CreateFileNotebookJobOption{}), repo.FileNoteBookStatus) | |||
}) | |||
@@ -1179,6 +1235,8 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
}, reqRepoReader(models.UnitTypeCloudBrain)) | |||
m.Group("/modelmanage", func() { | |||
m.Post("/create_new_model", repo.CreateNewModel) | |||
m.Post("/create_local_model", repo.SaveLocalModel) | |||
m.Delete("/delete_model_file", repo.DeleteModelFile) | |||
m.Get("/show_model_api", repo.ShowModelManageApi) | |||
m.Delete("/delete_model", repo.DeleteModel) | |||
m.Get("/downloadall", repo.DownloadModel) | |||
@@ -1225,6 +1283,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/del_version", repo.DelTrainJobVersion) | |||
m.Post("/stop_version", repo.StopTrainJobVersion) | |||
m.Get("/result_list", repo.ResultList) | |||
m.Get("/downloadall", repo.DownloadMultiResultFile) | |||
}) | |||
}) | |||
}, reqRepoReader(models.UnitTypeCloudBrain)) | |||
@@ -1239,8 +1298,11 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.GrampusStopJob) | |||
m.Get("/log", repo_ext.GrampusGetLog) | |||
m.Get("/metrics", repo_ext.GrampusMetrics) | |||
m.Get("/metrics/:nodeId", repo_ext.GrampusMetrics) | |||
m.Get("/log/:nodeId", repo_ext.GrampusGetLog) | |||
m.Get("/download_multi_model", cloudbrain.AdminOrJobCreaterRightForTrain, repo.MultiModelDownload) | |||
m.Get("/download_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo_ext.GrampusDownloadLog) | |||
m.Get("/download_log/:nodeId", cloudbrain.AdminOrJobCreaterRightForTrain, repo_ext.GrampusDownloadLog) | |||
m.Get("/job_event", repo_ext.GrampusTrainJobEvents) | |||
}) | |||
}) | |||
@@ -199,9 +199,26 @@ func SyncPanguDeployStatus() { | |||
func GetPanguDeployStatus(ctx *context.APIContext) { | |||
var jobID = ctx.Params(":jobid") | |||
status, _ := models.GetModelartsDeployStatusByJobID(jobID) | |||
status, err := models.GetModelartsDeployStatusByJobID(jobID) | |||
if err != nil { | |||
log.Info("panguService: GetPanguDeployStatus, jobID %s, err %v", jobID, status, err) | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) | |||
return | |||
} | |||
finishTime, err := models.GetModelartsDeployFinishTimebyJobID(jobID) | |||
if err != nil { | |||
log.Info("panguService: GetModelartsDeployFinishTimebyJobID, jobID %s, err %v", jobID, status, err) | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) | |||
return | |||
} | |||
log.Info("panguService: GetPanguDeployStatus, jobID %s, status %s, finishTime %s", jobID, status, finishTime) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"fineTuneDeployStatus": status, | |||
"fineTuneDeployStatus": status, | |||
"fineTuneDeployFinishTime": finishTime, | |||
}) | |||
} | |||
@@ -2,6 +2,7 @@ package repo | |||
import ( | |||
"net/http" | |||
"sync" | |||
"code.gitea.io/gitea/modules/log" | |||
@@ -10,6 +11,8 @@ import ( | |||
routeRepo "code.gitea.io/gitea/routers/repo" | |||
) | |||
var mutex *sync.Mutex = new(sync.Mutex) | |||
func GetSuccessChunks(ctx *context.APIContext) { | |||
if errStr := checkDatasetPermission(ctx); errStr != "" { | |||
ctx.JSON(http.StatusForbidden, ctx.Tr(errStr)) | |||
@@ -47,9 +50,34 @@ func checkDatasetPermission(ctx *context.APIContext) string { | |||
func NewMultipart(ctx *context.APIContext) { | |||
if errStr := checkDatasetPermission(ctx); errStr != "" { | |||
ctx.JSON(http.StatusForbidden, ctx.Tr(errStr)) | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": ctx.Tr(errStr), | |||
}) | |||
return | |||
} | |||
if err := routeRepo.CheckFlowForDatasetSDK(); err != nil { | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": err.Error(), | |||
}) | |||
return | |||
} | |||
mutex.Lock() | |||
defer mutex.Unlock() | |||
datasetId := ctx.QueryInt64("dataset_id") | |||
fileName := ctx.Query("file_name") | |||
re, err := routeRepo.NewMultipartForApi(ctx.Context, true) | |||
if err != nil { | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": err.Error(), | |||
}) | |||
} else { | |||
routeRepo.AddFileNameToCache(datasetId, fileName, ctx.User.ID) | |||
re["result_code"] = "0" | |||
ctx.JSON(200, re) | |||
} | |||
routeRepo.NewMultipart(ctx.Context) | |||
} | |||
func GetMultipartUploadUrl(ctx *context.APIContext) { | |||
if errStr := checkDatasetPermission(ctx); errStr != "" { | |||
@@ -62,9 +90,32 @@ func CompleteMultipart(ctx *context.APIContext) { | |||
if errStr := checkDatasetPermission(ctx); errStr != "" { | |||
ctx.JSON(http.StatusForbidden, ctx.Tr(errStr)) | |||
} | |||
datasetId := ctx.QueryInt64("dataset_id") | |||
fileName := ctx.Query("file_name") | |||
routeRepo.RemoveFileFromCache(datasetId, fileName, ctx.User.ID) | |||
routeRepo.CompleteMultipart(ctx.Context) | |||
} | |||
// GetAttachment delegates attachment download to the web route handler,
// reusing its lookup and response logic.
func GetAttachment(ctx *context.APIContext) {
	routeRepo.GetAttachment(ctx.Context)
}
// GetModelChunks delegates model-upload chunk lookup to the web route handler.
func GetModelChunks(ctx *context.APIContext) {
	log.Info("GetModelChunks by api.")
	routeRepo.GetModelChunks(ctx.Context)
}
// NewModelMultipart delegates starting a model multipart upload to the web
// route handler.
func NewModelMultipart(ctx *context.APIContext) {
	log.Info("NewModelMultipart by api.")
	routeRepo.NewModelMultipart(ctx.Context)
}
// GetModelMultipartUploadUrl delegates presigned upload-URL generation for a
// model multipart part to the web route handler.
func GetModelMultipartUploadUrl(ctx *context.APIContext) {
	log.Info("GetModelMultipartUploadUrl by api.")
	routeRepo.GetModelMultipartUploadUrl(ctx.Context)
}
// CompleteModelMultipart delegates finalizing a model multipart upload to the
// web route handler.
func CompleteModelMultipart(ctx *context.APIContext) {
	log.Info("CompleteModelMultipart by api.")
	routeRepo.CompleteModelMultipart(ctx.Context)
}
@@ -7,7 +7,12 @@ package repo | |||
import ( | |||
"bufio" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/util" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/task" | |||
"encoding/json" | |||
"fmt" | |||
"io" | |||
"io/ioutil" | |||
"net/http" | |||
@@ -111,7 +116,123 @@ func GeneralCloudBrainJobStop(ctx *context.APIContext) { | |||
} | |||
func CreateFileNoteBook(ctx *context.APIContext, option api.CreateFileNotebookJobOption) { | |||
cloudbrainTask.FileNotebookCreate(ctx.Context, option) | |||
if ctx.Written() { | |||
return | |||
} | |||
CreateFileNotebookTask(ctx.Context, option) | |||
} | |||
func CreateFileNotebookTask(ctx *context.Context, option api.CreateFileNotebookJobOption) { | |||
displayJobName := cloudbrainService.GetDisplayJobName(ctx.User.Name) | |||
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) | |||
jobType := models.JobTypeDebug | |||
specId := setting.FileNoteBook.SpecIdGPU | |||
ComputeSource := models.GPU | |||
imageUrl := setting.FileNoteBook.ImageGPU | |||
imageId := "" | |||
imageName := imageUrl | |||
cluster := entity.OpenICloudbrainOne | |||
if option.Type == 0 { | |||
specId = setting.FileNoteBook.SpecIdCPU | |||
imageName = imageUrl | |||
} | |||
if option.Type > cloudbrainTask.GPUType { | |||
imageId = setting.FileNoteBook.ImageIdNPU | |||
imageName = setting.FileNoteBook.ImageNPUDescription | |||
imageUrl = "" | |||
imageNpu, err := getNpuImageId(option) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.parameter_is_wrong"))) | |||
return | |||
} | |||
if imageNpu != nil { | |||
imageId = imageNpu.Id | |||
imageName = imageNpu.Value | |||
} | |||
ComputeSource = models.NPU | |||
specId = setting.FileNoteBook.SpecIdNPU | |||
if setting.ModelartsCD.Enabled { | |||
specId = setting.FileNoteBook.SpecIdNPUCD | |||
imageName = setting.FileNoteBook.ImageNPUCDDescription | |||
} | |||
cluster = entity.OpenICloudbrainTwo | |||
} | |||
sourceRepo, err := models.GetRepositoryByOwnerAndName(option.OwnerName, option.ProjectName) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.notebook_file_not_exist"))) | |||
return | |||
} | |||
repo, _ := models.GetRepositoryByName(ctx.User.ID, setting.FileNoteBook.ProjectName) | |||
if repo == nil { | |||
log.Error("default file repository not exists") | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error")) | |||
return | |||
} | |||
res, bizErr := task.CreateAITask(entity.CreateReq{ | |||
JobType: jobType, | |||
DisplayJobName: displayJobName, | |||
JobName: jobName, | |||
SpecId: specId, | |||
ComputeSourceStr: ComputeSource, | |||
Cluster: cluster, | |||
WorkServerNumber: 1, | |||
ImageUrl: imageUrl, | |||
ImageName: imageName, | |||
ImageID: imageId, | |||
BootFile: cloudbrainTask.GetBootFile(option.File, option.OwnerName, option.ProjectName, option.BranchName), | |||
FileRepository: sourceRepo, | |||
FileBranchName: option.BranchName, | |||
IsFileNoteBookRequest: true, | |||
Description: getDescription(option), | |||
}, nil, repo, ctx.User) | |||
code := 0 | |||
if bizErr != nil { | |||
switch bizErr.Code { | |||
case response.MULTI_TASK.Code: | |||
code = 2 | |||
default: | |||
code = 1 | |||
} | |||
ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: code, Message: ctx.Tr(bizErr.TrCode)}) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, models.BaseMessageApi{ | |||
Code: code, | |||
Message: fmt.Sprint(res.ID), | |||
}) | |||
} | |||
const CharacterLength = 2550 | |||
func getDescription(option api.CreateFileNotebookJobOption) string { | |||
des := option.OwnerName + "/" + option.ProjectName + "/" + option.BranchName + "/" + option.File | |||
if len(des) <= CharacterLength { | |||
return des | |||
} | |||
return "" | |||
} | |||
func getNpuImageId(option api.CreateFileNotebookJobOption) (*setting.ImageInfoModelArts, error) { | |||
if option.Type != cloudbrainTask.NPUType { | |||
return nil, fmt.Errorf("type is not npu.") | |||
} | |||
if option.Image == "" { | |||
return nil, nil | |||
} | |||
for _, imageInfo := range setting.StImageInfos.ImageInfo { | |||
if imageInfo.Value == option.Image { | |||
return imageInfo, nil | |||
} | |||
} | |||
return nil, fmt.Errorf("invalid image parameter") | |||
} | |||
// FileNoteBookStatus reports the status of a file-notebook job by delegating
// to the legacy cloudbrain task handler.
func FileNoteBookStatus(ctx *context.APIContext, option api.CreateFileNotebookJobOption) {
	cloudbrainTask.FileNotebookStatus(ctx.Context, option)
}
@@ -224,6 +345,36 @@ func GrampusNoteBookDebug(ctx *context.APIContext) { | |||
} | |||
func GrampusNotebookRestart(ctx *context.APIContext) { | |||
var id = ctx.Params(":id") | |||
var resultCode = "-1" | |||
var errorMsg = "" | |||
var status = "" | |||
t := ctx.Cloudbrain | |||
if t.IsNewAITask() { | |||
res, bizErr := task.RestartAITask(t.ID, ctx.Repo.GitRepo, ctx.Repo.Repository, ctx.User) | |||
if bizErr != nil { | |||
log.Error("lRestartAITask failed:task.ID=%d err=%v", t.ID, bizErr.DefaultMsg) | |||
errorMsg = ctx.Tr(bizErr.TrCode) | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": resultCode, | |||
"error_msg": errorMsg, | |||
"status": status, | |||
"id": id, | |||
}) | |||
return | |||
} | |||
id = strconv.FormatInt(res.ID, 10) | |||
status = res.Status | |||
resultCode = "0" | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": resultCode, | |||
"error_msg": errorMsg, | |||
"status": status, | |||
"id": id, | |||
}) | |||
return | |||
} | |||
cloudbrainTask.GrampusNotebookRestart(ctx.Context) | |||
} | |||
@@ -233,6 +384,15 @@ func GrampusStopJob(ctx *context.APIContext) { | |||
} | |||
func GrampusNotebookDel(ctx *context.APIContext) { | |||
if isHandled, err := task.HandleNewAITaskDelete(ctx.Cloudbrain.ID); isHandled { | |||
if err != nil { | |||
log.Error("DeleteJob(%s) failed:%v", ctx.Cloudbrain.JobName, err, ctx.Data["msgID"]) | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, models.BaseOKMessageApi) | |||
return | |||
} | |||
err := cloudbrainTask.DeleteGrampusJob(ctx.Context) | |||
if err != nil { | |||
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error())) | |||
@@ -293,6 +453,21 @@ func GetCloudbrainTask(ctx *context.APIContext) { | |||
ctx.NotFound(err) | |||
return | |||
} | |||
if job.IsNewAITask() { | |||
jobAfter, _ := task.UpdateCloudbrain(job) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"ID": ID, | |||
"JobName": jobAfter.JobName, | |||
"JobStatus": jobAfter.Status, | |||
"SubState": "", | |||
"CreatedTime": jobAfter.CreatedUnix.Format("2006-01-02 15:04:05"), | |||
"CompletedTime": jobAfter.UpdatedUnix.Format("2006-01-02 15:04:05"), | |||
"JobDuration": jobAfter.TrainJobDuration, | |||
}) | |||
return | |||
} | |||
if job.JobType == string(models.JobTypeModelSafety) { | |||
routerRepo.GetAiSafetyTaskByJob(job) | |||
job, err = models.GetCloudbrainByID(ID) | |||
@@ -4,10 +4,13 @@ import ( | |||
"fmt" | |||
"net/http" | |||
"net/url" | |||
"sort" | |||
"strconv" | |||
"strings" | |||
"time" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/services/cloudbrain/resource" | |||
"code.gitea.io/gitea/models" | |||
@@ -163,6 +166,112 @@ func GetOverviewDuration(ctx *context.Context) { | |||
}) | |||
} | |||
func GetCloubrainOverviewGroupByAiCenter(ctx *context.Context) { | |||
cloudbrainCardTimeAndCountArray, err := models.GetCloudbrainCardTimeAndCountGroupByAICenter() | |||
if err != nil { | |||
log.Error("Can not query CardTimeAndCount.", err) | |||
} | |||
cardTimeMap, maxCardTime, _ := getCenterCardTimeInfo(cloudbrainCardTimeAndCountArray) | |||
var aiCenterLocationInfos = make(map[string][]*cloudbrainService.AiCenterLocationInfo, 0) | |||
const AI_CENTER = "智算中心" | |||
for _, value := range setting.AiCenterCodeAndNameAndLocMapInfo { | |||
long, lat := getLongLat(value.Loc) | |||
aicenterArray, ok := aiCenterLocationInfos[value.Type] | |||
if !ok { | |||
aicenterArray = make([]*cloudbrainService.AiCenterLocationInfo, 0) | |||
} | |||
if value.Type == "超算中心" || value.Type == "东数西算" { | |||
aiCenterLocationInfos[value.Type] = append(aicenterArray, &cloudbrainService.AiCenterLocationInfo{ | |||
Name: cloudbrainService.GetAiCenterShowByAiCenterId(value.Name, ctx), | |||
Longitude: long, | |||
Latitude: lat, | |||
Value: setting.ScreenMap.MinValue, | |||
}) | |||
} else if value.Type == AI_CENTER { | |||
aiCenterLocationInfos[value.Type] = append(aicenterArray, &cloudbrainService.AiCenterLocationInfo{ | |||
Name: cloudbrainService.GetAiCenterShowByAiCenterId(value.Name, ctx), | |||
Longitude: long, | |||
Latitude: lat, | |||
Value: getAiCenterSize(value.Name, cardTimeMap, maxCardTime, 0), | |||
}) | |||
} | |||
} | |||
sort.SliceStable(aiCenterLocationInfos[AI_CENTER], func(i, j int) bool { | |||
return aiCenterLocationInfos[AI_CENTER][i].Value > aiCenterLocationInfos[AI_CENTER][j].Value | |||
}) | |||
if setting.ScreenMap.ShowData || ctx.IsUserSiteAdmin() { | |||
for _, cloudbrainCardTimeAndCountMap := range cloudbrainCardTimeAndCountArray { | |||
centerId := cloudbrainCardTimeAndCountMap["ai_center"] | |||
centerShow := cloudbrainService.GetAiCenterShowByAiCenterId(centerId, ctx) | |||
cloudbrainCardTimeAndCountMap["ai_center"] = centerShow | |||
} | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"cardAndJobCount": cloudbrainCardTimeAndCountArray, | |||
"locationInfo": aiCenterLocationInfos, | |||
}) | |||
return | |||
} | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"cardAndJobCount": []map[string]string{}, | |||
"locationInfo": aiCenterLocationInfos, | |||
}) | |||
return | |||
} | |||
func getAiCenterSize(name string, timeMap map[string]int64, MaxCardTime int64, MinCardTime int64) int { | |||
cardTime, _ := timeMap[name] | |||
if cardTime == 0 { | |||
return setting.ScreenMap.MinValue | |||
} else { | |||
if MaxCardTime == MinCardTime { | |||
return setting.ScreenMap.MaxValue | |||
} else { | |||
return int(float64(cardTime-MinCardTime)/float64(MaxCardTime-MinCardTime)*float64(setting.ScreenMap.MaxValue-setting.ScreenMap.MinValue)) + setting.ScreenMap.MinValue | |||
} | |||
} | |||
} | |||
// getLongLat splits a "longitude,latitude" string into its two components;
// malformed input (not exactly one comma) yields two empty strings.
func getLongLat(loc string) (string, string) {
	parts := strings.Split(loc, ",")
	if len(parts) == 2 {
		return parts[0], parts[1]
	}
	return "", ""
}
// getCenterCardTimeInfo converts query rows into a center->cardTime map and
// returns the card time of the first and last rows as (max, min).
// NOTE(review): max/min come from row position, so the input is assumed to be
// sorted by card_duration descending — confirm against the producing query.
// Unparseable card_duration values fall back to 0.
func getCenterCardTimeInfo(cloudbrainCardTimeAndCountArray []map[string]string) (map[string]int64, int64, int64) {
	centerCardTimeMap := make(map[string]int64, len(cloudbrainCardTimeAndCountArray))
	var maxCardTime, minCardTime int64
	last := len(cloudbrainCardTimeAndCountArray) - 1
	for i, row := range cloudbrainCardTimeAndCountArray {
		cardTime, _ := strconv.ParseInt(row["card_duration"], 10, 64)
		centerCardTimeMap[row["ai_center"]] = cardTime
		if i == 0 {
			maxCardTime = cardTime
		}
		if i == last {
			minCardTime = cardTime
		}
	}
	return centerCardTimeMap, maxCardTime, minCardTime
}
func GetCloudbrainCardDuration(task models.Cloudbrain) string { | |||
cardNum := int(0) | |||
spec, err := resource.GetCloudbrainSpec(task.ID) | |||
@@ -12,6 +12,7 @@ import ( | |||
"code.gitea.io/gitea/modules/context" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
routerRepo "code.gitea.io/gitea/routers/repo" | |||
) | |||
func PublicDatasetMultiple(ctx *context.APIContext) { | |||
@@ -143,3 +144,15 @@ func getSearchOrderByInValues(datasetIds []int64) models.SearchOrderBy { | |||
searchOrderBy += " ELSE 0 END" | |||
return models.SearchOrderBy(searchOrderBy) | |||
} | |||
func GetDataSetSelectItemByJobId(ctx *context.APIContext) { | |||
routerRepo.GetDataSetSelectItemByJobId(ctx.Context) | |||
} | |||
func GetExportDataSetByMsgId(ctx *context.APIContext) { | |||
routerRepo.GetExportDataSetByMsgId(ctx.Context) | |||
} | |||
func ExportModelToExistDataSet(ctx *context.APIContext) { | |||
routerRepo.ExportModelToExistDataSet(ctx.Context) | |||
} |
@@ -15,6 +15,7 @@ import ( | |||
"time" | |||
"code.gitea.io/gitea/services/ai_task_service/schedule" | |||
"code.gitea.io/gitea/services/ai_task_service/task" | |||
"code.gitea.io/gitea/routers/response" | |||
@@ -51,11 +52,16 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { | |||
return | |||
} | |||
if !job.Cleared { | |||
err = modelarts.HandleNotebookInfo(job) | |||
if err != nil { | |||
ctx.NotFound(err) | |||
return | |||
if job.IsNewAITask() { | |||
job, _ = task.UpdateCloudbrain(job) | |||
} else { | |||
err = modelarts.HandleNotebookInfo(job) | |||
if err != nil { | |||
ctx.NotFound(err) | |||
return | |||
} | |||
} | |||
} | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
"ID": ID, | |||
@@ -652,3 +658,8 @@ func trainJobGetMetricStatistic(jobID string, versionName string) (*models.GetTr | |||
return result, err | |||
} | |||
func DownloadMultiResultFile(ctx *context.APIContext) { | |||
log.Info("DownloadMultiResultFile by api") | |||
routerRepo.DownloadMultiResultFile(ctx.Context) | |||
} |
@@ -187,3 +187,13 @@ func DownloadModeConvertResultFile(ctx *context.APIContext) { | |||
ctx.Context.SetParams("id", ctx.Query("id")) | |||
routerRepo.ModelConvertDownloadModel(ctx.Context) | |||
} | |||
func SaveLocalModel(ctx *context.APIContext) { | |||
log.Info("SaveLocalModel by api.") | |||
routerRepo.SaveLocalModel(ctx.Context) | |||
} | |||
func DeleteModelFile(ctx *context.APIContext) { | |||
log.Info("DeleteModelFile by api.") | |||
routerRepo.DeleteModelFile(ctx.Context) | |||
} |
@@ -55,6 +55,8 @@ const ( | |||
tplRepoSquare base.TplName = "explore/repos/square" | |||
tplRepoSearch base.TplName = "explore/repos/search" | |||
tplRoshmci base.TplName = "explore/ros-hmci" | |||
tplExploreCenterMap base.TplName = "explore/center_map" | |||
) | |||
// Home render home page | |||
@@ -541,6 +543,11 @@ func ExploreDatasetsUI(ctx *context.Context) { | |||
ctx.HTML(200, tplExploreDataset) | |||
} | |||
func CenterMapUI(ctx *context.Context) { | |||
ctx.HTML(200, tplExploreCenterMap) | |||
} | |||
func getDatasetOrderBy(ctx *context.Context) models.SearchOrderBy { | |||
var orderBy models.SearchOrderBy | |||
switch ctx.Query("sort") { | |||
@@ -59,6 +59,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/repos/cnt_stat/handle_historical_task", admin.RefreshHistorySpec) | |||
m.Post("/duration_statisctic/history_handle", repo.CloudbrainUpdateHistoryData) | |||
m.Post("/square/repo/stat/refresh", repository.RefreshRepoStatData) | |||
m.Get("/setting/refresh", RefreshSetting) | |||
}, CheckInternalToken) | |||
} |
@@ -0,0 +1,17 @@ | |||
package private | |||
import ( | |||
"code.gitea.io/gitea/modules/setting" | |||
"gitea.com/macaron/macaron" | |||
) | |||
func RefreshSetting(ctx *macaron.Context) { | |||
setting.Cfg.Reload() | |||
setting.NewScreenMapConfig() | |||
setting.GetGrampusConfig() | |||
setting.GetModelartsConfig() | |||
setting.GetModelartsCDConfig() | |||
ctx.PlainText(200, []byte("success")) | |||
} |
@@ -667,23 +667,36 @@ func GetSuccessChunks(ctx *context.Context) { | |||
} | |||
func NewMultipart(ctx *context.Context) { | |||
func NewMultipartForApi(ctx *context.Context, isFlowControl bool) (map[string]string, error) { | |||
if !setting.Attachment.Enabled { | |||
ctx.Error(404, "attachment is not enabled") | |||
return | |||
return nil, errors.New("attachment is not enabled") | |||
} | |||
err := upload.VerifyFileType(ctx.Query("fileType"), strings.Split(setting.Attachment.AllowedTypes, ",")) | |||
typeCloudBrain := ctx.QueryInt("type") | |||
fileMD5 := ctx.Query("md5") | |||
fileChunk, err := models.GetFileChunkByMD5AndUser(fileMD5, ctx.User.ID, typeCloudBrain) | |||
if err == nil { | |||
if fileChunk != nil { | |||
log.Info("cannot reupload,name" + ctx.Query("file_name")) | |||
return nil, errors.New("Cannot upload repeatedly,name is " + ctx.Query("file_name")) | |||
} | |||
} | |||
if isFlowControl { | |||
err = CheckFlowForDataset(ctx) | |||
if err != nil { | |||
log.Info("check error," + err.Error()) | |||
return nil, err | |||
} | |||
} | |||
err = upload.VerifyFileType(ctx.Query("fileType"), strings.Split(setting.Attachment.AllowedTypes, ",")) | |||
if err != nil { | |||
ctx.Error(400, err.Error()) | |||
return | |||
log.Info("VerifyFileType error," + err.Error()) | |||
return nil, errors.New("Not support file type.") | |||
} | |||
typeCloudBrain := ctx.QueryInt("type") | |||
err = checkTypeCloudBrain(typeCloudBrain) | |||
if err != nil { | |||
ctx.ServerError("checkTypeCloudBrain failed", err) | |||
return | |||
log.Info("checkTypeCloudBrain error," + err.Error()) | |||
return nil, err | |||
} | |||
fileName := ctx.Query("file_name") | |||
@@ -691,14 +704,15 @@ func NewMultipart(ctx *context.Context) { | |||
if setting.Attachment.StoreType == storage.MinioStorageType { | |||
totalChunkCounts := ctx.QueryInt("totalChunkCounts") | |||
if totalChunkCounts > minio_ext.MaxPartsCount { | |||
ctx.Error(400, fmt.Sprintf("chunk counts(%d) is too much", totalChunkCounts)) | |||
return | |||
log.Info(fmt.Sprintf("chunk counts(%d) is too much", totalChunkCounts)) | |||
return nil, errors.New(fmt.Sprintf("chunk counts(%d) is too much", totalChunkCounts)) | |||
} | |||
fileSize := ctx.QueryInt64("size") | |||
if fileSize > minio_ext.MaxMultipartPutObjectSize { | |||
ctx.Error(400, fmt.Sprintf("file size(%d) is too big", fileSize)) | |||
return | |||
log.Info(fmt.Sprintf("file size(%d) is too big", fileSize)) | |||
return nil, errors.New(fmt.Sprintf("file size(%d) is too big", fileSize)) | |||
} | |||
uuid := gouuid.NewV4().String() | |||
@@ -706,17 +720,16 @@ func NewMultipart(ctx *context.Context) { | |||
if typeCloudBrain == models.TypeCloudBrainOne { | |||
uploadID, err = storage.NewMultiPartUpload(strings.TrimPrefix(path.Join(setting.Attachment.Minio.BasePath, path.Join(uuid[0:1], uuid[1:2], uuid)), "/")) | |||
if err != nil { | |||
ctx.ServerError("NewMultipart", err) | |||
return | |||
log.Info("NewMultipart " + err.Error()) | |||
return nil, err | |||
} | |||
} else { | |||
uploadID, err = storage.NewObsMultiPartUpload(strings.TrimPrefix(path.Join(setting.BasePath, path.Join(uuid[0:1], uuid[1:2], uuid, fileName)), "/")) | |||
if err != nil { | |||
ctx.ServerError("NewObsMultiPartUpload", err) | |||
return | |||
log.Info("NewObsMultiPartUpload " + err.Error()) | |||
return nil, err | |||
} | |||
} | |||
_, err = models.InsertFileChunk(&models.FileChunk{ | |||
UUID: uuid, | |||
UserID: ctx.User.ID, | |||
@@ -728,18 +741,26 @@ func NewMultipart(ctx *context.Context) { | |||
}) | |||
if err != nil { | |||
ctx.Error(500, fmt.Sprintf("InsertFileChunk: %v", err)) | |||
return | |||
log.Info(fmt.Sprintf("InsertFileChunk: %v", err)) | |||
return nil, err | |||
} | |||
ctx.JSON(200, map[string]string{ | |||
return map[string]string{ | |||
"uuid": uuid, | |||
"uploadID": uploadID, | |||
}) | |||
}, nil | |||
} else { | |||
ctx.Error(404, "storage type is not enabled") | |||
return nil, errors.New("storage type is not enabled") | |||
} | |||
} | |||
func NewMultipart(ctx *context.Context) { | |||
re, err := NewMultipartForApi(ctx, false) | |||
if err != nil { | |||
ctx.ServerError("NewMultipart failed", err) | |||
return | |||
} | |||
ctx.JSON(200, re) | |||
} | |||
func PutOBSProxyUpload(ctx *context.Context) { | |||
@@ -850,24 +871,31 @@ func CompleteMultipart(ctx *context.Context) { | |||
fileChunk, err := models.GetFileChunkByUUID(uuid) | |||
if err != nil { | |||
if models.IsErrFileChunkNotExist(err) { | |||
ctx.Error(404) | |||
} else { | |||
ctx.ServerError("GetFileChunkByUUID", err) | |||
} | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": "The upload file not found.", | |||
}) | |||
return | |||
} | |||
if typeCloudBrain == models.TypeCloudBrainOne { | |||
_, err = storage.CompleteMultiPartUpload(strings.TrimPrefix(path.Join(setting.Attachment.Minio.BasePath, path.Join(fileChunk.UUID[0:1], fileChunk.UUID[1:2], fileChunk.UUID)), "/"), uploadID, fileChunk.TotalChunks) | |||
if err != nil { | |||
ctx.Error(500, fmt.Sprintf("CompleteMultiPartUpload failed: %v", err)) | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": fmt.Sprintf("CompleteMultiPartUpload failed: %v", err), | |||
}) | |||
//ctx.Error(500, fmt.Sprintf("CompleteMultiPartUpload failed: %v", err)) | |||
return | |||
} | |||
} else { | |||
err = storage.CompleteObsMultiPartUpload(strings.TrimPrefix(path.Join(setting.BasePath, path.Join(fileChunk.UUID[0:1], fileChunk.UUID[1:2], fileChunk.UUID, fileName)), "/"), uploadID, fileChunk.TotalChunks) | |||
if err != nil { | |||
ctx.Error(500, fmt.Sprintf("CompleteObsMultiPartUpload failed: %v", err)) | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": fmt.Sprintf("CompleteObsMultiPartUpload failed: %v", err), | |||
}) | |||
//ctx.Error(500, fmt.Sprintf("CompleteObsMultiPartUpload failed: %v", err)) | |||
return | |||
} | |||
} | |||
@@ -876,7 +904,11 @@ func CompleteMultipart(ctx *context.Context) { | |||
err = models.UpdateFileChunk(fileChunk) | |||
if err != nil { | |||
ctx.Error(500, fmt.Sprintf("UpdateFileChunk: %v", err)) | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": "-1", | |||
"msg": fmt.Sprintf("UpdateFileChunk: %v", err), | |||
}) | |||
//ctx.Error(500, fmt.Sprintf("UpdateFileChunk: %v", err)) | |||
return | |||
} | |||
@@ -20,9 +20,9 @@ func GetModelChunks(ctx *context.Context) { | |||
fileMD5 := ctx.Query("md5") | |||
typeCloudBrain := ctx.QueryInt("type") | |||
fileName := ctx.Query("file_name") | |||
scene := ctx.Query("scene") | |||
//scene := ctx.Query("scene") | |||
modeluuid := ctx.Query("modeluuid") | |||
log.Info("scene=" + scene + " typeCloudBrain=" + fmt.Sprint(typeCloudBrain)) | |||
log.Info(" typeCloudBrain=" + fmt.Sprint(typeCloudBrain)) | |||
var chunks string | |||
err := checkTypeCloudBrain(typeCloudBrain) | |||
@@ -124,14 +124,14 @@ func GetModelChunks(ctx *context.Context) { | |||
}) | |||
} else { | |||
ctx.JSON(200, map[string]string{ | |||
"uuid": fileChunk.UUID, | |||
"uploaded": strconv.Itoa(fileChunk.IsUploaded), | |||
"uploadID": fileChunk.UploadID, | |||
"chunks": string(chunks), | |||
"attachID": "0", | |||
"datasetID": "0", | |||
"fileName": "", | |||
"datasetName": "", | |||
"uuid": fileChunk.UUID, | |||
"uploaded": strconv.Itoa(fileChunk.IsUploaded), | |||
"uploadID": fileChunk.UploadID, | |||
"chunks": string(chunks), | |||
"attachID": "0", | |||
"datasetID": "0", | |||
"fileName": "", | |||
"modelName": "", | |||
}) | |||
} | |||
} | |||
@@ -200,14 +200,16 @@ func prepareCloudbrainOneSpecs(ctx *context.Context) { | |||
} | |||
func CloudBrainNew(ctx *context.Context) { | |||
err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeDebug)) | |||
if err != nil { | |||
ctx.ServerError("get new cloudbrain info failed", err) | |||
return | |||
} | |||
ctx.Data["PageIsGPUDebug"] = true | |||
// err := cloudBrainNewDataPrepare(ctx, string(models.JobTypeDebug)) | |||
// if err != nil { | |||
// ctx.ServerError("get new cloudbrain info failed", err) | |||
// return | |||
// } | |||
// ctx.Data["PageIsGPUDebug"] = true | |||
ctx.Data["PageIsCloudBrain"] = true | |||
ctx.HTML(200, tplCloudBrainNew) | |||
} | |||
func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
ctx.Data["IsCreate"] = true | |||
cloudBrainCreate(ctx, form) | |||
@@ -703,6 +705,32 @@ func CloudBrainRestart(ctx *context.Context) { | |||
var status = string(models.JobWaiting) | |||
task := ctx.Cloudbrain | |||
if task.IsNewAITask() { | |||
res, bizErr := ai_task.RestartAITask(task.ID, ctx.Repo.GitRepo, ctx.Repo.Repository, ctx.User) | |||
if bizErr != nil { | |||
log.Error("RestartAITask failed:task.ID=%d err=%v", task.ID, bizErr.DefaultMsg) | |||
errorMsg = ctx.Tr(bizErr.TrCode) | |||
resultCode = "-1" | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": resultCode, | |||
"error_msg": errorMsg, | |||
"status": status, | |||
"id": ID, | |||
}) | |||
return | |||
} | |||
id := strconv.FormatInt(res.ID, 10) | |||
status = res.Status | |||
resultCode = "0" | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": resultCode, | |||
"error_msg": errorMsg, | |||
"status": status, | |||
"id": id, | |||
}) | |||
return | |||
} | |||
lockOperator, errMsg := cloudbrainService.Lock4CloudbrainRestart(&lock.LockContext{Repo: ctx.Repo.Repository, Task: &models.Cloudbrain{JobType: task.JobType}, User: ctx.User}) | |||
defer func() { | |||
if lockOperator != nil { | |||
@@ -838,7 +866,9 @@ func CloudBrainBenchMarkShow(ctx *context.Context) { | |||
} | |||
func CloudBrainShow(ctx *context.Context) { | |||
cloudBrainShow(ctx, tplCloudBrainShow, models.JobTypeDebug) | |||
// cloudBrainShow(ctx, tplCloudBrainShow, models.JobTypeDebug) | |||
ctx.Data["PageIsCloudBrain"] = true | |||
ctx.HTML(200, tplCloudBrainShow) | |||
} | |||
func CloudBrainTrainJobShow(ctx *context.Context) { | |||
@@ -871,11 +901,15 @@ func cloudBrainShow(ctx *context.Context, tpName base.TplName, jobType models.Jo | |||
return | |||
} | |||
if task.Status == string(models.JobWaiting) || task.Status == string(models.JobRunning) { | |||
task, err = cloudbrainTask.SyncCloudBrainOneStatus(task) | |||
if err != nil { | |||
log.Info("error:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
if task.IsNewAITask() { | |||
task, _ = ai_task.UpdateCloudbrain(task) | |||
} else { | |||
task, err = cloudbrainTask.SyncCloudBrainOneStatus(task) | |||
if err != nil { | |||
log.Info("error:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
} | |||
} | |||
@@ -1307,6 +1341,16 @@ func CloudBrainStop(ctx *context.Context) { | |||
resultCode = task.Status | |||
break | |||
} | |||
if res, isHandled, err := ai_task.HandleNewAITaskStop(task.ID); isHandled { | |||
if err != nil { | |||
log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) | |||
resultCode = "-1" | |||
errorMsg = "cloudbrain.Stopped_failed" | |||
break | |||
} | |||
status = res.Status | |||
break | |||
} | |||
err := cloudbrain.StopJob(task.JobID) | |||
if err != nil { | |||
@@ -1513,6 +1557,14 @@ func CloudBrainDel(ctx *context.Context) { | |||
func deleteCloudbrainJob(ctx *context.Context) error { | |||
task := ctx.Cloudbrain | |||
if isHandled, err := ai_task.HandleNewAITaskDelete(task.ID); isHandled { | |||
if err != nil { | |||
log.Error("DeleteJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) | |||
return err | |||
} | |||
return nil | |||
} | |||
if task.Status != string(models.JobStopped) && task.Status != string(models.JobFailed) && task.Status != string(models.JobSucceeded) { | |||
log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) | |||
return errors.New("the job has not been stopped") | |||
@@ -1949,6 +2001,13 @@ func mkPathAndReadMeFile(path string, text string) error { | |||
} | |||
func SyncCloudbrainStatus() { | |||
defer func() { | |||
if err := recover(); err != nil { | |||
combinedErr := fmt.Errorf("%s\n%s", err, log.Stack(2)) | |||
log.Error("PANIC:%v", combinedErr) | |||
} | |||
}() | |||
cloudBrains, err := models.GetCloudBrainUnStoppedJob() | |||
if err != nil { | |||
log.Error("GetCloudBrainUnStoppedJob failed:", err.Error()) | |||
@@ -1956,12 +2015,18 @@ func SyncCloudbrainStatus() { | |||
} | |||
for _, task := range cloudBrains { | |||
if task.JobType == string(models.JobTypeModelSafety) { | |||
continue | |||
} | |||
if task.IsNewAITask() { | |||
task, _ = ai_task.UpdateCloudbrain(task) | |||
if task.Duration >= setting.MaxDuration && task.JobType == string(models.JobTypeDebug) { | |||
ai_task.StopCloudbrain(task) | |||
} | |||
continue | |||
} | |||
if task.Type == models.TypeCloudBrainOne { | |||
task, err = cloudbrainTask.SyncCloudBrainOneStatus(task) | |||
if err != nil { | |||
log.Error("Sync cloud brain one (%s) failed:%v", task.JobName, err) | |||
@@ -1986,13 +2051,7 @@ func SyncCloudbrainStatus() { | |||
} | |||
} else if task.Type == models.TypeC2Net { | |||
if task.JobType == string(models.JobTypeDebug) { | |||
if task.IsNewAITask() { | |||
ai_task.UpdateCloudbrain(task) | |||
task, _ = models.GetCloudbrainByCloudbrainID(task.ID) | |||
} else { | |||
cloudbrainTask.SyncGrampusNotebookStatus(task) | |||
} | |||
cloudbrainTask.SyncGrampusNotebookStatus(task) | |||
} else { | |||
result, err := grampus.GetJob(task.JobID) | |||
if err != nil { | |||
@@ -15,6 +15,7 @@ import ( | |||
func CloudbrainDurationStatisticHour() { | |||
if setting.IsCloudbrainTimingEnabled { | |||
log.Info("CloudbrainDurationStatisticHour start") | |||
var statisticTime time.Time | |||
var count int64 | |||
recordDurationUpdateTime, err := models.GetDurationRecordUpdateTime() | |||
@@ -173,7 +173,10 @@ func DatasetIndex(ctx *context.Context) { | |||
//load attachment creator | |||
for _, attachment := range pageAttachments { | |||
uploader, _ := models.GetUserByID(attachment.UploaderID) | |||
uploader, err1 := models.GetUserByID(attachment.UploaderID) | |||
if err1 != nil { | |||
log.Info("query dataset user error." + err1.Error()) | |||
} | |||
attachment.Uploader = uploader | |||
if !strings.HasSuffix(attachment.Name, ".zip") && !strings.HasSuffix(attachment.Name, ".tar.gz") { | |||
attachment.DecompressState = -1 //非压缩文件 | |||
@@ -192,7 +195,7 @@ func DatasetIndex(ctx *context.Context) { | |||
ctx.Data["Type"] = cloudbrainType | |||
renderAttachmentSettings(ctx) | |||
log.Info("dataset index finished.") | |||
ctx.HTML(200, tplIndex) | |||
} | |||
@@ -0,0 +1,146 @@ | |||
package repo | |||
import ( | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"strconv" | |||
"sync" | |||
"time" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/context" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/redis/redis_client" | |||
"code.gitea.io/gitea/modules/setting" | |||
) | |||
const ( | |||
REDIS_FLOW_ATTACHMENT_KEY = "flow_attachment_key" | |||
) | |||
var mutex *sync.RWMutex = new(sync.RWMutex) | |||
func CheckFlowForDataset(ctx *context.Context) error { | |||
if ctx.User == nil { | |||
return errors.New("User not login.") | |||
} | |||
log.Info("start to check flow for upload dataset file.") | |||
fileName := ctx.Query("file_name") | |||
currentTimeNow := time.Now() | |||
currentLongTime := currentTimeNow.Unix() | |||
last24Hour := currentTimeNow.AddDate(0, 0, -1).Unix() | |||
filechunks, err := models.GetFileChunksByUserId(ctx.User.ID, last24Hour, true) | |||
if err == nil { | |||
if len(filechunks) > setting.FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST24HOUR { | |||
log.Info("A single user cannot upload more than " + fmt.Sprint(setting.FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST24HOUR) + " files within the last 24 hours. so " + fileName + " is rejected. user id=" + fmt.Sprint(ctx.User.ID)) | |||
return errors.New("A single user cannot upload more than " + fmt.Sprint(setting.FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST24HOUR) + " files within the last 24 hours.") | |||
} | |||
var totalSize int64 | |||
totalSize += ctx.QueryInt64("size") | |||
concurrentUpload := 0 | |||
for _, file := range filechunks { | |||
totalSize += file.Size | |||
if (currentLongTime - int64(file.CreatedUnix)) < 10*60 { | |||
log.Info("the file " + file.Md5 + " in 10min upload." + file.CreatedUnix.Format("2006-01-02 15:04:05")) | |||
concurrentUpload += 1 | |||
} else { | |||
log.Info("the file " + file.Md5 + " not in 10min upload." + file.CreatedUnix.Format("2006-01-02 15:04:05")) | |||
} | |||
} | |||
log.Info("The concurrentUpload is " + fmt.Sprint(concurrentUpload) + " to checked " + fileName + ". user id=" + fmt.Sprint(ctx.User.ID)) | |||
if concurrentUpload >= setting.FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST10M { | |||
log.Info("A single user cannot upload more than " + fmt.Sprint(setting.FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST10M) + " files within the past 10 minutes. so " + fileName + " is rejected. user id=" + fmt.Sprint(ctx.User.ID)) | |||
return errors.New("A single user cannot upload more than " + fmt.Sprint(setting.FLOW_CONTROL.ATTACHEMENT_NUM_A_USER_LAST10M) + " files within the past 10 minutes.") | |||
} | |||
if totalSize >= setting.FLOW_CONTROL.ATTACHEMENT_SIZE_A_USER*1024*1024*1024 { | |||
log.Info("The total file size uploaded by a single user within the past 24 hours cannot exceed " + fmt.Sprint(setting.FLOW_CONTROL.ATTACHEMENT_SIZE_A_USER) + "G. so " + fileName + " is rejected. user id=" + fmt.Sprint(ctx.User.ID)) | |||
return errors.New("The total file size uploaded by a single user within the past 24 hours cannot exceed " + fmt.Sprint(setting.FLOW_CONTROL.ATTACHEMENT_SIZE_A_USER) + "G.") | |||
} | |||
} | |||
return nil | |||
} | |||
func AddFileNameToCache(datasetId int64, fileName string, userId int64) { | |||
mutex.Lock() | |||
defer mutex.Unlock() | |||
cacheMap := getSDKUploadFileMap(REDIS_FLOW_ATTACHMENT_KEY) | |||
expireTimeKeys := make([]string, 0) | |||
currentTime := time.Now().Unix() | |||
for tmpKey, tmpValue := range cacheMap { | |||
time, err := strconv.ParseInt(tmpValue, 10, 64) | |||
if err == nil { | |||
if currentTime-time > 24*3600 { | |||
expireTimeKeys = append(expireTimeKeys, tmpKey) | |||
continue | |||
} | |||
} | |||
} | |||
for _, delKey := range expireTimeKeys { | |||
delete(cacheMap, delKey) | |||
} | |||
key := fmt.Sprint(datasetId) + "_" + fileName + "_" + fmt.Sprint(userId) | |||
value := fmt.Sprint(time.Now().Unix()) | |||
cacheMap[key] = value | |||
log.Info("set key=" + key + " value=" + value + " to cache.") | |||
setSDKUploadFileCache(REDIS_FLOW_ATTACHMENT_KEY, cacheMap) | |||
} | |||
func RemoveFileFromCache(datasetId int64, fileName string, userId int64) { | |||
mutex.Lock() | |||
defer mutex.Unlock() | |||
key := fmt.Sprint(datasetId) + "_" + fileName + "_" + fmt.Sprint(userId) | |||
cacheMap := getSDKUploadFileMap(REDIS_FLOW_ATTACHMENT_KEY) | |||
delete(cacheMap, key) | |||
log.Info("remove key=" + key + " from cache.") | |||
setSDKUploadFileCache(REDIS_FLOW_ATTACHMENT_KEY, cacheMap) | |||
} | |||
func getSDKUploadFileMap(msgKey string) map[string]string { | |||
valueStr, err := redis_client.Get(msgKey) | |||
msgMap := make(map[string]string, 0) | |||
if err == nil { | |||
if valueStr != "" { | |||
err1 := json.Unmarshal([]byte(valueStr), &msgMap) | |||
if err1 != nil { | |||
log.Info("unmarshal json failed. " + err1.Error()) | |||
} | |||
} | |||
} else { | |||
log.Info("Failed to load from reids. " + err.Error()) | |||
} | |||
return msgMap | |||
} | |||
func setSDKUploadFileCache(msgKey string, msgMap map[string]string) { | |||
msgMapJson, _ := json.Marshal(msgMap) | |||
redisValue := string(msgMapJson) | |||
log.Info("set redis key=" + msgKey + " value=" + redisValue) | |||
re, err := redis_client.Setex(msgKey, redisValue, 24*3600*time.Second) | |||
if err == nil { | |||
log.Info("re =" + fmt.Sprint(re)) | |||
} else { | |||
log.Info("set redis error:" + err.Error()) | |||
} | |||
} | |||
func CheckFlowForDatasetSDK() error { | |||
cacheMap := getSDKUploadFileMap(REDIS_FLOW_ATTACHMENT_KEY) | |||
currentTime := time.Now().Unix() | |||
count := 0 | |||
for _, tmpValue := range cacheMap { | |||
time, err := strconv.ParseInt(tmpValue, 10, 64) | |||
if err == nil { | |||
if currentTime-time > 24*3600 { | |||
continue | |||
} | |||
} | |||
count += 1 | |||
} | |||
log.Info("total find " + fmt.Sprint(count) + " uploading files.") | |||
if count >= setting.FLOW_CONTROL.ALL_ATTACHEMENT_NUM_SDK { | |||
log.Info("The number of datasets uploaded using the SDK simultaneously cannot exceed " + fmt.Sprint(setting.FLOW_CONTROL.ALL_ATTACHEMENT_NUM_SDK)) | |||
return errors.New("The number of datasets uploaded using the SDK simultaneously cannot exceed " + fmt.Sprint(setting.FLOW_CONTROL.ALL_ATTACHEMENT_NUM_SDK)) | |||
} | |||
return nil | |||
} |
@@ -8,6 +8,7 @@ import ( | |||
"net/http" | |||
"os" | |||
"path" | |||
"strconv" | |||
"strings" | |||
"time" | |||
"unicode/utf8" | |||
@@ -70,31 +71,33 @@ const ( | |||
) | |||
func GrampusNotebookNew(ctx *context.Context) { | |||
ctx.Data["IsCreate"] = true | |||
ctx.Data["PageIsCloudBrain"] = true | |||
notebookType := ctx.QueryInt("type") | |||
processType := grampus.ProcessorTypeGPU | |||
if notebookType == 1 { | |||
processType = grampus.ProcessorTypeNPU | |||
} else if notebookType == 2 { | |||
processType = grampus.ProcessorTypeGCU | |||
} else if notebookType == 3 { | |||
processType = grampus.ProcessorTypeMLU | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookMLUNew) | |||
return | |||
} | |||
err := grampusNotebookNewDataPrepare(ctx, processType) | |||
if err != nil { | |||
ctx.ServerError("get new notebook-job info failed", err) | |||
return | |||
} | |||
if processType == grampus.ProcessorTypeGPU { | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) | |||
} else if processType == grampus.ProcessorTypeNPU { | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) | |||
} else if processType == grampus.ProcessorTypeGCU { | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookGCUNew) | |||
} | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookNew) | |||
// ctx.Data["IsCreate"] = true | |||
// ctx.Data["PageIsCloudBrain"] = true | |||
// notebookType := ctx.QueryInt("type") | |||
// processType := grampus.ProcessorTypeGPU | |||
// if notebookType == 1 { | |||
// processType = grampus.ProcessorTypeNPU | |||
// } else if notebookType == 2 { | |||
// processType = grampus.ProcessorTypeGCU | |||
// } else if notebookType == 3 { | |||
// processType = grampus.ProcessorTypeMLU | |||
// ctx.HTML(http.StatusOK, tplGrampusNotebookMLUNew) | |||
// return | |||
// } | |||
// err := grampusNotebookNewDataPrepare(ctx, processType) | |||
// if err != nil { | |||
// ctx.ServerError("get new notebook-job info failed", err) | |||
// return | |||
// } | |||
// if processType == grampus.ProcessorTypeGPU { | |||
// ctx.HTML(http.StatusOK, tplGrampusNotebookGPUNew) | |||
// } else if processType == grampus.ProcessorTypeNPU { | |||
// ctx.HTML(http.StatusOK, tplGrampusNotebookNPUNew) | |||
// } else if processType == grampus.ProcessorTypeGCU { | |||
// ctx.HTML(http.StatusOK, tplGrampusNotebookGCUNew) | |||
// } | |||
} | |||
func GrampusTrainJobGPUNew(ctx *context.Context) { | |||
@@ -1207,12 +1210,12 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||
} | |||
//todo: upload code (send to file_server todo this work?) | |||
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
/**if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) | |||
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) | |||
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) | |||
return | |||
} | |||
}*/ | |||
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { | |||
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) | |||
@@ -1339,8 +1342,7 @@ func GetGrampusNotebook(ctx *context.APIContext) { | |||
var jobAfter *models.Cloudbrain | |||
if job.IsNewAITask() { | |||
ai_task.UpdateCloudbrain(job) | |||
jobAfter, _ = models.GetCloudbrainByCloudbrainID(job.ID) | |||
jobAfter, _ = ai_task.UpdateCloudbrain(job) | |||
} else { | |||
jobAfter, err = cloudbrainTask.SyncGrampusNotebookStatus(job) | |||
} | |||
@@ -1365,15 +1367,45 @@ func GetGrampusNotebook(ctx *context.APIContext) { | |||
} | |||
func GrampusStopJob(ctx *context.Context) { | |||
if res, isHandled, err := ai_task.HandleNewAITaskStop(ctx.Cloudbrain.ID); isHandled { | |||
if err != nil { | |||
log.Error("StopJob(%s) failed:%v", ctx.Cloudbrain.JobName, err, ctx.Data["msgID"]) | |||
ctx.JSON(200, map[string]interface{}{ | |||
"result_code": "-1", | |||
"error_msg": ctx.Tr("cloudbrain.Stopped_failed"), | |||
"status": "", | |||
"id": ctx.Params(":id"), | |||
"StatusOK": 0, | |||
}) | |||
return | |||
} | |||
ctx.JSON(200, map[string]interface{}{ | |||
"result_code": "0", | |||
"error_msg": "", | |||
"status": res.Status, | |||
"id": ctx.Params(":id"), | |||
"StatusOK": 0, | |||
}) | |||
return | |||
} | |||
cloudbrainTask.GrampusStopJob(ctx) | |||
} | |||
func GrampusNotebookDel(ctx *context.Context) { | |||
var listType = ctx.Query("listType") | |||
if err := cloudbrainTask.DeleteGrampusJob(ctx); err != nil { | |||
log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"]) | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
if isHandled, err := ai_task.HandleNewAITaskDelete(ctx.Cloudbrain.ID); isHandled { | |||
if err != nil { | |||
log.Error("DeleteJob(%s) failed:%v", ctx.Cloudbrain.JobName, err, ctx.Data["msgID"]) | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
} | |||
} else { | |||
if err := cloudbrainTask.DeleteGrampusJob(ctx); err != nil { | |||
log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"]) | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
} | |||
} | |||
var isAdminPage = ctx.Query("isadminpage") | |||
@@ -1412,94 +1444,96 @@ type NotebookDataset struct { | |||
func GrampusNotebookShow(ctx *context.Context) { | |||
ctx.Data["PageIsCloudBrain"] = true | |||
var task *models.Cloudbrain | |||
task, err := models.GetCloudbrainByIDWithDeleted(ctx.Params(":id")) | |||
if err != nil { | |||
log.Error("GetCloudbrainByID failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
task.ContainerIp = "" | |||
if task.IsNewAITask() { | |||
ai_task.UpdateCloudbrain(task) | |||
task, _ = models.GetCloudbrainByCloudbrainID(task.ID) | |||
} else if task.DeletedAt.IsZero() && cloudbrainTask.IsTaskNotStop(task) { //normal record | |||
result, err := grampus.GetNotebookJob(task.JobID) | |||
if err != nil { | |||
log.Error("GetJob failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
if result != nil { | |||
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { | |||
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookShow) | |||
return | |||
/* | |||
var task *models.Cloudbrain | |||
task, err := models.GetCloudbrainByIDWithDeleted(ctx.Params(":id")) | |||
if err != nil { | |||
log.Error("GetCloudbrainByID failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
oldStatus := task.Status | |||
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { | |||
task.Duration = result.JobInfo.RunSec | |||
if task.Duration < 0 { | |||
task.Duration = 0 | |||
task.ContainerIp = "" | |||
if task.IsNewAITask() { | |||
task, _ = ai_task.UpdateCloudbrain(task) | |||
} else if task.DeletedAt.IsZero() && cloudbrainTask.IsTaskNotStop(task) { //normal record | |||
result, err := grampus.GetNotebookJob(task.JobID) | |||
if err != nil { | |||
log.Error("GetJob failed:" + err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) | |||
} | |||
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||
task.EndTime = task.StartTime.Add(task.Duration) | |||
} | |||
task.CorrectCreateUnix() | |||
if oldStatus != task.Status { | |||
notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||
if result != nil { | |||
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { | |||
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] | |||
} | |||
oldStatus := task.Status | |||
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { | |||
task.Duration = result.JobInfo.RunSec | |||
if task.Duration < 0 { | |||
task.Duration = 0 | |||
} | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) | |||
} | |||
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||
task.EndTime = task.StartTime.Add(task.Duration) | |||
} | |||
task.CorrectCreateUnix() | |||
if oldStatus != task.Status { | |||
notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||
} | |||
} | |||
err = models.UpdateJob(task) | |||
if err != nil { | |||
log.Error("UpdateJob failed:" + err.Error()) | |||
} | |||
} | |||
} | |||
err = models.UpdateJob(task) | |||
if err != nil { | |||
log.Error("UpdateJob failed:" + err.Error()) | |||
} | |||
} | |||
} | |||
if len(task.Parameters) > 0 { | |||
var parameters models.Parameters | |||
err := json.Unmarshal([]byte(task.Parameters), ¶meters) | |||
if err != nil { | |||
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) | |||
ctx.ServerError("system error", err) | |||
return | |||
} | |||
if len(task.Parameters) > 0 { | |||
var parameters models.Parameters | |||
err := json.Unmarshal([]byte(task.Parameters), ¶meters) | |||
if err != nil { | |||
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) | |||
ctx.ServerError("system error", err) | |||
return | |||
} | |||
if len(parameters.Parameter) > 0 { | |||
paramTemp := "" | |||
for _, Parameter := range parameters.Parameter { | |||
param := Parameter.Label + " = " + Parameter.Value + "; " | |||
paramTemp = paramTemp + param | |||
if len(parameters.Parameter) > 0 { | |||
paramTemp := "" | |||
for _, Parameter := range parameters.Parameter { | |||
param := Parameter.Label + " = " + Parameter.Value + "; " | |||
paramTemp = paramTemp + param | |||
} | |||
task.Parameters = paramTemp[:len(paramTemp)-2] | |||
} else { | |||
task.Parameters = "" | |||
} | |||
} | |||
user, err := models.GetUserByID(task.UserID) | |||
if err == nil { | |||
task.User = user | |||
} | |||
task.Parameters = paramTemp[:len(paramTemp)-2] | |||
} else { | |||
task.Parameters = "" | |||
} | |||
} | |||
user, err := models.GetUserByID(task.UserID) | |||
if err == nil { | |||
task.User = user | |||
} | |||
prepareSpec4Show(ctx, task) | |||
ctx.Data["task"] = task | |||
ctx.Data["datasetDownload"] = getDatasetDownloadInfo(ctx, task) | |||
ctx.Data["modelDownload"] = getModelDownloadInfo(ctx, task) | |||
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, task) | |||
ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) | |||
ctx.Data["code_path"] = cloudbrain.CodeMountPath | |||
ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath | |||
ctx.Data["model_path"] = cloudbrain.ModelMountPath | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookShow) | |||
prepareSpec4Show(ctx, task) | |||
ctx.Data["task"] = task | |||
ctx.Data["datasetDownload"] = getDatasetDownloadInfo(ctx, task) | |||
ctx.Data["modelDownload"] = getModelDownloadInfo(ctx, task) | |||
ctx.Data["canDownload"] = cloudbrain.CanDownloadJob(ctx, task) | |||
ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) | |||
ctx.Data["code_path"] = cloudbrain.CodeMountPath | |||
ctx.Data["dataset_path"] = cloudbrain.DataSetMountPath | |||
ctx.Data["model_path"] = cloudbrain.ModelMountPath | |||
ctx.HTML(http.StatusOK, tplGrampusNotebookShow) | |||
*/ | |||
} | |||
func getDatasetDownloadInfo(ctx *context.Context, task *models.Cloudbrain) []*models.DatasetDownload { | |||
@@ -1659,13 +1693,26 @@ func GrampusDownloadLog(ctx *context.Context) { | |||
ctx.ServerError(err.Error(), err) | |||
return | |||
} | |||
fileName := job.JobName + "-log.txt" | |||
content, err := grampus.GetTrainJobLog(job.JobID) | |||
nodeIdStr := ctx.Params(":nodeId") | |||
var content string | |||
if nodeIdStr != "" { | |||
nodeId, _ := strconv.Atoi(nodeIdStr) | |||
fileName = job.JobName + "-" + strconv.Itoa(nodeId+1) + "-log.txt" | |||
if job.WorkServerNumber < 1 || nodeId > job.WorkServerNumber-1 { | |||
ctx.NotFound("query parameter is wrong", nil) | |||
return | |||
} | |||
content, err = grampus.GetTrainJobLog(job.JobID, nodeId) | |||
} else { | |||
content, err = grampus.GetTrainJobLog(job.JobID) | |||
} | |||
if err != nil { | |||
log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) | |||
content = "" | |||
} | |||
fileName := job.JobName + "-log.txt" | |||
ctx.Resp.Header().Set("Content-Disposition", "attachment; filename="+fileName) | |||
ctx.Resp.Header().Set("Content-Type", "application/octet-stream") | |||
var b []byte = []byte(content) | |||
@@ -1696,7 +1743,19 @@ func GrampusGetLog(ctx *context.Context) { | |||
exitDiagnostics = result.ExitDiagnostics | |||
} | |||
content, err := grampus.GetTrainJobLog(job.JobID) | |||
nodeIdStr := ctx.Params(":nodeId") | |||
var content string | |||
if nodeIdStr != "" { | |||
nodeId, _ := strconv.Atoi(nodeIdStr) | |||
if job.WorkServerNumber < 1 || nodeId > job.WorkServerNumber-1 { | |||
ctx.NotFound("query parameter is wrong", nil) | |||
return | |||
} | |||
content, err = grampus.GetTrainJobLog(job.JobID, nodeId) | |||
} else { | |||
content, err = grampus.GetTrainJobLog(job.JobID) | |||
} | |||
if err != nil { | |||
log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) | |||
ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
@@ -1734,7 +1793,17 @@ func GrampusMetrics(ctx *context.Context) { | |||
} | |||
var result models.NewModelArtsMetricStatisticResult | |||
if job.IsNPUTask() { | |||
result, err = grampus.GetGrampusMetrics(job.JobID, 0, 0) | |||
nodeIdStr := ctx.Params(":nodeId") | |||
if nodeIdStr != "" { | |||
nodeId, _ := strconv.Atoi(nodeIdStr) | |||
if job.WorkServerNumber < 1 || nodeId > job.WorkServerNumber-1 { | |||
ctx.NotFound("query parameter is wrong", nil) | |||
return | |||
} | |||
result, err = grampus.GetGrampusMetrics(job.JobID, 0, 0, nodeId) | |||
} else { | |||
result, err = grampus.GetGrampusMetrics(job.JobID, 0, 0) | |||
} | |||
} else if job.IsGPUTask() { | |||
startTime := int64(job.StartTime) | |||
if startTime == 0 { | |||
@@ -2016,6 +2085,36 @@ func GrampusNotebookDebug(ctx *context.Context) { | |||
} | |||
func GrampusNotebookRestart(ctx *context.Context) { | |||
var id = ctx.Params(":id") | |||
var resultCode = "-1" | |||
var errorMsg = "" | |||
var status = "" | |||
t := ctx.Cloudbrain | |||
if t.IsNewAITask() { | |||
res, bizErr := ai_task.RestartAITask(t.ID, ctx.Repo.GitRepo, ctx.Repo.Repository, ctx.User) | |||
if bizErr != nil { | |||
log.Error("lRestartAITask failed:task.ID=%d err=%v", t.ID, bizErr.DefaultMsg) | |||
errorMsg = ctx.Tr(bizErr.TrCode) | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": resultCode, | |||
"error_msg": errorMsg, | |||
"status": status, | |||
"id": id, | |||
}) | |||
return | |||
} | |||
id = strconv.FormatInt(res.ID, 10) | |||
status = res.Status | |||
resultCode = "0" | |||
ctx.JSON(200, map[string]string{ | |||
"result_code": resultCode, | |||
"error_msg": errorMsg, | |||
"status": status, | |||
"id": id, | |||
}) | |||
return | |||
} | |||
cloudbrainTask.GrampusNotebookRestart(ctx) | |||
} | |||
@@ -0,0 +1,30 @@ | |||
package repo | |||
import ( | |||
"net/http" | |||
"code.gitea.io/gitea/modules/base" | |||
"code.gitea.io/gitea/modules/context" | |||
) | |||
const ( | |||
tplGrampusOnlineInferIndex base.TplName = "repo/grampus/onlineinfer/list" | |||
tplGrampusOnlineInferShow base.TplName = "repo/grampus/onlineinfer/show" | |||
tplGrampusOnlineInferNew base.TplName = "repo/grampus/onlineinfer/new" | |||
) | |||
// GrampusOnlineInferNew renders the creation page for a Grampus
// online-inference task. Only the page flag is set server-side.
func GrampusOnlineInferNew(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true
	ctx.HTML(http.StatusOK, tplGrampusOnlineInferNew)
}
// GrampusOnlineInferShow renders the detail page of a single Grampus
// online-inference task; the task data itself is loaded client-side.
func GrampusOnlineInferShow(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true
	ctx.HTML(http.StatusOK, tplGrampusOnlineInferShow)
}
// GrampusOnlineInferIndex renders the list page of Grampus
// online-inference tasks for the current repository.
func GrampusOnlineInferIndex(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true
	ctx.HTML(http.StatusOK, tplGrampusOnlineInferIndex)
}
@@ -2,6 +2,7 @@ package repo | |||
import ( | |||
"archive/zip" | |||
ai_task "code.gitea.io/gitea/services/ai_task_service/task" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
@@ -124,8 +125,8 @@ func MustEnableModelArts(ctx *context.Context) { | |||
} | |||
func NotebookNew(ctx *context.Context) { | |||
notebookNewDataPrepare(ctx) | |||
// notebookNewDataPrepare(ctx) | |||
ctx.Data["PageIsCloudBrain"] = true | |||
ctx.HTML(200, tplModelArtsNotebookNew) | |||
} | |||
@@ -305,63 +306,67 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm | |||
func NotebookShow(ctx *context.Context) { | |||
ctx.Data["PageIsCloudBrain"] = true | |||
debugListType := ctx.Query("debugListType") | |||
if debugListType == "" { | |||
debugListType = "all" | |||
} | |||
var ID = ctx.Params(":id") | |||
task, err := models.GetCloudbrainByIDWithDeleted(ID) | |||
if err != nil { | |||
log.Error("GET job error", err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
if task.DeletedAt.IsZero() && !task.Cleared { //normal record | |||
err := modelarts.HandleNotebookInfo(task) | |||
ctx.HTML(200, tplModelArtsNotebookShow) | |||
return | |||
/* | |||
debugListType := ctx.Query("debugListType") | |||
if debugListType == "" { | |||
debugListType = "all" | |||
} | |||
var ID = ctx.Params(":id") | |||
task, err := models.GetCloudbrainByIDWithDeleted(ID) | |||
if err != nil { | |||
ctx.Data["error"] = err.Error() | |||
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) | |||
log.Error("GET job error", err.Error()) | |||
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
return | |||
} | |||
} | |||
datasetDownload := make([]*models.DatasetDownload, 0) | |||
var modelDownload models.ModelDownload | |||
if ctx.IsSigned { | |||
if task.Uuid != "" && task.UserID == ctx.User.ID { | |||
datasetDownload = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, true) | |||
} | |||
if task.ModelName != "" && task.UserID == ctx.User.ID { | |||
modelDownload = GetModelDownload(task) | |||
if task.DeletedAt.IsZero() && !task.Cleared { //normal record | |||
err := modelarts.HandleNotebookInfo(task) | |||
if err != nil { | |||
ctx.Data["error"] = err.Error() | |||
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) | |||
return | |||
} | |||
} | |||
datasetDownload := make([]*models.DatasetDownload, 0) | |||
var modelDownload models.ModelDownload | |||
if ctx.IsSigned { | |||
if task.Uuid != "" && task.UserID == ctx.User.ID { | |||
datasetDownload = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, true) | |||
} | |||
if task.ModelName != "" && task.UserID == ctx.User.ID { | |||
modelDownload = GetModelDownload(task) | |||
} | |||
user, err := models.GetUserByID(task.UserID) | |||
if err == nil { | |||
task.User = user | |||
} | |||
prepareSpec4Show(ctx, task) | |||
if task.TrainJobDuration == "" { | |||
if task.Duration == 0 { | |||
var duration int64 | |||
if task.Status == string(models.JobRunning) { | |||
duration = time.Now().Unix() - int64(task.CreatedUnix) | |||
} else { | |||
duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) | |||
} | |||
task.Duration = duration | |||
} | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
} | |||
ctx.Data["duration"] = task.TrainJobDuration | |||
ctx.Data["datasetDownload"] = datasetDownload | |||
ctx.Data["modelDownload"] = modelDownload | |||
ctx.Data["task"] = task | |||
ctx.Data["ID"] = ID | |||
ctx.Data["jobName"] = task.JobName | |||
ctx.Data["debugListType"] = debugListType | |||
ctx.HTML(200, tplModelArtsNotebookShow) | |||
user, err := models.GetUserByID(task.UserID) | |||
if err == nil { | |||
task.User = user | |||
} | |||
prepareSpec4Show(ctx, task) | |||
if task.TrainJobDuration == "" { | |||
if task.Duration == 0 { | |||
var duration int64 | |||
if task.Status == string(models.JobRunning) { | |||
duration = time.Now().Unix() - int64(task.CreatedUnix) | |||
} else { | |||
duration = int64(task.UpdatedUnix) - int64(task.CreatedUnix) | |||
} | |||
task.Duration = duration | |||
} | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
} | |||
ctx.Data["duration"] = task.TrainJobDuration | |||
ctx.Data["datasetDownload"] = datasetDownload | |||
ctx.Data["modelDownload"] = modelDownload | |||
ctx.Data["task"] = task | |||
ctx.Data["ID"] = ID | |||
ctx.Data["jobName"] = task.JobName | |||
ctx.Data["debugListType"] = debugListType | |||
ctx.HTML(200, tplModelArtsNotebookShow) | |||
*/ | |||
} | |||
func GetModelDownload(task *models.Cloudbrain) models.ModelDownload { | |||
@@ -673,6 +678,19 @@ func NotebookStop(ctx *context.Context) { | |||
errorMsg = ctx.Tr("cloudbrain.Already_stopped") | |||
break | |||
} | |||
if res, isHandled, err := ai_task.HandleNewAITaskStop(task.ID); isHandled { | |||
if err != nil { | |||
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||
resultCode = "-1" | |||
errorMsg = err.Error() | |||
if strings.Contains(err.Error(), modelarts.NotebookNotFound) { | |||
errorMsg = "the job's version is too old and can not be restarted" | |||
} | |||
break | |||
} | |||
status = res.Status | |||
break | |||
} | |||
err, res := StopModelArtsNotebook(task) | |||
@@ -734,6 +752,22 @@ func NotebookDel(ctx *context.Context) { | |||
var listType = ctx.Query("debugListType") | |||
task := ctx.Cloudbrain | |||
if isHandled, err := ai_task.HandleNewAITaskDelete(task.ID); isHandled { | |||
if err != nil { | |||
log.Error("DeleteJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) | |||
ctx.RenderWithErr("DeleteJob failed", tplDebugJobIndex, nil) | |||
} | |||
var isAdminPage = ctx.Query("isadminpage") | |||
var isHomePage = ctx.Query("ishomepage") | |||
if ctx.IsUserSiteAdmin() && isAdminPage == "true" { | |||
ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains") | |||
} else if isHomePage == "true" { | |||
ctx.Redirect(setting.AppSubURL + "/cloudbrains") | |||
} else { | |||
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=" + listType) | |||
} | |||
} | |||
if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsDeleted) { | |||
log.Error("the job(%s) has not been stopped", task.JobName) | |||
ctx.RenderWithErr("the job has not been stopped", tplDebugJobIndex, nil) | |||
@@ -489,20 +489,20 @@ func SettingsPost(ctx *context.Context, form auth.RepoSettingForm) { | |||
ctx.RenderWithErr(ctx.Tr("form.enterred_invalid_repo_name"), tplSettingsOptions, nil) | |||
return | |||
} | |||
deployments, err := models.GetRunningServiceByUser(ctx.User.ID) | |||
if err != nil { | |||
ctx.ServerError("GetRunningServiceByUser", err) | |||
return | |||
} | |||
if deployments != nil { | |||
if len(deployments) > 0 { | |||
ctx.Data["Err_RepoName"] = nil | |||
log.Error("盘古部署删除项目失败,repo id %v, 用户 id%v", repo.ID, ctx.User.ID) | |||
ctx.Flash.Error(ctx.Tr("deployment.deletion_notice_repo")) | |||
ctx.Redirect(ctx.Repo.RepoLink + "/settings") | |||
return | |||
// finetune: openi-notebook repo can not be deleted if it has running service | |||
if repo.Name == "openi-notebook" { | |||
if deployments, err := models.GetRunningServiceByUser(ctx.User.ID); deployments != nil && err == nil { | |||
if len(deployments) > 0 { | |||
ctx.Data["Err_RepoName"] = nil | |||
log.Error("panguService: delete repo failed, repo %s, user %s", repo.ID, ctx.User.ID) | |||
ctx.Flash.Error(ctx.Tr("deployment.deletion_notice_repo")) | |||
ctx.Redirect(ctx.Repo.RepoLink + "/settings") | |||
return | |||
} | |||
} | |||
} | |||
count, err := models.GetCloudbrainRunCountByRepoID(repo.ID) | |||
if err != nil { | |||
ctx.ServerError("GetCloudbrainCountByRepoID failed", err) | |||
@@ -29,7 +29,11 @@ type TrFunc func(string, ...interface{}) string | |||
func OuterTrBizError(err *BizError, locale macaron.Locale) *AiforgeOuterResponse { | |||
msg := err.DefaultMsg | |||
if locale != nil && err.TrCode != "" { | |||
msg = locale.Tr(err.TrCode) | |||
if err.TrParams == nil || len(err.TrParams) == 0 { | |||
msg = locale.Tr(err.TrCode) | |||
} else { | |||
msg = locale.Tr(err.TrCode, err.TrParams...) | |||
} | |||
} | |||
return &AiforgeOuterResponse{Code: err.Code, Msg: msg} | |||
} | |||
@@ -4,10 +4,28 @@ type BizError struct { | |||
Code int | |||
DefaultMsg string | |||
TrCode string | |||
TrParams []interface{} | |||
} | |||
//当调用此方法时意味着错误信息中有占位符,需要传入参数 | |||
//因此此时需要新建一个对象避免并发问题 | |||
func (e *BizError) WithParams(params ...interface{}) *BizError { | |||
newErr := &BizError{ | |||
Code: e.Code, | |||
DefaultMsg: e.DefaultMsg, | |||
TrCode: e.TrCode, | |||
} | |||
if e.TrParams == nil { | |||
newErr.TrParams = params | |||
} else { | |||
newErr.TrParams = append(e.TrParams, params) | |||
} | |||
return newErr | |||
} | |||
func NewBizError(err error) *BizError { | |||
return &BizError{Code: RESPONSE_CODE_ERROR_DEFAULT, DefaultMsg: err.Error()} | |||
return &BizError{Code: RESPONSE_CODE_ERROR_DEFAULT, DefaultMsg: err.Error(), TrCode: err.Error()} | |||
} | |||
func BuildBizError(code int, defaultMsg string, trCode ...string) *BizError { | |||
@@ -17,3 +35,10 @@ func BuildBizError(code int, defaultMsg string, trCode ...string) *BizError { | |||
} | |||
return &BizError{Code: code, DefaultMsg: defaultMsg, TrCode: t} | |||
} | |||
func BuildDefaultBizError(defaultMsg string, trCode ...string) *BizError { | |||
t := "" | |||
if len(t) == 0 { | |||
t = trCode[0] | |||
} | |||
return &BizError{Code: RESPONSE_CODE_ERROR_DEFAULT, DefaultMsg: defaultMsg, TrCode: t} | |||
} |
@@ -11,6 +11,7 @@ var BADGES_STILL_HAS_USERS = &BizError{Code: 1005, DefaultMsg: "Please delete us | |||
var SYSTEM_ERROR = &BizError{Code: 9009, DefaultMsg: "System error.Please try again later", TrCode: "common_error.system_error"} | |||
var INSUFFICIENT_PERMISSION = &BizError{Code: 9003, DefaultMsg: "insufficient permissions", TrCode: "common_error.insufficient_permission"} | |||
var PARAM_ERROR = &BizError{Code: 9001, DefaultMsg: "param error", TrCode: "common_error.param_error"} | |||
var WECHAT_NOT_BIND = &BizError{Code: 9002, DefaultMsg: "Please scan the code and bind to wechat first", TrCode: "common_error.wechat_not_bind"} | |||
//云脑任务相关错误 | |||
var AI_TASK_NOT_EXISTS = &BizError{Code: 2001, DefaultMsg: "AI task not exists", TrCode: "ai_task.task_not_exists"} | |||
@@ -23,4 +24,12 @@ var DATASET_NOT_EXISTS = &BizError{Code: 2007, DefaultMsg: "The part of datasets | |||
var MODEL_NOT_EXISTS = &BizError{Code: 2008, DefaultMsg: "The model in the task does not exist or has been deleted, please create a new debug job.", TrCode: "repo.debug.manage.model_not_exist"} | |||
var RESULT_CLEARD = &BizError{Code: 2009, DefaultMsg: "The files of the task have been cleared, can not restart any more, please create a new debug task instead.", TrCode: "cloudbrain.result_cleared"} | |||
var CREATE_FAILED = &BizError{Code: 2010, DefaultMsg: "Create AI task failed", TrCode: "ai_task.create_failed"} | |||
var RESTART_FAILED = &BizError{Code: 2010, DefaultMsg: "Restart AI task failed", TrCode: "ai_task.restart_failed"} | |||
// AI-task business errors, codes 2011-2020.
var RESTART_FAILED = &BizError{Code: 2011, DefaultMsg: "Restart AI task failed", TrCode: "ai_task.restart_failed"}
var STOP_FAILED = &BizError{Code: 2012, DefaultMsg: "Stop AI task failed", TrCode: "ai_task.stop_failed"}
var DATASET_SIZE_OVER_LIMIT = &BizError{Code: 2013, DefaultMsg: "The size of dataset exceeds limitation", TrCode: "ai_task.dataset_size_over_limit"}
// NOTE(review): code 2013 is already used by DATASET_SIZE_OVER_LIMIT above;
// BOOT_FILE_MUST_BE_PYTHON likely wants its own code (2015/2016 appear
// unused) — confirm no client matches on the numeric code before changing.
var BOOT_FILE_MUST_BE_PYTHON = &BizError{Code: 2013, DefaultMsg: "The boot file must be a python file", TrCode: "ai_task.boot_file_must_python"}
var BOOT_FILE_NOT_EXIST = &BizError{Code: 2014, DefaultMsg: "The boot file not exist", TrCode: "ai_task.boot_file_not_exist"}
var DATASET_SELECT_ERROR = &BizError{Code: 2017, DefaultMsg: "Dataset select error: the count exceed the limit or has same name", TrCode: "cloudbrain.error.dataset_select"}
var PARTIAL_DATASETS_NOT_AVAILABLE = &BizError{Code: 2018, DefaultMsg: "There are non-existent or deleted files in the selected dataset file, please select again", TrCode: "cloudbrain.error.partial_datasets_not_available"}
var LOAD_CODE_FAILED = &BizError{Code: 2019, DefaultMsg: "Fail to load code, please check if the right branch is selected.", TrCode: "cloudbrain.load_code_failed"}
var BRANCH_NOT_EXISTS = &BizError{Code: 2020, DefaultMsg: "The branch does not exist", TrCode: "ai_task.branch_not_exists"}
@@ -385,7 +385,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Get("", modelapp.ModelBaseUI) | |||
m.Group("/pangufinetune", func() { | |||
m.Get("", modelapp.PanguFinetuneUI) | |||
m.Get("/create", reqSignIn, modelapp.PanguFinetuneCreateUI) | |||
m.Get("/create", reqSignIn, reqWechatBind, modelapp.PanguFinetuneCreateUI) | |||
m.Get("/inference", reqSignIn, modelapp.PanguInferenceUI) | |||
}) | |||
@@ -428,6 +428,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Get("/data_analysis/ProTrend", routers.ExploreDataAnalysisProTrend) | |||
m.Get("/data_analysis/Overview", routers.ExploreDataAnalysisOverview) | |||
m.Get("/data_analysis/BrainAnalysis", routers.ExploreDataAnalysisBrainAnalysis) | |||
m.Get("/center_map", reqSignIn, routers.CenterMapUI) | |||
}, ignSignIn) | |||
m.Combo("/install", routers.InstallInit).Get(routers.Install). | |||
@@ -1318,6 +1319,14 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateGrampusNotebookForm{}), context.PointAccount(), repo.GrampusNotebookCreate) | |||
}) | |||
m.Group("/onlineinfer", func() { | |||
m.Get("", reqRepoCloudBrainReader, repo.GrampusOnlineInferIndex) | |||
m.Group("/:id", func() { | |||
m.Get("", reqRepoCloudBrainReader, repo.GrampusOnlineInferShow) | |||
}) | |||
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, context.PointAccount(), repo.GrampusOnlineInferNew) | |||
}) | |||
m.Group("/train-job", func() { | |||
m.Group("/:jobid", func() { | |||
m.Get("", reqRepoCloudBrainReader, repo.GrampusTrainJobShow) | |||
@@ -85,6 +85,7 @@ func retrieveFeeds(ctx *context.Context, options models.GetFeedsOptions) { | |||
if act.ActUser != nil { | |||
userCache[act.ActUserID] = act.ActUser | |||
} | |||
act.FilterCloudbrainInfo() | |||
} | |||
for _, act := range actions { | |||
@@ -1,14 +1,18 @@ | |||
package cluster | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"errors" | |||
"fmt" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/manager/client/grampus" | |||
"code.gitea.io/gitea/models" | |||
model_grampus "code.gitea.io/gitea/modules/grampus" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"errors" | |||
"fmt" | |||
"strings" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
) | |||
type C2NetClusterAdapter struct { | |||
@@ -16,10 +20,10 @@ type C2NetClusterAdapter struct { | |||
func init() { | |||
//注册到一个Map | |||
AddCluster(ai_task_entity.C2Net, new(C2NetClusterAdapter)) | |||
AddCluster(entity.C2Net, new(C2NetClusterAdapter)) | |||
} | |||
func (c C2NetClusterAdapter) CreateNoteBook(req ai_task_entity.CreateNoteBookTaskRequest) (*ai_task_entity.CreateNoteBookTaskResponse, error) { | |||
func (c C2NetClusterAdapter) CreateNoteBook(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) { | |||
jobResult, err := grampus.CreateNotebookJob(convertNoteBookReq2Grampus(req)) | |||
if err != nil { | |||
log.Error("CreateNoteBook failed: %v", err.Error()) | |||
@@ -32,7 +36,20 @@ func (c C2NetClusterAdapter) CreateNoteBook(req ai_task_entity.CreateNoteBookTas | |||
return convertGrampus2NoteBookRes(jobResult), nil | |||
} | |||
func (c C2NetClusterAdapter) GetImages(req ai_task_entity.GetImageReq) ([]ai_task_entity.ClusterImage, bool, error) { | |||
func (c C2NetClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) { | |||
jobResult, err := grampus.CreateNotebookJob(convertOnlineInfer2Grampus(req)) | |||
if err != nil { | |||
log.Error("CreateNoteBook failed: %v", err.Error()) | |||
return nil, err | |||
} | |||
if jobResult.ErrorCode > 0 { | |||
log.Error("CreateNotebookJob err.req.Name = %s ErrorCode = %d ErrorMsg = %s", req.Name, jobResult.ErrorCode, jobResult.ErrorMsg) | |||
return nil, errors.New(fmt.Sprintf("CreateNotebookJob err[%d%s]", jobResult.ErrorCode, jobResult.ErrorMsg)) | |||
} | |||
return convertGrampus2NoteBookRes(jobResult), nil | |||
} | |||
func (c C2NetClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
processType := req.ComputeSource.FullName | |||
images, err := grampus.GetImages(processType, string(req.JobType)) | |||
if err != nil { | |||
@@ -42,23 +59,48 @@ func (c C2NetClusterAdapter) GetImages(req ai_task_entity.GetImageReq) ([]ai_tas | |||
if images == nil || images.Infos == nil || len(images.Infos) == 0 { | |||
return nil, true, err | |||
} | |||
r := make([]ai_task_entity.ClusterImage, len(images.Infos)) | |||
r := make([]entity.ClusterImage, len(images.Infos)) | |||
for i, v := range images.Infos { | |||
r[i] = ConvertGrampusImageToStandard(v) | |||
} | |||
return r, false, nil | |||
} | |||
func ConvertGrampusImageToStandard(image models.GrampusImage) ai_task_entity.ClusterImage { | |||
return ai_task_entity.ClusterImage{ | |||
// ConvertGrampusImageToStandard maps a Grampus image record onto the
// cluster-neutral ClusterImage entity; only the id and name are carried
// over.
func ConvertGrampusImageToStandard(image models.GrampusImage) entity.ClusterImage {
	return entity.ClusterImage{
		ImageId:   image.ID,
		ImageName: image.Name,
	}
}
func convertNoteBookReq2Grampus(req ai_task_entity.CreateNoteBookTaskRequest) models.CreateGrampusNotebookRequest { | |||
var commandGpuDebug = "mkdir -p /dataset;! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" | |||
command := fmt.Sprintf(commandGpuDebug, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) | |||
// convertNoteBookReq2Grampus translates a cluster-neutral notebook
// request into the Grampus job request, building the jupyter-lab startup
// command for GPU tasks (NPU tasks get an empty command).
// NOTE: assumes req.Tasks is non-empty — TODO confirm callers guarantee it.
func convertNoteBookReq2Grampus(req entity.CreateNoteBookTaskRequest) models.CreateGrampusNotebookRequest {
	// The notebook-dir defaults to /code; if the first task mounts code,
	// use the parent directory of its container path instead.
	codePath := "/code"
	if len(req.Tasks[0].Code) > 0 {
		codePath = req.Tasks[0].Code[0].ContainerPath
		if strings.Contains(codePath, "/") {
			codePath = codePath[0:strings.LastIndex(codePath, "/")]
		}
	}
	// Jupyter-lab launch command: the cull timeouts/intervals come from
	// settings and codePath fills the trailing --notebook-dir verb.
	var commandGpuDebug = "mkdir -p /dataset;! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='%s' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;"
	command := fmt.Sprintf(commandGpuDebug, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval, codePath)
	// command := "bash && cd /code && unzip master.zip && cd test-export-data && uvicorn train:app --host 0.0.0.0 --port $OCTOPUS_NOTEBOOK_PORT"
	// NPU notebooks start jupyter through the platform image, so no
	// explicit command is sent.
	if models.NPU == req.Tasks[0].Spec.ComputeResource {
		command = ""
	}
	log.Info("debug cmd=" + command)
	tasks := make([]models.GrampusNotebookTask, len(req.Tasks))
	for i := 0; i < len(req.Tasks); i++ {
		t := req.Tasks[i]
		tasks[i] = convertNoteBookTask2Grampus(t, command)
	}
	return models.CreateGrampusNotebookRequest{Name: req.Name, Tasks: tasks}
}
func convertOnlineInfer2Grampus(req entity.CreateNoteBookTaskRequest) models.CreateGrampusNotebookRequest { | |||
command := generateCommand(req.RepoName, req.Tasks[0].BootFile, req.PrimitiveDatasetName) | |||
tasks := make([]models.GrampusNotebookTask, len(req.Tasks)) | |||
for i := 0; i < len(req.Tasks); i++ { | |||
@@ -69,7 +111,51 @@ func convertNoteBookReq2Grampus(req ai_task_entity.CreateNoteBookTaskRequest) mo | |||
return models.CreateGrampusNotebookRequest{Name: req.Name, Tasks: tasks} | |||
} | |||
func convertNoteBookTask2Grampus(t ai_task_entity.NoteBookTask, command string) models.GrampusNotebookTask { | |||
func generateCommand(repoName, bootFile, datasetName string) string { | |||
//prepare | |||
//command := "bash && cd /code && unzip master.zip && cd test-export-data && uvicorn train:app --host 0.0.0.0 --port $OCTOPUS_NOTEBOOK_PORT" | |||
workDir := "/" | |||
command := "pip install gradio fastapi -i https://pypi.tuna.tsinghua.edu.cn/simple;" | |||
command += "pwd; cd " + workDir + fmt.Sprintf(model_grampus.CommandPrepareScriptGpu) | |||
//unzip code & dataset | |||
unZipDatasetCommand := cloudbrainTask.GenerateDatasetUnzipCommand(datasetName) | |||
bootFile = strings.ReplaceAll(bootFile, "\\", "/") | |||
bootfilepath := "" | |||
bootonlyfile := bootFile | |||
if strings.Index(bootFile, "/") != -1 { | |||
bootfilepath = bootFile[0:strings.LastIndex(bootFile, "/")] | |||
if strings.HasPrefix(bootfilepath, "/") { | |||
bootfilepath = bootfilepath[1:] | |||
} | |||
bootonlyfile = bootFile[strings.LastIndex(bootFile, "/")+1:] | |||
} | |||
log.Info("bootfilepath=" + bootfilepath + " bootonlyfile=" + bootonlyfile) | |||
copyDatasetCmd := getCopyCmd(datasetName, repoName, bootfilepath) | |||
copyDatasetPath := "/code/" + strings.ToLower(repoName) + "/" + bootfilepath | |||
commandUnzip := "export OPENI_GRADIO_URL=$OCTOPUS_NOTEBOOK_BASE_URL;" + "cd " + workDir + "code;echo \"start unzip code\";unzip -q master.zip; " + copyDatasetCmd + " echo \"start to unzip dataset\";cd " + copyDatasetPath + "; " + unZipDatasetCommand | |||
//commandUnzip := "cd " + workDir + "code;echo \"start unzip code\";unzip -q master.zip;echo \"start to unzip dataset\";cd " + workDir + "dataset;" + unZipDatasetCommand | |||
command += commandUnzip | |||
command += "echo \"unzip finished;start to exec code;\";" | |||
if strings.HasSuffix(bootonlyfile, ".py") { | |||
bootonlyfile = bootonlyfile[0 : len(bootonlyfile)-3] | |||
} | |||
command += "cd " + copyDatasetPath + ";uvicorn " + bootonlyfile + ":app --host 0.0.0.0 --port $OCTOPUS_NOTEBOOK_PORT " | |||
log.Info("comand=" + command) | |||
return command | |||
} | |||
// getCopyCmd returns a shell snippet that copies every dataset archive in the
// ";"-separated datasetName list from /dataset into the repository's code
// directory (/code/<lower(repoName)>/<bootfilepath>).
func getCopyCmd(datasetName, repoName, bootfilepath string) string {
	// The destination is the same for every archive, so build it once.
	dest := " /code/" + strings.ToLower(repoName) + "/" + bootfilepath + ";"
	var b strings.Builder
	for _, name := range strings.Split(datasetName, ";") {
		b.WriteString("cp /dataset/" + name + dest)
	}
	return b.String()
}
func convertNoteBookTask2Grampus(t entity.NoteBookTask, command string) models.GrampusNotebookTask { | |||
code := models.GrampusDataset{} | |||
codeArray := convertContainerArray2Grampus(t.Code) | |||
@@ -90,7 +176,7 @@ func convertNoteBookTask2Grampus(t ai_task_entity.NoteBookTask, command string) | |||
} | |||
} | |||
func convertContainerArray2Grampus(containerDatas []ai_task_entity.ContainerData) []models.GrampusDataset { | |||
func convertContainerArray2Grampus(containerDatas []entity.ContainerData) []models.GrampusDataset { | |||
res := make([]models.GrampusDataset, len(containerDatas)) | |||
for i := 0; i < len(containerDatas); i++ { | |||
d := containerDatas[i] | |||
@@ -99,7 +185,7 @@ func convertContainerArray2Grampus(containerDatas []ai_task_entity.ContainerData | |||
return res | |||
} | |||
func convertContainer2Grampus(d ai_task_entity.ContainerData) models.GrampusDataset { | |||
func convertContainer2Grampus(d entity.ContainerData) models.GrampusDataset { | |||
return models.GrampusDataset{ | |||
Name: d.Name, | |||
Bucket: d.Bucket, | |||
@@ -110,9 +196,9 @@ func convertContainer2Grampus(d ai_task_entity.ContainerData) models.GrampusData | |||
} | |||
} | |||
func convertGrampus2NoteBookRes(res *models.GrampusNotebookResponse) *ai_task_entity.CreateNoteBookTaskResponse { | |||
func convertGrampus2NoteBookRes(res *models.GrampusNotebookResponse) *entity.CreateNoteBookTaskResponse { | |||
jobInfo := res.JobInfo | |||
return &ai_task_entity.CreateNoteBookTaskResponse{ | |||
return &entity.CreateNoteBookTaskResponse{ | |||
StartedAt: jobInfo.StartedAt, | |||
RunSec: jobInfo.RunSec, | |||
CompletedAt: jobInfo.CompletedAt, | |||
@@ -126,7 +212,7 @@ func convertGrampus2NoteBookRes(res *models.GrampusNotebookResponse) *ai_task_en | |||
} | |||
} | |||
func (c C2NetClusterAdapter) RestartNoteBook(jobId string) (*ai_task_entity.RestartNoteBookTaskResponse, error) { | |||
func (c C2NetClusterAdapter) RestartNoteBook(jobId string) (*entity.RestartNoteBookTaskResponse, error) { | |||
res, err := grampus.RestartNotebookJob(jobId) | |||
if err != nil { | |||
log.Error("RestartNotebookJob err jobId=%s .%v", jobId, err) | |||
@@ -134,13 +220,16 @@ func (c C2NetClusterAdapter) RestartNoteBook(jobId string) (*ai_task_entity.Rest | |||
} | |||
if res.ErrorCode > 0 { | |||
log.Error("RestartNotebookJob err.jobId = %s ErrorCode = %d ErrorMsg = %s", jobId, res.ErrorCode, res.ErrorMsg) | |||
return nil, errors.New(fmt.Sprintf("RestartNotebookJob err[%d%s]", res.ErrorCode, res.ErrorMsg)) | |||
if entity.GrampusJobCanNotRestart.IsMatch(res.ErrorCode) { | |||
return nil, errors.New(entity.GrampusJobCanNotRestart.CodeTrCode) | |||
} | |||
return nil, errors.New(response.RESTART_FAILED.TrCode) | |||
} | |||
return convertToCreateNoteBookTaskResponse(res), nil | |||
} | |||
func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartResponse) *ai_task_entity.RestartNoteBookTaskResponse { | |||
return &ai_task_entity.RestartNoteBookTaskResponse{ | |||
func convertToCreateNoteBookTaskResponse(res *models.GrampusNotebookRestartResponse) *entity.RestartNoteBookTaskResponse { | |||
return &entity.RestartNoteBookTaskResponse{ | |||
JobId: res.NewId, | |||
Status: res.Status, | |||
} | |||
@@ -159,7 +248,10 @@ func (c C2NetClusterAdapter) StopNoteBook(jobId string) error { | |||
return nil | |||
} | |||
func (c C2NetClusterAdapter) QueryNoteBook(jobId string) (*ai_task_entity.QueryTaskResponse, error) { | |||
func (c C2NetClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) { | |||
if jobId == "" { | |||
return nil, errors.New("jobID is empty") | |||
} | |||
result, err := grampus.GetNotebookJob(jobId) | |||
if err != nil { | |||
return nil, err | |||
@@ -167,19 +259,19 @@ func (c C2NetClusterAdapter) QueryNoteBook(jobId string) (*ai_task_entity.QueryT | |||
if result == nil { | |||
return nil, nil | |||
} | |||
return ai_task_entity.ConvertGrampusNotebookResponse(result.JobInfo), nil | |||
return entity.ConvertGrampusNotebookResponse(result.JobInfo), nil | |||
} | |||
func (c C2NetClusterAdapter) QueryNoteBookByJobName(jobName string) ([]*ai_task_entity.QueryTaskResponse, error) { | |||
func (c C2NetClusterAdapter) QueryNoteBookByJobName(jobName string) ([]*entity.QueryTaskResponse, error) { | |||
res, err := grampus.GetJobListByJobName(jobName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
result := make([]*ai_task_entity.QueryTaskResponse, 0) | |||
result := make([]*entity.QueryTaskResponse, 0) | |||
if res != nil { | |||
for i := 0; i < len(res.JobInfos); i++ { | |||
if res.JobInfos[i].Name == jobName { | |||
result = append(result, ai_task_entity.ConvertGrampusTrainResponse(res.JobInfos[i])) | |||
result = append(result, entity.ConvertGrampusTrainResponse(res.JobInfos[i])) | |||
} | |||
} | |||
@@ -187,7 +279,7 @@ func (c C2NetClusterAdapter) QueryNoteBookByJobName(jobName string) ([]*ai_task_ | |||
return result, nil | |||
} | |||
func (c C2NetClusterAdapter) GetNoteBookLog(jobId string) (*ai_task_entity.ClusterLog, error) { | |||
func (c C2NetClusterAdapter) GetNoteBookLog(jobId string) (*entity.ClusterLog, error) { | |||
return nil, nil | |||
} | |||
@@ -201,8 +293,46 @@ func (c C2NetClusterAdapter) GetNoteBookUrl(jobId string) (string, error) { | |||
} | |||
return res.Url + "?token=" + res.Token, nil | |||
} | |||
// GetNoteBookOperationProfile returns the event history of a grampus notebook
// job as an OperationProfile. Notebook events come from GetDebugJobEvents;
// if the job record also carries exit diagnostics, they are appended as a
// final "Exit" event. Failure to fetch the job record itself is tolerated
// (the events already gathered are still returned).
func (c C2NetClusterAdapter) GetNoteBookOperationProfile(jobId string) (*entity.OperationProfile, error) {
	if jobId == "" {
		log.Error("jobid is empty")
		return nil, errors.New("jobid is empty")
	}
	jobResult, err := grampus.GetDebugJobEvents(jobId)
	if err != nil {
		log.Error("GetDebugJobEvents failed:%v", err)
		return nil, err
	}
	r := parseC2NetEventsToOperationProfile(jobResult.NotebookEvents)
	// Best effort: enrich the profile with the job's exit diagnostics.
	getJobResult, err := grampus.GetJob(jobId)
	if err == nil && getJobResult != nil && getJobResult.ExitDiagnostics != "" {
		r.Events = append(r.Events, entity.ProfileEvent{
			Message: getJobResult.ExitDiagnostics,
			Reason:  "Exit",
		})
	}
	return r, nil
}
func parseC2NetEventsToOperationProfile(notebookEvents []models.GrampusJobEvents) *entity.OperationProfile { | |||
events := make([]entity.ProfileEvent, 0) | |||
for i := 0; i < len(notebookEvents); i++ { | |||
e := notebookEvents[i] | |||
if e.Message == "" { | |||
continue | |||
} | |||
events = append(events, entity.ProfileEvent{ | |||
Message: e.Message, | |||
Reason: e.Reason, | |||
Name: e.Name, | |||
Timestamp: e.Timestamp, | |||
}) | |||
} | |||
return &entity.OperationProfile{Events: events} | |||
} | |||
func (c C2NetClusterAdapter) CreateTrainJob(req ai_task_entity.CreateTrainTaskRequest) (*ai_task_entity.CreateTrainTaskResponse, error) { | |||
func (c C2NetClusterAdapter) CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) { | |||
jobResult, err := grampus.CreateJob(convertTrainReq2Grampus(req)) | |||
if err != nil { | |||
log.Error("CreateNoteBook failed: %v", err.Error()) | |||
@@ -211,7 +341,7 @@ func (c C2NetClusterAdapter) CreateTrainJob(req ai_task_entity.CreateTrainTaskRe | |||
return convertGrampus2TrainRes(jobResult), nil | |||
} | |||
func convertTrainReq2Grampus(req ai_task_entity.CreateTrainTaskRequest) models.CreateGrampusJobRequest { | |||
func convertTrainReq2Grampus(req entity.CreateTrainTaskRequest) models.CreateGrampusJobRequest { | |||
command := generateGrampusTrainCommand(req) | |||
tasks := make([]models.GrampusTasks, len(req.Tasks)) | |||
@@ -223,7 +353,7 @@ func convertTrainReq2Grampus(req ai_task_entity.CreateTrainTaskRequest) models.C | |||
return models.CreateGrampusJobRequest{Name: req.Name, Tasks: tasks} | |||
} | |||
func generateGrampusTrainCommand(req ai_task_entity.CreateTrainTaskRequest) string { | |||
func generateGrampusTrainCommand(req entity.CreateTrainTaskRequest) string { | |||
var command string | |||
t := req.Tasks[0] | |||
computeResource := t.Spec.ComputeResource | |||
@@ -298,7 +428,7 @@ func getNpuModelObjectKey(jobName string) string { | |||
return setting.CodePathPrefix + jobName + RemoteModelPath + "/" + models.ModelSuffix | |||
} | |||
func convertTrainTask2Grampus(t ai_task_entity.TrainTask, command string) models.GrampusTasks { | |||
func convertTrainTask2Grampus(t entity.TrainTask, command string) models.GrampusTasks { | |||
return models.GrampusTasks{ | |||
Name: t.Name, | |||
ResourceSpecId: t.ResourceSpecId, | |||
@@ -315,9 +445,9 @@ func convertTrainTask2Grampus(t ai_task_entity.TrainTask, command string) models | |||
} | |||
} | |||
func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *ai_task_entity.CreateTrainTaskResponse { | |||
func convertGrampus2TrainRes(res *models.CreateGrampusJobResponse) *entity.CreateTrainTaskResponse { | |||
jobInfo := res.JobInfo | |||
return &ai_task_entity.CreateTrainTaskResponse{ | |||
return &entity.CreateTrainTaskResponse{ | |||
StartedAt: jobInfo.StartedAt, | |||
RunSec: jobInfo.RunSec, | |||
CompletedAt: jobInfo.CompletedAt, | |||
@@ -337,13 +467,13 @@ func (c C2NetClusterAdapter) DeleteTrainJob(string) error { | |||
func (c C2NetClusterAdapter) StopTrainJob(string) error { | |||
return nil | |||
} | |||
func (c C2NetClusterAdapter) QueryTrainJob(string) (*ai_task_entity.QueryTaskResponse, error) { | |||
func (c C2NetClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) { | |||
return nil, nil | |||
} | |||
func (c C2NetClusterAdapter) RestartTrainJob(string) (*ai_task_entity.CreateTrainTaskResponse, error) { | |||
func (c C2NetClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
} | |||
func (c C2NetClusterAdapter) GetTrainLog(jobId string) (*ai_task_entity.ClusterLog, error) { | |||
func (c C2NetClusterAdapter) GetTrainLog(jobId string) (*entity.ClusterLog, error) { | |||
return nil, nil | |||
} |
@@ -2,12 +2,14 @@ package cluster | |||
import "C" | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"encoding/json" | |||
"errors" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"errors" | |||
) | |||
type CloudbrainOneClusterAdapter struct { | |||
@@ -15,10 +17,10 @@ type CloudbrainOneClusterAdapter struct { | |||
func init() { | |||
//注册到一个Map | |||
AddCluster(ai_task_entity.OpenICloudbrainOne, new(CloudbrainOneClusterAdapter)) | |||
AddCluster(entity.OpenICloudbrainOne, new(CloudbrainOneClusterAdapter)) | |||
} | |||
func (c CloudbrainOneClusterAdapter) CreateNoteBook(req ai_task_entity.CreateNoteBookTaskRequest) (*ai_task_entity.CreateNoteBookTaskResponse, error) { | |||
func (c CloudbrainOneClusterAdapter) CreateNoteBook(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) { | |||
jobResult, err := cloudbrain.CreateJob(req.Name, convertNoteBookReq2CloudbrainOne(req)) | |||
if err != nil { | |||
log.Error("CreateNoteBook failed: %v", err.Error()) | |||
@@ -27,13 +29,17 @@ func (c CloudbrainOneClusterAdapter) CreateNoteBook(req ai_task_entity.CreateNot | |||
return convertCloudbrainOne2NoteBookRes(jobResult), nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetImages(req ai_task_entity.GetImageReq) ([]ai_task_entity.ClusterImage, bool, error) { | |||
func (c CloudbrainOneClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) { | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
return nil, true, nil | |||
} | |||
var SubTaskName = "task1" | |||
func convertNoteBookReq2CloudbrainOne(req ai_task_entity.CreateNoteBookTaskRequest) models.CreateJobParams { | |||
func convertNoteBookReq2CloudbrainOne(req entity.CreateNoteBookTaskRequest) models.CreateJobParams { | |||
var command = `pip3 install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;pip3 install -U "nbclassic>=0.2.8" -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --ServerApp.shutdown_no_activity_timeout=` + setting.CullIdleTimeout + ` --TerminalManager.cull_inactive_timeout=` + setting.CullIdleTimeout + ` --TerminalManager.cull_interval=` + setting.CullInterval + ` --MappingKernelManager.cull_idle_timeout=` + setting.CullIdleTimeout + ` --MappingKernelManager.cull_interval=` + setting.CullInterval + ` --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --ServerApp.token="" --LabApp.token="" --ServerApp.allow_origin="self https://cloudbrain.pcl.ac.cn" ` | |||
t := req.Tasks[0] | |||
@@ -58,11 +64,11 @@ func convertNoteBookReq2CloudbrainOne(req ai_task_entity.CreateNoteBookTaskReque | |||
UseNNI: false, | |||
}, | |||
}, | |||
Volumes: convertContainerDataArray2Volume(t.Code, t.Datasets, t.PreTrainModel), | |||
Volumes: convertContainerDataArray2Volume(t.Code, t.Datasets, t.PreTrainModel, t.OutPut), | |||
} | |||
} | |||
func convertContainerDataArray2Volume(containerDataArray ...[]ai_task_entity.ContainerData) []models.Volume { | |||
func convertContainerDataArray2Volume(containerDataArray ...[]entity.ContainerData) []models.Volume { | |||
r := make([]models.Volume, 0) | |||
for _, array := range containerDataArray { | |||
for _, d := range array { | |||
@@ -72,7 +78,7 @@ func convertContainerDataArray2Volume(containerDataArray ...[]ai_task_entity.Con | |||
return r | |||
} | |||
func convertContainerData2Volume(d ai_task_entity.ContainerData) models.Volume { | |||
func convertContainerData2Volume(d entity.ContainerData) models.Volume { | |||
return models.Volume{ | |||
HostPath: models.StHostPath{ | |||
Path: d.RealPath, | |||
@@ -82,15 +88,15 @@ func convertContainerData2Volume(d ai_task_entity.ContainerData) models.Volume { | |||
} | |||
} | |||
func convertCloudbrainOne2NoteBookRes(res *models.CreateJobResult) *ai_task_entity.CreateNoteBookTaskResponse { | |||
func convertCloudbrainOne2NoteBookRes(res *models.CreateJobResult) *entity.CreateNoteBookTaskResponse { | |||
playload := res.Payload | |||
return &ai_task_entity.CreateNoteBookTaskResponse{ | |||
return &entity.CreateNoteBookTaskResponse{ | |||
JobID: playload["jobId"].(string), | |||
Status: string(models.JobWaiting), | |||
} | |||
} | |||
func (c CloudbrainOneClusterAdapter) RestartNoteBook(string) (*ai_task_entity.RestartNoteBookTaskResponse, error) { | |||
func (c CloudbrainOneClusterAdapter) RestartNoteBook(string) (*entity.RestartNoteBookTaskResponse, error) { | |||
return nil, nil | |||
} | |||
@@ -99,10 +105,15 @@ func (c CloudbrainOneClusterAdapter) DeleteNoteBook(string) error { | |||
} | |||
func (c CloudbrainOneClusterAdapter) StopNoteBook(jobId string) error { | |||
err := cloudbrain.StopJob(jobId) | |||
if err != nil { | |||
log.Error("StopNoteBook(%s) failed:%v", jobId, err) | |||
return err | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) QueryNoteBook(jobId string) (*ai_task_entity.QueryTaskResponse, error) { | |||
func (c CloudbrainOneClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) { | |||
if jobId == "" { | |||
log.Error("jobid is empty") | |||
return nil, errors.New("jobid is empty") | |||
@@ -112,15 +123,14 @@ func (c CloudbrainOneClusterAdapter) QueryNoteBook(jobId string) (*ai_task_entit | |||
log.Error("QueryNoteBook failed:%v", err) | |||
return nil, err | |||
} | |||
result, err := models.ConvertToJobResultPayload(jobResult.Payload) | |||
if err != nil { | |||
log.Error("ConvertToJobResultPayload failed:%v", err) | |||
return nil, err | |||
} | |||
return ai_task_entity.ConvertCloudbrainOneNotebookResponse(result), nil | |||
return entity.ConvertCloudbrainOneNotebookResponse(jobResult.Payload) | |||
} | |||
func (c CloudbrainOneClusterAdapter) QueryNoteBookByJobName(jobName string) ([]*ai_task_entity.QueryTaskResponse, error) { | |||
func (c CloudbrainOneClusterAdapter) QueryNoteBookByJobName(jobName string) ([]*entity.QueryTaskResponse, error) { | |||
jobResult, err := cloudbrain.GetJobListByName(jobName) | |||
if err != nil { | |||
log.Error("GetJobListByName failed:%v", err) | |||
@@ -131,23 +141,80 @@ func (c CloudbrainOneClusterAdapter) QueryNoteBookByJobName(jobName string) ([]* | |||
log.Error("ConvertToJobListResultPayload failed:%v", err) | |||
return nil, err | |||
} | |||
r := make([]*ai_task_entity.QueryTaskResponse, 0) | |||
r := make([]*entity.QueryTaskResponse, 0) | |||
for i := 0; i < len(result.Jobs); i++ { | |||
if result.Jobs[i].Name == jobName { | |||
r = append(r, ai_task_entity.ConvertCloudbrainOneQueryNotebookByNameResponse(result.Jobs[i])) | |||
r = append(r, entity.ConvertCloudbrainOneQueryNotebookByNameResponse(result.Jobs[i])) | |||
} | |||
} | |||
return r, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetNoteBookLog(jobId string) (*ai_task_entity.ClusterLog, error) { | |||
func (c CloudbrainOneClusterAdapter) GetNoteBookLog(jobId string) (*entity.ClusterLog, error) { | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetNoteBookUrl(jobId string) (string, error) { | |||
return "", nil | |||
return setting.DebugServerHost + "jpylab_" + jobId + "_" + models.SubTaskName, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) CreateTrainJob(ai_task_entity.CreateTrainTaskRequest) (*ai_task_entity.CreateTrainTaskResponse, error) { | |||
// GetNoteBookOperationProfile returns the operation events of a cloudbrain-one
// notebook job, extracted from the AppExitDiagnostics field of the job's
// result payload. Returns nil (no error) when the diagnostics are absent or
// cannot be parsed — see parseDiagnosticsToOperationProfile.
func (c CloudbrainOneClusterAdapter) GetNoteBookOperationProfile(jobId string) (*entity.OperationProfile, error) {
	if jobId == "" {
		log.Error("jobid is empty")
		return nil, errors.New("jobid is empty")
	}
	jobResult, err := cloudbrain.GetJob(jobId)
	if err != nil {
		log.Error("QueryNoteBook failed:%v", err)
		return nil, err
	}
	// The raw payload is a generic map; convert it to the typed result first.
	result, err := models.ConvertToJobResultPayload(jobResult.Payload)
	if err != nil {
		log.Error("ConvertToJobResultPayload failed:%v", err)
		return nil, err
	}
	return parseDiagnosticsToOperationProfile(result.JobStatus.AppExitDiagnostics), nil
}
func parseDiagnosticsToOperationProfile(appExitDiagnostics string) *entity.OperationProfile { | |||
if appExitDiagnostics == "" { | |||
return nil | |||
} | |||
diagnostics := entity.CloudbrainOneAppExitDiagnostics{} | |||
err := json.Unmarshal([]byte(appExitDiagnostics), &diagnostics) | |||
if err != nil { | |||
log.Error("json.Unmarshal appExitDiagnostics err.%v", err) | |||
return nil | |||
} | |||
events := make([]entity.ProfileEvent, 0) | |||
podEvents := diagnostics.PodEvents.Task10 | |||
for i := 0; i < len(podEvents); i++ { | |||
e := podEvents[i] | |||
if e.Message == "" { | |||
continue | |||
} | |||
events = append(events, entity.ProfileEvent{ | |||
Message: e.Message, | |||
Reason: e.Reason, | |||
Action: e.Action, | |||
}) | |||
} | |||
extras := diagnostics.Extras | |||
for i := 0; i < len(extras); i++ { | |||
e := extras[i] | |||
if e.Message == "" { | |||
continue | |||
} | |||
events = append(events, entity.ProfileEvent{ | |||
Message: e.Message, | |||
Reason: e.Reason, | |||
Action: e.Action, | |||
}) | |||
} | |||
return &entity.OperationProfile{Events: events} | |||
} | |||
func (c CloudbrainOneClusterAdapter) CreateTrainJob(entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) DeleteTrainJob(string) error { | |||
@@ -156,12 +223,12 @@ func (c CloudbrainOneClusterAdapter) DeleteTrainJob(string) error { | |||
func (c CloudbrainOneClusterAdapter) StopTrainJob(string) error { | |||
return nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) QueryTrainJob(string) (*ai_task_entity.QueryTaskResponse, error) { | |||
func (c CloudbrainOneClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) { | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) RestartTrainJob(string) (*ai_task_entity.CreateTrainTaskResponse, error) { | |||
func (c CloudbrainOneClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) { | |||
return nil, nil | |||
} | |||
func (c CloudbrainOneClusterAdapter) GetTrainLog(string) (*ai_task_entity.ClusterLog, error) { | |||
func (c CloudbrainOneClusterAdapter) GetTrainLog(string) (*entity.ClusterLog, error) { | |||
return nil, nil | |||
} |
@@ -0,0 +1,297 @@ | |||
package cluster | |||
import "C" | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/manager/client/cloudbrain_two" | |||
"code.gitea.io/gitea/manager/client/cloudbrain_two_cd" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
) | |||
// CloudbrainTwoClusterAdapter implements ClusterAdapter for the cloudbrain-two
// cluster. Its methods dispatch between the central (cloudbrain_two) and CD
// (cloudbrain_two_cd) clients based on the task's recorded type.
type CloudbrainTwoClusterAdapter struct {
}
// init registers this adapter in the global cluster map under the
// cloudbrain-two cluster type.
func init() {
	AddCluster(entity.OpenICloudbrainTwo, new(CloudbrainTwoClusterAdapter))
}
func (c CloudbrainTwoClusterAdapter) CreateNoteBook(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) { | |||
if poolInfos == nil { | |||
json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) | |||
} | |||
t := req.Tasks[0] | |||
var jobResult *models.CreateNotebookResult | |||
var err error | |||
if setting.ModelartsCD.Enabled { | |||
jobResult, err = cloudbrain_two_cd.CreateNotebook(models.CreateNotebookWithoutPoolParams{ | |||
JobName: req.Name, | |||
Description: req.Description, | |||
Flavor: t.Spec.SourceSpecId, | |||
Duration: t.AutoStopDuration, | |||
ImageID: t.ImageId, | |||
Feature: models.NotebookFeature, | |||
Volume: models.VolumeReq{ | |||
Capacity: setting.Capacity, | |||
Category: models.EVSCategory, | |||
Ownership: models.ManagedOwnership, | |||
}, | |||
WorkspaceID: "0", | |||
}) | |||
} else { | |||
jobResult, err = cloudbrain_two.CreateNotebook2(models.CreateNotebook2Params{ | |||
JobName: req.Name, | |||
Description: req.Description, | |||
Flavor: t.Spec.SourceSpecId, | |||
Duration: t.AutoStopDuration, | |||
ImageID: t.ImageId, | |||
PoolID: poolInfos.PoolInfo[0].PoolId, | |||
Feature: models.NotebookFeature, | |||
Volume: models.VolumeReq{ | |||
Capacity: setting.Capacity, | |||
Category: models.EVSCategory, | |||
Ownership: models.ManagedOwnership, | |||
}, | |||
WorkspaceID: "0", | |||
}) | |||
} | |||
if err != nil { | |||
log.Error("CreateNoteBook failed: %v", err.Error()) | |||
return nil, err | |||
} | |||
return convertCloudbrainTwo2NoteBookRes(jobResult), nil | |||
} | |||
// CreateOnlineInfer is not supported on cloudbrain-two; it always returns
// (nil, nil).
func (c CloudbrainTwoClusterAdapter) CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) {
	return nil, nil
}
// cloudbrainTwoImages caches the image list built from configuration by GetImages.
var cloudbrainTwoImages []entity.ClusterImage
func (c CloudbrainTwoClusterAdapter) GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) { | |||
if cloudbrainTwoImages == nil || len(cloudbrainTwoImages) == 0 { | |||
images := setting.StImageInfos.ImageInfo | |||
cloudbrainTwoImages = make([]entity.ClusterImage, len(images)) | |||
for i := 0; i < len(images); i++ { | |||
cloudbrainTwoImages[i] = entity.ClusterImage{ | |||
ImageId: images[i].Id, | |||
ImageName: images[i].Value, | |||
} | |||
} | |||
} | |||
return cloudbrainTwoImages, false, nil | |||
} | |||
// poolInfos caches the resource-pool configuration decoded from setting.PoolInfos.
var poolInfos *models.PoolInfos
// convertCloudbrainTwo2NoteBookRes maps a cloudbrain-two create-notebook
// result onto the cluster-neutral CreateNoteBookTaskResponse.
func convertCloudbrainTwo2NoteBookRes(res *models.CreateNotebookResult) *entity.CreateNoteBookTaskResponse {
	return &entity.CreateNoteBookTaskResponse{
		JobID:  res.ID,
		Status: res.Status,
	}
}
func (c CloudbrainTwoClusterAdapter) RestartNoteBook(jobId string) (*entity.RestartNoteBookTaskResponse, error) { | |||
param := models.NotebookAction{ | |||
Action: models.ActionStart, | |||
} | |||
task, err := models.GetNewestCloudbrainByJobId(jobId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var res *models.NotebookActionResult | |||
if task.Type == models.TypeCloudBrainTwo { | |||
res, err = cloudbrain_two.ManageNotebook2(task.JobID, param) | |||
} else if task.Type == models.TypeCDCenter { | |||
res, err = cloudbrain_two_cd.ManageNotebook(task.JobID, param) | |||
} | |||
if err != nil { | |||
log.Error("ManageNotebook err.jobID=%s err=%v", jobId, err) | |||
return nil, err | |||
} | |||
return convertCloudbrainTwo2NoteBookRestartRes(jobId, res), nil | |||
} | |||
// convertCloudbrainTwo2NoteBookRestartRes maps a notebook action result onto
// the cluster-neutral RestartNoteBookTaskResponse, keeping the caller's jobId.
func convertCloudbrainTwo2NoteBookRestartRes(jobId string, res *models.NotebookActionResult) *entity.RestartNoteBookTaskResponse {
	return &entity.RestartNoteBookTaskResponse{
		JobId:  jobId,
		Status: res.Status,
	}
}
// DeleteNoteBook deletes the notebook job identified by jobId, routing to the
// central or CD client according to the task's recorded type.
// NOTE(review): when task.Type matches neither TypeCloudBrainTwo nor
// TypeCDCenter this is a silent no-op (returns nil) — confirm that is intended.
func (c CloudbrainTwoClusterAdapter) DeleteNoteBook(jobId string) error {
	task, err := models.GetNewestCloudbrainByJobId(jobId)
	if err != nil {
		return err
	}
	if task.Type == models.TypeCloudBrainTwo {
		_, err = cloudbrain_two.DelNotebook2(task.JobID)
	} else if task.Type == models.TypeCDCenter {
		_, err = cloudbrain_two_cd.DelNotebook(task.JobID)
	}
	if err != nil {
		log.Error("DeleteNoteBook err.jobID=%s err=%v", jobId, err)
		return err
	}
	return nil
}
// StopNoteBook stops the notebook job identified by jobId, routing to the
// central or CD client according to the task's recorded type.
// NOTE(review): an unrecognized task.Type silently succeeds — confirm that is
// intended.
func (c CloudbrainTwoClusterAdapter) StopNoteBook(jobId string) error {
	task, err := models.GetNewestCloudbrainByJobId(jobId)
	if err != nil {
		return err
	}
	param := models.NotebookAction{
		Action: models.ActionStop,
	}
	if task.Type == models.TypeCloudBrainTwo {
		_, err = cloudbrain_two.ManageNotebook2(task.JobID, param)
	} else if task.Type == models.TypeCDCenter {
		_, err = cloudbrain_two_cd.ManageNotebook(task.JobID, param)
	}
	if err != nil {
		log.Error("StopNoteBook err.jobID=%s err=%v", jobId, err)
		return err
	}
	return nil
}
func (c CloudbrainTwoClusterAdapter) QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) { | |||
task, err := models.GetNewestCloudbrainByJobId(jobId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var result *models.GetNotebook2Result | |||
if task.Type == models.TypeCloudBrainTwo { | |||
result, err = cloudbrain_two.GetNotebook2(task.JobID) | |||
} else if task.Type == models.TypeCDCenter { | |||
result, err = cloudbrain_two_cd.GetNotebook(task.JobID) | |||
} | |||
if err != nil { | |||
log.Error("GetNotebook(%s) failed:%v", task.DisplayJobName, err) | |||
return nil, err | |||
} | |||
return convertCloudbrainTwo2QueryRes(result), nil | |||
} | |||
// convertCloudbrainTwo2QueryRes maps a cloudbrain-two notebook query result
// onto the cluster-neutral QueryTaskResponse.
func convertCloudbrainTwo2QueryRes(res *models.GetNotebook2Result) *entity.QueryTaskResponse {
	// Lease.UpdateTime is divided by 1000 (presumably milliseconds → seconds);
	// zero means the job has not started.
	startedAt := timeutil.TimeStamp(0)
	if res.Lease.UpdateTime > 0 {
		startedAt = timeutil.TimeStamp(res.Lease.UpdateTime / 1000)
	}
	// The payload carries no completion timestamp, so for jobs in a terminal
	// status the current time is used as an approximation.
	completedAt := timeutil.TimeStamp(0)
	if models.IsCloudbrainTerminalStatus(res.Status) {
		completedAt = timeutil.TimeStampNow()
	}
	return &entity.QueryTaskResponse{
		StartedAt:   startedAt,
		CompletedAt: completedAt,
		JobId:       res.ID,
		Status:      res.Status,
		Url:         res.Url,
		Token:       res.Token,
	}
}
func (c CloudbrainTwoClusterAdapter) QueryNoteBookByJobName(jobName string) ([]*entity.QueryTaskResponse, error) { | |||
result, err := cloudbrain_two.GetNotebookList(1000, 0, "createTime", "DESC", jobName) | |||
if err != nil { | |||
log.Error("QueryNoteBookByJobName failed:jobName=%s err=%v", jobName, err) | |||
return nil, err | |||
} | |||
r := make([]*entity.QueryTaskResponse, 0) | |||
for i := 0; i < len(result.NotebookList); i++ { | |||
if result.NotebookList[i].JobName == jobName { | |||
r = append(r, convertCloudbrainTwoQueryNotebookByNameResponse(result.NotebookList[i])) | |||
} | |||
} | |||
return r, nil | |||
} | |||
// convertCloudbrainTwoQueryNotebookByNameResponse maps a notebook list entry
// onto the cluster-neutral QueryTaskResponse. Lease.CreateTime is divided by
// 1000 (presumably milliseconds → seconds).
func convertCloudbrainTwoQueryNotebookByNameResponse(notebook models.NotebookList) *entity.QueryTaskResponse {
	return &entity.QueryTaskResponse{
		StartedAt: timeutil.TimeStamp(notebook.Lease.CreateTime / 1000),
		Status:    notebook.Status,
		JobId:     notebook.JobID,
	}
}
// GetNoteBookLog is not implemented for cloudbrain-two; it always returns
// (nil, nil).
func (c CloudbrainTwoClusterAdapter) GetNoteBookLog(jobId string) (*entity.ClusterLog, error) {
	return nil, nil
}
// GetNoteBookUrl returns the browser URL (with access token appended) of the
// notebook identified by jobId.
// NOTE(review): when the job is not running, Url/Token may be empty and the
// result is just "?token=" — confirm callers handle that.
func (c CloudbrainTwoClusterAdapter) GetNoteBookUrl(jobId string) (string, error) {
	res, err := c.QueryNoteBook(jobId)
	if err != nil {
		return "", err
	}
	return res.Url + "?token=" + res.Token, nil
}
func (c CloudbrainTwoClusterAdapter) GetNoteBookOperationProfile(jobId string) (*entity.OperationProfile, error) { | |||
task, err := models.GetNewestCloudbrainByJobId(jobId) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var result *models.GetNotebook2Result | |||
if task.Type == models.TypeCloudBrainTwo { | |||
result, err = cloudbrain_two.GetNotebook2(task.JobID) | |||
} else if task.Type == models.TypeCDCenter { | |||
result, err = cloudbrain_two_cd.GetNotebook(task.JobID) | |||
} | |||
if err != nil { | |||
log.Error("GetNotebook(%s) failed:%v", task.DisplayJobName, err) | |||
return nil, err | |||
} | |||
return parseCloudbrainTwoEventsToOperationProfile(result), nil | |||
} | |||
func parseCloudbrainTwoEventsToOperationProfile(result *models.GetNotebook2Result) *entity.OperationProfile { | |||
events := make([]entity.ProfileEvent, 0) | |||
if result.ActionProgress == nil || len(result.ActionProgress) == 0 { | |||
return nil | |||
} | |||
for i := 0; i < len(result.ActionProgress); i++ { | |||
e := result.ActionProgress[i] | |||
if e.Description == "" { | |||
continue | |||
} | |||
events = append(events, entity.ProfileEvent{ | |||
Message: e.Description, | |||
Reason: fmt.Sprint(e.Step), | |||
Name: e.Status, | |||
}) | |||
} | |||
return &entity.OperationProfile{Events: events} | |||
} | |||
// CreateTrainJob is not implemented for cloudbrain-two; it always returns
// (nil, nil).
func (c CloudbrainTwoClusterAdapter) CreateTrainJob(entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) {
	return nil, nil
}
// DeleteTrainJob is not implemented for cloudbrain-two; it always returns nil.
func (c CloudbrainTwoClusterAdapter) DeleteTrainJob(string) error {
	return nil
}
// StopTrainJob is not implemented for cloudbrain-two; it always returns nil.
func (c CloudbrainTwoClusterAdapter) StopTrainJob(string) error {
	return nil
}
// QueryTrainJob is not implemented for cloudbrain-two; it always returns
// (nil, nil).
func (c CloudbrainTwoClusterAdapter) QueryTrainJob(string) (*entity.QueryTaskResponse, error) {
	return nil, nil
}
// RestartTrainJob is not implemented for cloudbrain-two; it always returns
// (nil, nil).
func (c CloudbrainTwoClusterAdapter) RestartTrainJob(string) (*entity.CreateTrainTaskResponse, error) {
	return nil, nil
}
// GetTrainLog is not implemented for cloudbrain-two; it always returns
// (nil, nil).
func (c CloudbrainTwoClusterAdapter) GetTrainLog(string) (*entity.ClusterLog, error) {
	return nil, nil
}
@@ -1,17 +1,18 @@ | |||
package cluster | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"errors" | |||
"code.gitea.io/gitea/entity" | |||
) | |||
var clusterMap = map[ai_task_entity.ClusterType]ClusterAdapter{} | |||
var clusterMap = map[entity.ClusterType]ClusterAdapter{} | |||
func AddCluster(t ai_task_entity.ClusterType, cluster ClusterAdapter) { | |||
func AddCluster(t entity.ClusterType, cluster ClusterAdapter) { | |||
clusterMap[t] = cluster | |||
} | |||
func GetCluster(t ai_task_entity.ClusterType) (ClusterAdapter, error) { | |||
func GetCluster(t entity.ClusterType) (ClusterAdapter, error) { | |||
if t == "" { | |||
return nil, errors.New("ClusterType is empty") | |||
} | |||
@@ -23,22 +24,25 @@ func GetCluster(t ai_task_entity.ClusterType) (ClusterAdapter, error) { | |||
} | |||
type ClusterAdapter interface { | |||
CreateNoteBook(req ai_task_entity.CreateNoteBookTaskRequest) (*ai_task_entity.CreateNoteBookTaskResponse, error) | |||
RestartNoteBook(jobId string) (*ai_task_entity.RestartNoteBookTaskResponse, error) | |||
CreateNoteBook(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) | |||
RestartNoteBook(jobId string) (*entity.RestartNoteBookTaskResponse, error) | |||
DeleteNoteBook(jobId string) error | |||
StopNoteBook(jobId string) error | |||
QueryNoteBook(jobId string) (*ai_task_entity.QueryTaskResponse, error) | |||
QueryNoteBookByJobName(jobName string) ([]*ai_task_entity.QueryTaskResponse, error) | |||
GetNoteBookLog(jobId string) (*ai_task_entity.ClusterLog, error) | |||
QueryNoteBook(jobId string) (*entity.QueryTaskResponse, error) | |||
QueryNoteBookByJobName(jobName string) ([]*entity.QueryTaskResponse, error) | |||
GetNoteBookLog(jobId string) (*entity.ClusterLog, error) | |||
GetNoteBookUrl(jobId string) (string, error) | |||
CreateTrainJob(req ai_task_entity.CreateTrainTaskRequest) (*ai_task_entity.CreateTrainTaskResponse, error) | |||
GetNoteBookOperationProfile(jobId string) (*entity.OperationProfile, error) | |||
CreateTrainJob(req entity.CreateTrainTaskRequest) (*entity.CreateTrainTaskResponse, error) | |||
DeleteTrainJob(jobId string) error | |||
StopTrainJob(string) error | |||
RestartTrainJob(jobId string) (*ai_task_entity.CreateTrainTaskResponse, error) | |||
QueryTrainJob(jobId string) (*ai_task_entity.QueryTaskResponse, error) | |||
GetTrainLog(jobId string) (*ai_task_entity.ClusterLog, error) | |||
RestartTrainJob(jobId string) (*entity.CreateTrainTaskResponse, error) | |||
QueryTrainJob(jobId string) (*entity.QueryTaskResponse, error) | |||
GetTrainLog(jobId string) (*entity.ClusterLog, error) | |||
//GetImages return available list of clusters | |||
//The second parameter will return true if image is no limit | |||
GetImages(req ai_task_entity.GetImageReq) ([]ai_task_entity.ClusterImage, bool, error) | |||
GetImages(req entity.GetImageReq) ([]entity.ClusterImage, bool, error) | |||
CreateOnlineInfer(req entity.CreateNoteBookTaskRequest) (*entity.CreateNoteBookTaskResponse, error) | |||
} |
@@ -0,0 +1,81 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"strings" | |||
) | |||
// CodeBuilder prepares the repository code for an AI task: it downloads the
// code locally, uploads it to the configured storage backend and describes
// the uploaded object as a ContainerData mount.
type CodeBuilder struct {
	// Opts controls storage backend, mount path and archive behavior.
	Opts *entity.ContainerBuildOpts
}

// init registers CodeBuilder so CreateContainerBuilder can instantiate it
// from the container type.
func init() {
	o := &CodeBuilder{}
	RegisterContainerBuilder(o)
}

// SetOpts stores the build options used by Build.
func (b *CodeBuilder) SetOpts(opts *entity.ContainerBuildOpts) {
	b.Opts = opts
}

// GetContainerType identifies this builder as the repository-code builder.
func (b *CodeBuilder) GetContainerType() entity.ContainerDataType {
	return entity.ContainerCode
}
func (b *CodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { | |||
opts := b.Opts | |||
if opts.Disable { | |||
return nil, nil | |||
} | |||
storageTypes := opts.AcceptStorageType | |||
if storageTypes == nil || len(storageTypes) == 0 { | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
jobName := ctx.Request.JobName | |||
repo := ctx.Repository | |||
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" | |||
uploader := upload.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + cloudbrain.CodeMountPath | |||
//再次调试和在线运行notebook不需要下载、上传代码 | |||
if !ctx.Request.IsRestartRequest && !ctx.Request.IsFileNoteBookRequest { | |||
if err := DownloadCode(ctx, codeLocalPath, b.Opts.NotArchive); err != nil { | |||
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) | |||
return nil, response.LOAD_CODE_FAILED | |||
} | |||
if err := uploader.UploadDir(codeLocalPath, remoteDir); err != nil { | |||
log.Error("Failed to UploadDir: %s (%v)", repo.FullName(), err) | |||
return nil, response.LOAD_CODE_FAILED | |||
} | |||
} | |||
codeArchiveName := "" | |||
//如果代码是压缩包形式,以默认分支命名压缩包(继承原有逻辑) | |||
if !b.Opts.NotArchive { | |||
codeArchiveName = cloudbrain.DefaultBranchName + ".zip" | |||
} | |||
containerPath := "" | |||
if opts.ContainerPath != "" { | |||
containerPath = opts.ContainerPath + "/" + codeArchiveName | |||
} | |||
objectKey := remoteDir + "/" + codeArchiveName | |||
codeData := entity.ContainerData{ | |||
Name: strings.ToLower(repo.Name), | |||
Bucket: uploader.GetBucket(), | |||
EndPoint: uploader.GetEndpoint(), | |||
ObjectKey: objectKey, | |||
ReadOnly: opts.ReadOnly, | |||
ContainerPath: containerPath, | |||
RealPath: uploader.GetRealPath(objectKey), | |||
} | |||
return []entity.ContainerData{codeData}, nil | |||
} |
@@ -0,0 +1,95 @@ | |||
package container_builder | |||
import ( | |||
"bufio" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/git" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"errors" | |||
"io" | |||
"io/ioutil" | |||
"os" | |||
"strings" | |||
) | |||
func DownloadCode(ctx *context.CreationContext, codeLocalPath string, notArchive bool) error { | |||
dir, err := ioutil.ReadDir(codeLocalPath) | |||
//ReqCommitID为空时需要下载最新的代码,把旧的删掉 | |||
if len(dir) != 0 && ctx.Request.ReqCommitID == "" { | |||
if err == nil { | |||
os.RemoveAll(codeLocalPath) | |||
} | |||
} | |||
var commitId string | |||
//目录为空时需要下载代码 | |||
if len(dir) == 0 { | |||
if notArchive { | |||
commitId, err = upload.DownloadCode(ctx.GitRepo, ctx.Repository, codeLocalPath, ctx.Request.BranchName) | |||
} else { | |||
commitId, err = upload.DownloadZipCode(ctx.GitRepo, codeLocalPath, ctx.Request.BranchName) | |||
} | |||
if err != nil { | |||
log.Error("downloadZipCode failed, server timed out: %s (%v)", ctx.Repository.FullName(), err) | |||
return errors.New("cloudbrain.load_code_failed") | |||
} | |||
} | |||
ctx.CommitID = commitId | |||
return nil | |||
} | |||
// Package-level uploader singletons shared by the builders in this package.
var obsUploader = &upload.OBSUploader{}
var minioUploader = &upload.MinioUploader{}

// CLONE_FILE_PREFIX is prepended to local repository paths so git treats the
// clone source as a file:// URL, which is required for --depth to apply.
const CLONE_FILE_PREFIX = "file:///"
func DownloadBranch(repo *models.Repository, codePath, branchName string) error { | |||
//add "file:///" prefix to make the depth valid | |||
if err := git.Clone(CLONE_FILE_PREFIX+repo.RepoPath(), codePath, git.CloneRepoOptions{Branch: branchName, Depth: 1}); err != nil { | |||
log.Error("Failed to clone repository: %s (%v)", repo.FullName(), err) | |||
return err | |||
} | |||
configFile, err := os.OpenFile(codePath+"/.git/config", os.O_RDWR, 0666) | |||
if err != nil { | |||
log.Error("open file(%s) failed:%v", codePath+"/,git/config", err) | |||
return err | |||
} | |||
defer configFile.Close() | |||
pos := int64(0) | |||
reader := bufio.NewReader(configFile) | |||
for { | |||
line, err := reader.ReadString('\n') | |||
if err != nil { | |||
if err == io.EOF { | |||
log.Error("not find the remote-url") | |||
return nil | |||
} else { | |||
log.Error("read error: %v", err) | |||
return err | |||
} | |||
} | |||
if strings.Contains(line, "url") && strings.Contains(line, ".git") { | |||
originUrl := "\turl = " + repo.CloneLink().HTTPS + "\n" | |||
if len(line) > len(originUrl) { | |||
originUrl += strings.Repeat(" ", len(line)-len(originUrl)) | |||
} | |||
bytes := []byte(originUrl) | |||
_, err := configFile.WriteAt(bytes, pos) | |||
if err != nil { | |||
log.Error("WriteAt failed:%v", err) | |||
return err | |||
} | |||
break | |||
} | |||
pos += int64(len(line)) | |||
} | |||
return nil | |||
} |
@@ -1,24 +1,52 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"strings" | |||
"fmt" | |||
"reflect" | |||
) | |||
type ContainerBuilder interface { | |||
Build(ctx *context.CreationContext) ([]ai_task_entity.ContainerData, error) | |||
GetContainerType() ai_task_entity.ContainerDataType | |||
Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) | |||
GetContainerType() entity.ContainerDataType | |||
SetOpts(opts *entity.ContainerBuildOpts) | |||
} | |||
type ContainerBuildOpts struct { | |||
ContainerPath string | |||
ReadOnly bool | |||
// containerBuilderMap maps each container data type to the reflect.Type of
// its registered builder; CreateContainerBuilder instantiates from it.
var containerBuilderMap = map[entity.ContainerDataType]reflect.Type{}

// RegisterContainerBuilder records a builder prototype under its container
// type. Builders call this from their init functions.
func RegisterContainerBuilder(builder ContainerBuilder) {
	containerBuilderMap[builder.GetContainerType()] = reflect.TypeOf(builder)
}
func CreateContainerBuilder(containerType entity.ContainerDataType, opts *entity.ContainerBuildOpts) ContainerBuilder { | |||
defer func() { | |||
if err := recover(); err != nil { | |||
combinedErr := fmt.Errorf("%s\n%s", err, log.Stack(2)) | |||
log.Error("PANIC:%v", combinedErr) | |||
} | |||
}() | |||
t := containerBuilderMap[containerType] | |||
if t == nil { | |||
return nil | |||
} | |||
b := reflect.New(t.Elem()).Interface().(ContainerBuilder) | |||
//.Interface().(ContainerBuilder) | |||
//b.SetOpts(opts) | |||
b.SetOpts(opts) | |||
return b | |||
} | |||
func GetEndPoint() string { | |||
index := strings.Index(setting.Endpoint, "//") | |||
endpoint := setting.Endpoint[index+2:] | |||
return endpoint | |||
func BuildContainerDataChain(configMap map[entity.ContainerDataType]*entity.ContainerBuildOpts) *BuilderChain { | |||
c := NewBuilderChain() | |||
for k, v := range configMap { | |||
b := CreateContainerBuilder(k, v) | |||
if b == nil { | |||
continue | |||
} | |||
c.Next(b) | |||
} | |||
return c | |||
} |
@@ -1,6 +1,7 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
) | |||
@@ -17,8 +18,13 @@ func (c *BuilderChain) Next(b ContainerBuilder) *BuilderChain { | |||
return c | |||
} | |||
func (c *BuilderChain) Run(ctx *context.CreationContext) error { | |||
func (c *BuilderChain) Run(ctx *context.CreationContext) *response.BizError { | |||
for _, builder := range c.builderList { | |||
current := ctx.GetContainerDataArray(builder.GetContainerType()) | |||
//如果已经存在则不需要再构建 | |||
if current != nil && len(current) > 0 { | |||
continue | |||
} | |||
d, err := builder.Build(ctx) | |||
if err != nil { | |||
return err | |||
@@ -1,70 +1,93 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"errors" | |||
"strings" | |||
) | |||
type DatasetBuilder struct { | |||
Opts ContainerBuildOpts | |||
Opts *entity.ContainerBuildOpts | |||
} | |||
func (b DatasetBuilder) Build(ctx *context.CreationContext) ([]ai_task_entity.ContainerData, error) { | |||
func init() { | |||
o := &DatasetBuilder{} | |||
RegisterContainerBuilder(o) | |||
} | |||
func (b *DatasetBuilder) SetOpts(opts *entity.ContainerBuildOpts) { | |||
b.Opts = opts | |||
} | |||
func (b *DatasetBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { | |||
if b.Opts.Disable { | |||
return nil, nil | |||
} | |||
uuid := ctx.Request.DatasetUUIDStr | |||
if uuid == "" { | |||
return nil, nil | |||
} | |||
var attachSize int64 | |||
datasetInfos, _, err := models.GetDatasetInfo(uuid, ctx.Request.ComputeSource.Name) | |||
var datasetInfos map[string]models.DatasetInfo | |||
var datasetNames string | |||
var err error | |||
// models.GetDatasetInfo 是使用的以前的方法,所以此处按集群类型适配 | |||
if ctx.Request.Cluster == models.C2NetCluster { | |||
datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid, ctx.Request.ComputeSource.Name) | |||
} else { | |||
datasetInfos, datasetNames, err = models.GetDatasetInfo(uuid) | |||
} | |||
if err != nil { | |||
log.Error("GetDatasetInfo failed: %v", err) | |||
return nil, errors.New("cloudbrain.error.dataset_select") | |||
return nil, response.DATASET_SELECT_ERROR | |||
} | |||
uuidArray := strings.Split(uuid, ";") | |||
if datasetInfos == nil || len(datasetInfos) < len(uuidArray) { | |||
return nil, errors.New("cloudbrain.error.partial_datasets_not_available") | |||
} | |||
for _, infos := range datasetInfos { | |||
attachSize += infos.Size | |||
return nil, response.PARTIAL_DATASETS_NOT_AVAILABLE | |||
} | |||
if attachSize > int64(setting.DebugAttachSize*1000*1000*1000) { | |||
log.Error("The DatasetSize exceeds the limit (%dGB)", setting.DebugAttachSize) // GB | |||
return nil, errors.New("cloudbrain.error.debug_datasetsize") | |||
} | |||
var data []ai_task_entity.ContainerData | |||
obsEndPoint := GetEndPoint() | |||
var data []entity.ContainerData | |||
for _, datasetInfo := range datasetInfos { | |||
name := datasetInfo.FullName | |||
//如果不是压缩包,那么文件名是去掉后缀以后的数据集名称 | |||
if b.Opts.NotArchive { | |||
name = datasetInfo.Name | |||
} | |||
if datasetInfo.Type == models.TypeCloudBrainOne { | |||
data = append(data, ai_task_entity.ContainerData{ | |||
Name: datasetInfo.FullName, | |||
Bucket: setting.Attachment.Minio.Bucket, | |||
EndPoint: setting.Attachment.Minio.Endpoint, | |||
ObjectKey: datasetInfo.DataLocalPath, | |||
//如果返回的localPath已经带了实际路径的前缀,需要去除掉以后才是在minio上的objectKey | |||
objectKey := datasetInfo.DataLocalPath | |||
objectKey = strings.TrimPrefix(objectKey, setting.Attachment.Minio.RealPath) | |||
objectKey = strings.TrimPrefix(objectKey, setting.Attachment.Minio.Bucket) | |||
objectKey = strings.TrimPrefix(objectKey, "/") | |||
data = append(data, entity.ContainerData{ | |||
Name: name, | |||
Bucket: minioUploader.GetBucket(), | |||
EndPoint: minioUploader.GetEndpoint(), | |||
ObjectKey: objectKey, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + datasetInfo.FullName, | |||
RealPath: setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + datasetInfo.DataLocalPath, | |||
ContainerPath: b.Opts.ContainerPath + "/" + name, | |||
RealPath: minioUploader.GetRealPath(objectKey), | |||
}) | |||
} else { | |||
data = append(data, ai_task_entity.ContainerData{ | |||
Name: datasetInfo.FullName, | |||
Bucket: setting.Bucket, | |||
EndPoint: obsEndPoint, | |||
ObjectKey: datasetInfo.DataLocalPath + datasetInfo.FullName, | |||
objectKey := datasetInfo.DataLocalPath + datasetInfo.FullName | |||
data = append(data, entity.ContainerData{ | |||
Name: name, | |||
Bucket: obsUploader.GetBucket(), | |||
EndPoint: obsUploader.GetEndpoint(), | |||
ObjectKey: objectKey, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + datasetInfo.FullName, | |||
ContainerPath: b.Opts.ContainerPath + "/" + name, | |||
}) | |||
} | |||
} | |||
ctx.Request.DatasetNames = datasetNames | |||
return data, nil | |||
} | |||
func (b DatasetBuilder) GetContainerType() ai_task_entity.ContainerDataType { | |||
return ai_task_entity.ContainerDataset | |||
func (b *DatasetBuilder) GetContainerType() entity.ContainerDataType { | |||
return entity.ContainerDataset | |||
} |
@@ -0,0 +1,47 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
) | |||
// FileNoteBookCodeBuilder prepares the code for a file-notebook task by
// cloning the requested repository branch onto the local job path.
type FileNoteBookCodeBuilder struct {
	// Opts controls whether the builder runs (Disable).
	Opts *entity.ContainerBuildOpts
}

// init registers FileNoteBookCodeBuilder so CreateContainerBuilder can
// instantiate it from the container type.
func init() {
	o := &FileNoteBookCodeBuilder{}
	RegisterContainerBuilder(o)
}

// SetOpts stores the build options used by Build.
func (b *FileNoteBookCodeBuilder) SetOpts(opts *entity.ContainerBuildOpts) {
	b.Opts = opts
}

// GetContainerType identifies this builder as the file-notebook code builder.
func (b *FileNoteBookCodeBuilder) GetContainerType() entity.ContainerDataType {
	return entity.ContainerFileNoteBookCode
}
func (b *FileNoteBookCodeBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { | |||
if b.Opts.Disable { | |||
return nil, nil | |||
} | |||
repo := ctx.Request.FileRepository | |||
if repo == nil { | |||
return nil, nil | |||
} | |||
err := DownloadBranch(repo, getCodePath(ctx.Request.JobName, repo, ctx.Request.FileBranchName), ctx.Request.FileBranchName) | |||
if err != nil { | |||
log.Error("download code failed", err) | |||
return nil, response.LOAD_CODE_FAILED | |||
} | |||
return nil, nil | |||
} | |||
func getCodePath(jobName string, repo *models.Repository, branchName string) string { | |||
return setting.JobPath + jobName + "/code" + "/" + repo.OwnerName + "/" + repo.Name + "/" + branchName | |||
} |
@@ -1,59 +0,0 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"errors" | |||
"io/ioutil" | |||
"os" | |||
"strings" | |||
) | |||
// MinioCodeBuilder downloads the repository code as a zip archive, uploads it
// to the attachment Minio storage and describes the archive as a
// ContainerData mount.
type MinioCodeBuilder struct {
	// Opts controls mount path and read-only flag of the produced data.
	Opts ContainerBuildOpts
}

// GetContainerType identifies this builder as the repository-code builder.
func (b MinioCodeBuilder) GetContainerType() ai_task_entity.ContainerDataType {
	return ai_task_entity.ContainerCode
}
// Build downloads the repository code as a zip archive of the requested
// branch, uploads it to the attachment Minio bucket under the job's code path
// and returns a single ContainerData describing the uploaded archive. The
// resolved commit id is stored in ctx.CommitID as a side effect.
func (b MinioCodeBuilder) Build(ctx *context.CreationContext) ([]ai_task_entity.ContainerData, error) {
	opts := b.Opts
	var err error
	jobName := ctx.Request.JobName
	repo := ctx.Repository
	codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/"
	// A readable directory means a stale local copy exists; remove it first.
	_, err = ioutil.ReadDir(codeLocalPath)
	if err == nil {
		os.RemoveAll(codeLocalPath)
	}
	commitId, err := upload.DownloadZipCode(ctx.GitRepo, codeLocalPath, ctx.Request.BranchName)
	if err != nil {
		log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)
		return nil, errors.New("cloudbrain.load_code_failed")
	}
	if err := upload.UploadDirToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
		log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err)
		return nil, errors.New("cloudbrain.load_code_failed")
	}
	// The archive is named after the default branch by convention.
	codeArchiveName := cloudbrain.DefaultBranchName + ".zip"
	codeData := ai_task_entity.ContainerData{
		Name:          strings.ToLower(repo.Name),
		Bucket:        setting.Attachment.Minio.Bucket,
		EndPoint:      setting.Attachment.Minio.Endpoint,
		ObjectKey:     setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" + codeArchiveName,
		ReadOnly:      opts.ReadOnly,
		ContainerPath: opts.ContainerPath + "/" + codeArchiveName,
		RealPath:      storage.GetMinioPath(jobName, cloudbrain.CodeMountPath+"/"+codeArchiveName),
	}
	//todo a better way to pass the commit id back?
	ctx.CommitID = commitId
	return []ai_task_entity.ContainerData{codeData}, nil
}
@@ -1,18 +0,0 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
) | |||
// ObsCodeBuilder is a placeholder code builder for OBS-backed tasks.
type ObsCodeBuilder struct {
	// Opts is accepted for interface symmetry but unused by this builder.
	Opts ContainerBuildOpts
}

// GetContainerType identifies this builder as the repository-code builder.
func (b ObsCodeBuilder) GetContainerType() ai_task_entity.ContainerDataType {
	return ai_task_entity.ContainerCode
}

// Build is a no-op and produces no container data.
func (b ObsCodeBuilder) Build(ctx *context.CreationContext) ([]ai_task_entity.ContainerData, error) {
	return nil, nil
}
@@ -1,21 +1,55 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
) | |||
type OutputPathBuilder struct { | |||
Opts ContainerBuildOpts | |||
Opts *entity.ContainerBuildOpts | |||
} | |||
func (b OutputPathBuilder) Build(*context.CreationContext) ([]ai_task_entity.ContainerData, error) { | |||
return []ai_task_entity.ContainerData{{ | |||
// init registers OutputPathBuilder so CreateContainerBuilder can instantiate
// it from the container type.
func init() {
	o := &OutputPathBuilder{}
	RegisterContainerBuilder(o)
}

// SetOpts stores the build options used by Build.
func (b *OutputPathBuilder) SetOpts(opts *entity.ContainerBuildOpts) {
	b.Opts = opts
}
func (b *OutputPathBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { | |||
if b.Opts.Disable { | |||
return nil, nil | |||
} | |||
storageTypes := b.Opts.AcceptStorageType | |||
if storageTypes == nil || len(storageTypes) == 0 { | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
jobName := ctx.Request.JobName | |||
uploader := upload.SelectUploaderFromStorageType(storageTypes[0]) | |||
remoteDir := uploader.GetJobDefaultObjectKeyPrefix(jobName) + cloudbrain.ModelMountPath | |||
err := uploader.MKDIR(remoteDir) | |||
if err != nil { | |||
log.Error("MKDIR err.displayJobName = %s err=%v", ctx.Request.DisplayJobName, err) | |||
return nil, response.NewBizError(err) | |||
} | |||
return []entity.ContainerData{{ | |||
ContainerPath: b.Opts.ContainerPath, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ObjectKey: remoteDir, | |||
RealPath: uploader.GetRealPath(remoteDir), | |||
Bucket: uploader.GetBucket(), | |||
EndPoint: uploader.GetEndpoint(), | |||
}}, nil | |||
} | |||
func (b OutputPathBuilder) GetContainerType() ai_task_entity.ContainerDataType { | |||
return ai_task_entity.ContainerOutPutPath | |||
func (b *OutputPathBuilder) GetContainerType() entity.ContainerDataType { | |||
return entity.ContainerOutPutPath | |||
} |
@@ -1,59 +0,0 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"errors" | |||
"os" | |||
) | |||
// CloudbrainOneOutputReadmeBuilder seeds the job's output directory with a
// README telling users that files placed there can be downloaded from the
// web page.
type CloudbrainOneOutputReadmeBuilder struct {
	// Opts provides the output directory's container path and read-only flag.
	Opts ContainerBuildOpts
}

// README is the name of the file written into the output directory.
const README = "README"
func (b CloudbrainOneOutputReadmeBuilder) Build(ctx *context.CreationContext) ([]ai_task_entity.ContainerData, error) { | |||
modelPath := setting.JobPath + ctx.Request.JobName + b.Opts.ContainerPath + "/" | |||
text := "You can put the files into this directory and download the files by the web page." | |||
err := os.MkdirAll(modelPath, os.ModePerm) | |||
if err != nil { | |||
log.Error("MkdirAll(%s) failed:%v", modelPath, err) | |||
return nil, err | |||
} | |||
fileName := modelPath + README | |||
f, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) | |||
if err != nil { | |||
log.Error("OpenFile failed", err.Error()) | |||
return nil, err | |||
} | |||
defer f.Close() | |||
_, err = f.WriteString(text) | |||
if err != nil { | |||
log.Error("WriteString failed", err.Error()) | |||
return nil, err | |||
} | |||
if err := upload.UploadDirToMinio(modelPath, ctx.Request.JobName, b.Opts.ContainerPath+"/"); err != nil { | |||
log.Error("Failed to UploadDirToMinio: %s (%v)", ctx.Request.JobName, err) | |||
return nil, errors.New("cloudbrain.load_code_failed") | |||
} | |||
return []ai_task_entity.ContainerData{{ | |||
Name: README, | |||
Bucket: setting.Attachment.Minio.Bucket, | |||
EndPoint: setting.Attachment.Minio.Endpoint, | |||
ObjectKey: setting.CBCodePathPrefix + ctx.Request.JobName + b.Opts.ContainerPath + "/" + README, | |||
ContainerPath: b.Opts.ContainerPath, | |||
ReadOnly: b.Opts.ReadOnly, | |||
}}, nil | |||
} | |||
// GetContainerType identifies this builder as the cloudbrain-one output
// README builder.
func (b CloudbrainOneOutputReadmeBuilder) GetContainerType() ai_task_entity.ContainerDataType {
	return ai_task_entity.ContainerCloudbrainOneOutPutReadMe
}
@@ -1,66 +1,113 @@ | |||
package container_builder | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/routers/response" | |||
"fmt" | |||
"strings" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/ai_task_service/upload" | |||
"code.gitea.io/gitea/services/cloudbrain/cloudbrainTask" | |||
"errors" | |||
"strings" | |||
) | |||
type PretrainModelBuilder struct { | |||
Opts ContainerBuildOpts | |||
Opts *entity.ContainerBuildOpts | |||
} | |||
func (b PretrainModelBuilder) Build(ctx *context.CreationContext) ([]ai_task_entity.ContainerData, error) { | |||
form := ctx.Request | |||
func init() { | |||
o := &PretrainModelBuilder{} | |||
RegisterContainerBuilder(o) | |||
} | |||
if form.ModelName == "" { | |||
func (b *PretrainModelBuilder) SetOpts(opts *entity.ContainerBuildOpts) { | |||
b.Opts = opts | |||
} | |||
func (b *PretrainModelBuilder) Build(ctx *context.CreationContext) ([]entity.ContainerData, *response.BizError) { | |||
if b.Opts.Disable { | |||
return nil, nil | |||
} | |||
m, err := models.QueryModelByPath(form.PreTrainModelUrl) | |||
form := ctx.Request | |||
storageTypes := b.Opts.AcceptStorageType | |||
if storageTypes == nil || len(storageTypes) == 0 { | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
//未选择预训练模型,跳过此步 | |||
if form.PretrainModelName == "" { | |||
return nil, nil | |||
} | |||
if form.PretrainModelId == "" { | |||
//异常数据,理论上应该都有modelId | |||
return nil, response.RESULT_CLEARD | |||
} | |||
//查出模型数据 | |||
m, err := models.QueryModelById(form.PretrainModelId) | |||
if err != nil { | |||
log.Error("Can not find model", err) | |||
return nil, errors.New("repo.modelconvert.manage.model_not_exist") | |||
return nil, response.MODEL_NOT_EXISTS | |||
} | |||
if !cloudbrainTask.IsModelFileExists(m, form.CkptName) { | |||
log.Error("model file not exist.name = %s", form.CkptName) | |||
return nil, errors.New("repo.modelconvert.manage.model_file_not_exist") | |||
preTrainModelUrl := m.Path | |||
if err != nil { | |||
log.Error("Can not find model", err) | |||
return nil, response.MODEL_NOT_EXISTS | |||
} | |||
//模型文件存储方式 | |||
oldStorageType := entity.GetStorageTypeFromCloudbrainType(m.Type) | |||
if oldStorageType == "" { | |||
log.Error("model storage type error.modelId=%d", m.ID) | |||
return nil, response.SYSTEM_ERROR | |||
} | |||
preTrainModelPath := getPreTrainModelPath(form.PreTrainModelUrl, form.CkptName) | |||
var modelData ai_task_entity.ContainerData | |||
switch m.Type { | |||
case models.TypeCloudBrainOne: | |||
modelData = ai_task_entity.ContainerData{ | |||
Name: form.ModelName, | |||
Bucket: setting.Attachment.Minio.Bucket, | |||
EndPoint: setting.Attachment.Minio.Endpoint, | |||
ObjectKey: preTrainModelPath, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + form.CkptName, | |||
RealPath: setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + preTrainModelPath, | |||
var preTrainModelPath string | |||
var preTrainModelEntity []entity.ContainerData | |||
storageType := oldStorageType | |||
ckptNames := strings.Split(form.PretrainModelCkptName, ";") | |||
for _, ckptName := range ckptNames { | |||
if !cloudbrainTask.IsModelFileExists(m, ckptName) { | |||
log.Error("model file not exist.name = %s", ckptName) | |||
return nil, response.MODEL_NOT_EXISTS | |||
} | |||
case models.TypeCloudBrainTwo: | |||
modelData = ai_task_entity.ContainerData{ | |||
Name: form.ModelName, | |||
Bucket: setting.Bucket, | |||
EndPoint: GetEndPoint(), | |||
ReadOnly: b.Opts.ReadOnly, | |||
preTrainModelPath = getPreTrainModelPath(preTrainModelUrl, ckptName) | |||
if !b.Opts.IsStorageTypeIn(oldStorageType) { | |||
//意味着模型之前存储的位置不符合要求,需要转存到指定存储 | |||
newStorageType := b.Opts.AcceptStorageType[0] | |||
//todo 可优化 | |||
if newStorageType == entity.MINIO && oldStorageType == entity.OBS { | |||
//复用以前代码 | |||
minioPreModelURL, err := dealModelInfo(form.PretrainModelId, form.JobName, ckptName) | |||
if err != nil { | |||
log.Error("Can not find model,modelId=%d err=%v", form.PretrainModelId, err) | |||
return nil, response.MODEL_NOT_EXISTS | |||
} | |||
preTrainModelUrl = minioPreModelURL | |||
preTrainModelPath = getPreTrainModelPath(minioPreModelURL, ckptName) | |||
storageType = entity.MINIO | |||
} | |||
} | |||
uploader := upload.SelectUploaderFromStorageType(storageType) | |||
modelData := entity.ContainerData{ | |||
Name: form.PretrainModelName, | |||
Bucket: uploader.GetBucket(), | |||
EndPoint: uploader.GetEndpoint(), | |||
ObjectKey: preTrainModelPath, | |||
ContainerPath: b.Opts.ContainerPath + "/" + form.CkptName, | |||
ReadOnly: b.Opts.ReadOnly, | |||
ContainerPath: b.Opts.ContainerPath + "/" + ckptName, | |||
RealPath: uploader.GetRealPath(preTrainModelPath), | |||
} | |||
preTrainModelEntity = append(preTrainModelEntity, modelData) | |||
} | |||
return []ai_task_entity.ContainerData{modelData}, nil | |||
form.PreTrainModelUrl = preTrainModelUrl | |||
return preTrainModelEntity, nil | |||
} | |||
func (b PretrainModelBuilder) GetContainerType() ai_task_entity.ContainerDataType { | |||
return ai_task_entity.ContainerPreTrainModel | |||
func (b *PretrainModelBuilder) GetContainerType() entity.ContainerDataType { | |||
return entity.ContainerPreTrainModel | |||
} | |||
func getPreTrainModelPath(pretrainModelDir string, fileName string) string { | |||
@@ -73,3 +120,40 @@ func getPreTrainModelPath(pretrainModelDir string, fileName string) string { | |||
} | |||
} | |||
func dealModelInfo(modelId string, jobName string, ckptName string) (string, error) { | |||
preModel, err := models.QueryModelById(modelId) | |||
if err != nil || preModel == nil || preModel.ID == "" { | |||
log.Error("Can not find model", err) | |||
return "", fmt.Errorf("Can not find model: %v", ckptName) | |||
} | |||
minioPreModelURL, err := downloadModelFromObs(preModel, jobName, cloudbrain.PretrainModelMountPath, ckptName) | |||
if err != nil { | |||
log.Error("Can not find model", err) | |||
return "", err | |||
} | |||
return minioPreModelURL, nil | |||
} | |||
func downloadModelFromObs(preModel *models.AiModelManage, jobName, suffixPath string, ckptFileName string) (string, error) { | |||
destPath := setting.CBCodePathPrefix + jobName + suffixPath + "/" | |||
destFile := destPath + ckptFileName | |||
returnStr := setting.Attachment.Minio.Bucket + "/" + destPath | |||
srcUrl := preModel.Path[len(setting.Bucket)+1:] + ckptFileName | |||
log.Info("dest model Path=" + returnStr + " src path=" + preModel.Path + ckptFileName) | |||
body, err := storage.ObsDownloadAFile(setting.Bucket, srcUrl) | |||
if err == nil { | |||
defer body.Close() | |||
_, err = storage.Attachments.UploadContent(setting.Attachment.Minio.Bucket, destFile, body) | |||
if err != nil { | |||
log.Error("UploadObject(%s) failed: %s", preModel.Path+ckptFileName, err.Error()) | |||
return "", err | |||
} | |||
} else { | |||
log.Info("download model failed. as " + err.Error()) | |||
return "", err | |||
} | |||
log.Info("download model from obs succeed") | |||
return returnStr, nil | |||
} |
@@ -1,48 +1,48 @@ | |||
package context | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/git" | |||
) | |||
type CreationContext struct { | |||
Request ai_task_entity.CreateReq | |||
ContainerData map[ai_task_entity.ContainerDataType][]ai_task_entity.ContainerData | |||
Request *entity.CreateReq | |||
ContainerData map[entity.ContainerDataType][]entity.ContainerData | |||
GitRepo *git.Repository | |||
Repository *models.Repository | |||
Spec *models.Specification | |||
User *models.User | |||
Datasets map[string]models.DatasetInfo | |||
CommitID string | |||
Response *ai_task_entity.CreationResponse | |||
Response *entity.CreationResponse | |||
SourceCloudbrain *models.Cloudbrain | |||
NewCloudbrain *models.Cloudbrain | |||
AITaskConfig entity.AITaskConfig | |||
} | |||
func (ctx *CreationContext) AddContainerData(t ai_task_entity.ContainerDataType, d []ai_task_entity.ContainerData) { | |||
func (ctx *CreationContext) AddContainerData(t entity.ContainerDataType, d []entity.ContainerData) { | |||
if ctx.ContainerData == nil { | |||
ctx.ContainerData = make(map[ai_task_entity.ContainerDataType][]ai_task_entity.ContainerData, 0) | |||
ctx.ContainerData = make(map[entity.ContainerDataType][]entity.ContainerData, 0) | |||
} | |||
ctx.ContainerData[t] = d | |||
} | |||
func (ctx *CreationContext) GetContainerDataArray(t ai_task_entity.ContainerDataType) []ai_task_entity.ContainerData { | |||
func (ctx *CreationContext) GetContainerDataArray(t entity.ContainerDataType) []entity.ContainerData { | |||
if ctx.ContainerData == nil { | |||
return nil | |||
} | |||
return ctx.ContainerData[t] | |||
} | |||
func (ctx *CreationContext) GetContainerData(t ai_task_entity.ContainerDataType) ai_task_entity.ContainerData { | |||
func (ctx *CreationContext) GetContainerData(t entity.ContainerDataType) entity.ContainerData { | |||
a := ctx.GetContainerDataArray(t) | |||
if a == nil || len(a) == 0 { | |||
return ai_task_entity.ContainerData{} | |||
return entity.ContainerData{} | |||
} | |||
return a[0] | |||
} | |||
func (ctx *CreationContext) WriteResponse(t ai_task_entity.ContainerDataType) ai_task_entity.ContainerData { | |||
func (ctx *CreationContext) WriteResponse(t entity.ContainerDataType) entity.ContainerData { | |||
a := ctx.GetContainerDataArray(t) | |||
if a == nil || len(a) == 0 { | |||
return ai_task_entity.ContainerData{} | |||
return entity.ContainerData{} | |||
} | |||
return a[0] | |||
} |
@@ -2,6 +2,18 @@ package schedule | |||
import ( | |||
"bytes" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"os/exec" | |||
"path" | |||
"strings" | |||
"time" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/obs" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/grampus" | |||
"code.gitea.io/gitea/modules/labelmsg" | |||
@@ -11,14 +23,7 @@ import ( | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/storage" | |||
"code.gitea.io/gitea/modules/util" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"github.com/minio/minio-go" | |||
"os/exec" | |||
"path" | |||
"strings" | |||
"time" | |||
) | |||
const NPUModelDefaultName = "models.zip" | |||
@@ -205,7 +210,33 @@ func LocalMigrateOperate(jobName, computeSource string, r *models.ModelMigrateRe | |||
} | |||
if computeSource == models.NPUResource { | |||
//因为NPU的输出会被压缩,因此需要解压+移桶 | |||
decompress(r.DestBucket+"/"+r.DestObjectKey, setting.Bucket+"/"+strings.TrimSuffix(r.DestObjectKey, models.ModelSuffix)) | |||
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
log.Error("Failed to obsMkdir_output: %s (%v)", jobName, err) | |||
return err | |||
} | |||
log.Info("DestObjectKey", r.DestObjectKey) | |||
if strings.Contains(r.DestObjectKey, ".") { | |||
decompress(r.DestBucket+"/"+r.DestObjectKey, setting.Bucket+"/"+strings.TrimSuffix(r.DestObjectKey, models.ModelSuffix)) | |||
} else { //如果是文件夹,遍历文件 | |||
fileInfos, err := storage.GetOneLevelObjectsUnderDir(r.DestBucket, "", r.DestObjectKey) | |||
if err != nil { | |||
log.Error("UpdateModelMigrateStatusByStep err. r.ID=%d step=%d err=%v", r.ID, models.BucketMoveFailed, err) | |||
return err | |||
} | |||
for _, fileInfo := range fileInfos { | |||
log.Info("decompress file:", fileInfo.FileName) | |||
sourceFilPath := r.DestBucket + "/" + r.DestObjectKey + fileInfo.FileName | |||
if !strings.HasSuffix(r.DestObjectKey, "/") { | |||
sourceFilPath = r.DestBucket + "/" + r.DestObjectKey + "/" + fileInfo.FileName | |||
} | |||
decompress(sourceFilPath, setting.Bucket+"/"+strings.TrimSuffix(r.DestObjectKey, models.ModelSuffix)) | |||
} | |||
} | |||
} else { | |||
//因为调度无法指定桶,所以调度成功后我们还需要移桶 | |||
if setting.UseLocalMinioMigrate { | |||
@@ -233,6 +264,19 @@ func LocalMigrateOperate(jobName, computeSource string, r *models.ModelMigrateRe | |||
return nil | |||
} | |||
func obsMkdir(dir string) error { | |||
input := &obs.PutObjectInput{} | |||
input.Bucket = setting.Bucket | |||
input.Key = dir | |||
_, err := storage.ObsCli.PutObject(input) | |||
if err != nil { | |||
log.Error("PutObject(%s) failed: %s", input.Key, err.Error()) | |||
return err | |||
} | |||
return nil | |||
} | |||
func TryToUpdateNPUMoveBucketResult(record *models.ModelMigrateRecord, jobName, versionName string) error { | |||
if IsNPUModelDirHasFile(jobName, versionName) { | |||
if err := models.UpdateModelMigrateStatusByStep(record, models.BucketMoveSuccess); err != nil { | |||
@@ -1,14 +1,13 @@ | |||
package task | |||
import ( | |||
"code.gitea.io/gitea/entity/ai_task_entity" | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/cluster" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"strconv" | |||
@@ -16,106 +15,139 @@ import ( | |||
) | |||
type CloudbrainOneNotebookTaskTemplate struct { | |||
DefaultCreationHandler | |||
DefaultAITaskTemplate | |||
} | |||
func init() { | |||
t := &CloudbrainOneNotebookTaskTemplate{ | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: ai_task_entity.OpenICloudbrainOne, | |||
ClusterType: entity.OpenICloudbrainOne, | |||
JobType: models.JobTypeDebug, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeDebug, ai_task_entity.OpenICloudbrainOne, t) | |||
RegisterTask(models.JobTypeDebug, entity.OpenICloudbrainOne, t) | |||
} | |||
func (g CloudbrainOneNotebookTaskTemplate) MyClusterType() ai_task_entity.ClusterType { | |||
return "" | |||
} | |||
func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext) (*ai_task_entity.CreateTaskRes, *response.BizError) { | |||
func (t CloudbrainOneNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasetSize). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckBranchExists). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNext(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create CloudbrainOneNotebookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &ai_task_entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
func (g CloudbrainOneNotebookTaskTemplate) Restart(*context.CreationContext) (*ai_task_entity.CreateTaskRes, *response.BizError) { | |||
return nil, nil | |||
} | |||
func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
c, err := cluster.GetCluster(ai_task_entity.OpenICloudbrainOne) | |||
if err != nil { | |||
return response.SYSTEM_ERROR | |||
func (g CloudbrainOneNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
if opts.IsFileNoteBookRequest { | |||
return entity.AITaskConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerFileNoteBookCode: {}, | |||
entity.ContainerCode: { | |||
ContainerPath: "/code", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
NotArchive: true, | |||
}, | |||
}, | |||
} | |||
} | |||
form := ctx.Request | |||
req := ai_task_entity.CreateNoteBookTaskRequest{ | |||
Name: form.JobName, | |||
Tasks: []ai_task_entity.NoteBookTask{ | |||
{ | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
Datasets: ctx.GetContainerDataArray(ai_task_entity.ContainerDataset), | |||
Code: ctx.GetContainerDataArray(ai_task_entity.ContainerCode), | |||
PreTrainModel: ctx.GetContainerDataArray(ai_task_entity.ContainerPreTrainModel), | |||
AutoStopDuration: autoStopDurationMs, | |||
Capacity: setting.Capacity, | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
Spec: ctx.Spec, | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
ContainerPath: "/code", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
NotArchive: true, | |||
}, | |||
entity.ContainerDataset: { | |||
ContainerPath: "/dataset", | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
NotArchive: true, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
ContainerPath: "/pretrainmodel", | |||
ReadOnly: true, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
}, | |||
entity.ContainerOutPutPath: { | |||
ContainerPath: "/model", | |||
ReadOnly: false, | |||
AcceptStorageType: []entity.StorageType{entity.MINIO}, | |||
}, | |||
}, | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateNoteBook(req) | |||
} | |||
func (t CloudbrainOneNotebookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.BuildRequest4Restart). | |||
Next(t.CheckOutput4Restart). | |||
Next(t.CheckModel). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.BuildContainerData). | |||
Next(t.CallRestartAPI). | |||
Next(t.CreateCloudbrainRecord4Restart). | |||
Next(t.NotifyCreation). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("CloudbrainOneNotebookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
ctx.Response = &ai_task_entity.CreationResponse{ | |||
Error: err, | |||
} | |||
return nil | |||
} else { | |||
ctx.Response = &ai_task_entity.CreationResponse{ | |||
JobID: res.JobID, | |||
Status: res.Status, | |||
CreateTime: createTime, | |||
} | |||
log.Error("Restart GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
if err != nil { | |||
log.Error("Restart GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID, Status: ctx.NewCloudbrain.Status}, nil | |||
return nil | |||
} | |||
func (g CloudbrainOneNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError { | |||
c, err := cluster.GetCluster(ai_task_entity.OpenICloudbrainOne) | |||
func (c CloudbrainOneNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (g CloudbrainOneNotebookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
return response.SYSTEM_ERROR | |||
} | |||
form := ctx.Request | |||
req := ai_task_entity.CreateNoteBookTaskRequest{ | |||
req := entity.CreateNoteBookTaskRequest{ | |||
Name: form.JobName, | |||
Tasks: []ai_task_entity.NoteBookTask{ | |||
Tasks: []entity.NoteBookTask{ | |||
{ | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
Datasets: ctx.GetContainerDataArray(ai_task_entity.ContainerDataset), | |||
Code: ctx.GetContainerDataArray(ai_task_entity.ContainerCode), | |||
PreTrainModel: ctx.GetContainerDataArray(ai_task_entity.ContainerPreTrainModel), | |||
Datasets: ctx.GetContainerDataArray(entity.ContainerDataset), | |||
Code: ctx.GetContainerDataArray(entity.ContainerCode), | |||
PreTrainModel: ctx.GetContainerDataArray(entity.ContainerPreTrainModel), | |||
OutPut: ctx.GetContainerDataArray(entity.ContainerOutPutPath), | |||
AutoStopDuration: autoStopDurationMs, | |||
Capacity: setting.Capacity, | |||
CenterID: ctx.Spec.GetAvailableCenterIds(ctx.User.ID, form.JobType), | |||
@@ -129,33 +161,19 @@ func (g CloudbrainOneNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationC | |||
log.Error("CloudbrainOneNotebookTask CreateNoteBook err.req=%+v err=%v", req, err) | |||
return response.NewBizError(err) | |||
} | |||
ctx.Response = &ai_task_entity.CreationResponse{ | |||
ctx.Response = &entity.CreationResponse{ | |||
JobID: res.JobID, | |||
Status: res.Status, | |||
CreateTime: createTime, | |||
} | |||
return nil | |||
} | |||
func (CloudbrainOneNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.NewBuilderChain(). | |||
Next(container_builder.ObsCodeBuilder{Opts: container_builder.ContainerBuildOpts{ | |||
ContainerPath: "/code", | |||
ReadOnly: false, | |||
}}). | |||
Next(container_builder.DatasetBuilder{Opts: container_builder.ContainerBuildOpts{ | |||
ContainerPath: "/dataset", | |||
ReadOnly: true, | |||
}}). | |||
Next(container_builder.PretrainModelBuilder{Opts: container_builder.ContainerBuildOpts{ | |||
ContainerPath: "/pretrainmodel", | |||
ReadOnly: false, | |||
}}). | |||
Run(ctx) | |||
if err != nil { | |||
return response.NewBizError(err) | |||
} | |||
return nil | |||
func (g CloudbrainOneNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError { | |||
//云脑一没有再次调试接口,通过使用同样的参数新建接口来模拟 | |||
return g.CallCreationAPI(ctx) | |||
} | |||
func (CloudbrainOneNotebookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
@@ -0,0 +1,217 @@ | |||
package task | |||
import ( | |||
"code.gitea.io/gitea/entity" | |||
"code.gitea.io/gitea/models" | |||
"code.gitea.io/gitea/modules/convert" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/setting" | |||
api "code.gitea.io/gitea/modules/structs" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/ai_task_service/container_builder" | |||
"code.gitea.io/gitea/services/ai_task_service/context" | |||
"code.gitea.io/gitea/services/cloudbrain/resource" | |||
"strconv" | |||
"strings" | |||
) | |||
type CloudbrainTwoNotebookTaskTemplate struct { | |||
DefaultAITaskTemplate | |||
} | |||
func init() { | |||
t := &CloudbrainTwoNotebookTaskTemplate{ | |||
DefaultAITaskTemplate: DefaultAITaskTemplate{ | |||
ClusterType: entity.OpenICloudbrainTwo, | |||
JobType: models.JobTypeDebug, | |||
}, | |||
} | |||
RegisterTask(models.JobTypeDebug, entity.OpenICloudbrainTwo, t) | |||
} | |||
func (t CloudbrainTwoNotebookTaskTemplate) Create(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.CheckDisplayJobName). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CheckDatasetSize). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckBranchExists). | |||
Next(t.InsertCloudbrainRecord4Async). | |||
AsyncNextWithErrFun(t.BuildContainerData, t.CallCreationAPI, t.AfterCallCreationAPI4Async, t.NotifyCreation, t.HandleErr4Async). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("create CloudbrainOneNotebookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID}, nil | |||
} | |||
func (g CloudbrainTwoNotebookTaskTemplate) GetConfig(opts entity.GetAITaskConfigOpts) entity.AITaskConfig { | |||
if opts.IsFileNoteBookRequest { | |||
return entity.AITaskConfig{ | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerFileNoteBookCode: {}, | |||
}, | |||
} | |||
} | |||
return entity.AITaskConfig{ | |||
DatasetMaxSize: setting.DebugAttachSize * 1000 * 1000 * 1000, | |||
ContainerSteps: map[entity.ContainerDataType]*entity.ContainerBuildOpts{ | |||
entity.ContainerCode: { | |||
Disable: true, | |||
AcceptStorageType: []entity.StorageType{entity.OBS}, | |||
}, | |||
entity.ContainerDataset: { | |||
Disable: true, | |||
AcceptStorageType: []entity.StorageType{entity.OBS}, | |||
}, | |||
entity.ContainerPreTrainModel: { | |||
Disable: true, | |||
AcceptStorageType: []entity.StorageType{entity.OBS}, | |||
}, | |||
}, | |||
} | |||
} | |||
func (t CloudbrainTwoNotebookTaskTemplate) Restart(ctx *context.CreationContext) (*entity.CreateTaskRes, *response.BizError) { | |||
c := &CreateOperator{} | |||
err := c.Next(t.BuildRequest4Restart). | |||
Next(t.CheckOutput4Restart). | |||
Next(t.CheckModel). | |||
Next(t.CheckDatasetExists). | |||
Next(t.CheckIsCleared). | |||
Next(t.CheckParam). | |||
Next(t.CheckMulti). | |||
Next(t.LoadSpec). | |||
Next(t.CheckPointBalance). | |||
Next(t.CallRestartAPI). | |||
Next(t.CreateCloudbrainRecord4Restart). | |||
Next(t.NotifyCreation). | |||
Operate(ctx) | |||
if err != nil { | |||
log.Error("Restart GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
if err != nil { | |||
log.Error("Restart GrampusNoteBookTask err.%v", err) | |||
return nil, err | |||
} | |||
return &entity.CreateTaskRes{ID: ctx.NewCloudbrain.ID, Status: ctx.NewCloudbrain.Status}, nil | |||
} | |||
func (g CloudbrainTwoNotebookTaskTemplate) CallCreationAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
return response.SYSTEM_ERROR | |||
} | |||
form := ctx.Request | |||
req := entity.CreateNoteBookTaskRequest{ | |||
Name: form.JobName, | |||
Description: form.Description, | |||
Tasks: []entity.NoteBookTask{ | |||
{ | |||
Name: form.JobName, | |||
ResourceSpecId: ctx.Spec.SourceSpecId, | |||
ImageId: form.ImageID, | |||
ImageUrl: strings.TrimSpace(form.ImageUrl), | |||
AutoStopDuration: autoStopDurationMs, | |||
Spec: ctx.Spec, | |||
}, | |||
}, | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.CreateNoteBook(req) | |||
if err != nil { | |||
log.Error("CloudbrainTwoNotebookTaskTemplate CreateNoteBook err.req=%+v err=%v", req, err) | |||
return response.NewBizError(err) | |||
} | |||
ctx.Response = &entity.CreationResponse{ | |||
JobID: res.JobID, | |||
Status: res.Status, | |||
CreateTime: createTime, | |||
} | |||
return nil | |||
} | |||
func (g CloudbrainTwoNotebookTaskTemplate) CallRestartAPI(ctx *context.CreationContext) *response.BizError { | |||
c := g.GetMyCluster() | |||
if c == nil { | |||
log.Error("Get cluster failed") | |||
return response.SYSTEM_ERROR | |||
} | |||
createTime := timeutil.TimeStampNow() | |||
res, err := c.RestartNoteBook(ctx.SourceCloudbrain.JobID) | |||
if err != nil { | |||
log.Error("CloudbrainTwoNotebookTaskTemplate RestartNoteBook err.Cloudbrain.JobID=%s err=%v", ctx.SourceCloudbrain.JobID, err) | |||
return response.NewBizError(err) | |||
} | |||
if res.JobId == "" { | |||
log.Error("CloudbrainTwoNotebookTaskTemplate RestartNoteBook failed.Cloudbrain.JobID=%s", ctx.SourceCloudbrain.JobID) | |||
return response.RESTART_FAILED | |||
} | |||
ctx.Response = &entity.CreationResponse{ | |||
JobID: res.JobId, | |||
Status: res.Status, | |||
CreateTime: createTime, | |||
} | |||
return nil | |||
} | |||
func (c CloudbrainTwoNotebookTaskTemplate) BuildContainerData(ctx *context.CreationContext) *response.BizError { | |||
err := container_builder.BuildContainerDataChain(c.GetConfig(entity.GetAITaskConfigOpts{ | |||
ComputeSource: ctx.Request.ComputeSource.Name, | |||
IsFileNoteBookRequest: ctx.Request.IsFileNoteBookRequest, | |||
}).ContainerSteps).Run(ctx) | |||
if err != nil { | |||
return err | |||
} | |||
return nil | |||
} | |||
func (CloudbrainTwoNotebookTaskTemplate) NotifyCreation(ctx *context.CreationContext) *response.BizError { | |||
req := ctx.Request | |||
jobID := ctx.Response.JobID | |||
task, err := models.GetCloudbrainByJobID(jobID) | |||
if err != nil { | |||
log.Error("GetCloudbrainByJobID failed: %v", err.Error()) | |||
return response.NewBizError(err) | |||
} | |||
stringId := strconv.FormatInt(task.ID, 10) | |||
notification.NotifyOtherTask(ctx.User, ctx.Repository, stringId, req.DisplayJobName, models.ActionCreateDebugNPUTask) | |||
return nil | |||
} | |||
func (g CloudbrainTwoNotebookTaskTemplate) GetSpecs(userId int64, computeSource models.ComputeSource) ([]*api.SpecificationShow, *response.BizError) { | |||
var aiCenterCode = models.AICenterOfCloudBrainTwo | |||
if setting.ModelartsCD.Enabled { | |||
aiCenterCode = models.AICenterOfChengdu | |||
} | |||
var specs []*models.Specification | |||
var err error | |||
specs, err = resource.FindAvailableSpecs(userId, models.FindSpecsOptions{ | |||
JobType: g.JobType, | |||
ComputeResource: computeSource.Name, | |||
Cluster: g.ClusterType.GetParentCluster(), | |||
AiCenterCode: aiCenterCode, | |||
}) | |||
if err != nil { | |||
log.Error("GetSpecs err.%v", err) | |||
return nil, response.SPEC_NOT_AVAILABLE | |||
} | |||
r := make([]*api.SpecificationShow, len(specs)) | |||
for i, v := range specs { | |||
r[i] = convert.ToSpecification(v) | |||
} | |||
return r, nil | |||
} |
Dear OpenI User
Thank you for your continuous support to the Openl Qizhi Community AI Collaboration Platform. In order to protect your usage rights and ensure network security, we updated the Openl Qizhi Community AI Collaboration Platform Usage Agreement in January 2024. The updated agreement specifies that users are prohibited from using intranet penetration tools. After you click "Agree and continue", you can continue to use our services. Thank you for your cooperation and understanding.
For more agreement content, please refer to the《Openl Qizhi Community AI Collaboration Platform Usage Agreement》