#2841 fix-2833 【云脑任务】GPU推理和评测任务日志页签优化

Merged
liuzx merged 19 commits from fix-2833 into V20220908 1 year ago
  1. +3
    -3
      modules/cloudbrain/cloudbrain.go
  2. +139
    -30
      routers/api/v1/repo/cloudbrain.go
  3. +5
    -3
      routers/repo/cloudbrain.go
  4. +39
    -15
      templates/repo/cloudbrain/benchmark/show.tmpl
  5. +42
    -4
      templates/repo/cloudbrain/inference/show.tmpl

+ 3
- 3
modules/cloudbrain/cloudbrain.go View File

@@ -20,7 +20,7 @@ import (
const (
//Command = `pip3 install jupyterlab==2.2.5 -i https://pypi.tuna.tsinghua.edu.cn/simple;service ssh stop;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir="/code" --port=80 --LabApp.token="" --LabApp.allow_origin="self https://cloudbrain.pcl.ac.cn"`
//CommandBenchmark = `echo "start benchmark";python /code/test.py;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh;echo "end benchmark"`
CommandBenchmark = `echo "start benchmark";cd /benchmark && bash run_bk.sh | tee /model/benchmark-log.txt;echo "end benchmark"`
CodeMountPath = "/code"
DataSetMountPath = "/dataset"
ModelMountPath = "/model"
@@ -30,8 +30,8 @@ const (
Snn4imagenetMountPath = "/snn4imagenet"
BrainScoreMountPath = "/brainscore"
TaskInfoName = "/taskInfo"
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s'`
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s'`
Snn4imagenetCommand = `/opt/conda/bin/python /snn4imagenet/testSNN_script.py --modelname '%s' --modelpath '/dataset' --modeldescription '%s' | tee /model/benchmark-log.txt`
BrainScoreCommand = `bash /brainscore/brainscore_test_par4shSrcipt.sh -b '%s' -n '%s' -p '/dataset' -d '%s' | tee /model/benchmark-log.txt`

SubTaskName = "task1"



+ 139
- 30
routers/api/v1/repo/cloudbrain.go View File

@@ -405,52 +405,159 @@ func CloudbrainDownloadLogFile(ctx *context.Context) {

func CloudbrainGetLog(ctx *context.Context) {
ID := ctx.Params(":id")
startLine := ctx.QueryInt("base_line")
lines := ctx.QueryInt("lines")
endLine := startLine + lines
order := ctx.Query("order")
if order == "asc" {
endLine = startLine
startLine = endLine - lines
if startLine < 0 {
startLine = 0
}
}
job, err := models.GetCloudbrainByID(ID)
if err != nil {
log.Error("GetCloudbrainByJobName failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
result := getLogFromModelDir(job.JobName, startLine, endLine)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
lines := ctx.QueryInt("lines")
baseLine := ctx.Query("base_line")
order := ctx.Query("order")
var result map[string]interface{}
resultPath := "/model"
if job.JobType == string(models.JobTypeInference) {
resultPath = "/result"
}
if baseLine == "" && order == "desc" {
result = getLastLogFromModelDir(job.JobName, lines, resultPath)
} else {
startLine := ctx.QueryInt("base_line")
endLine := startLine + lines
if order == "asc" {
if baseLine == "" {
startLine = 0
endLine = lines
} else {
endLine = startLine
startLine = endLine - lines
if startLine < 0 {
startLine = 0
}
}
}
result = getLogFromModelDir(job.JobName, startLine, endLine, resultPath)
if result == nil {
log.Error("GetJobLog failed: %v", err, ctx.Data["MsgID"])
ctx.ServerError(err.Error(), err)
return
}
}

re := map[string]interface{}{
"JobID": ID,
"LogFileName": result["FileName"],
"StartLine": startLine,
"EndLine": result["endLine"],
"StartLine": result["StartLine"],
"EndLine": result["EndLine"],
"Content": result["Content"],
"Lines": result["lines"],
"Lines": result["Lines"],
"CanLogDownload": result["FileName"] != "",
}
//result := CloudbrainGetLogByJobId(job.JobID, job.JobName)

ctx.JSON(http.StatusOK, re)
}

func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + "/model"
func getAllLineFromFile(path string) int {
count := 0
reader, err := os.Open(path)
defer reader.Close()
if err == nil {
r := bufio.NewReader(reader)
for {
_, error := r.ReadString('\n')
if error == io.EOF {
log.Info("read file completed.")
break
}
if error != nil {
log.Info("read file error." + error.Error())
break
}
count = count + 1
}
} else {
log.Info("error:" + err.Error())
}
return count
}

func getLastLogFromModelDir(jobName string, lines int, resultPath string) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
if err != nil {
log.Error("query cloudbrain model failed: %v", err)
return nil
}

re := ""
fileName := ""
count := 0
allLines := 0
startLine := 0
for _, file := range files {
if strings.HasSuffix(file.FileName, "log.txt") {
fileName = file.FileName
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
allLines = getAllLineFromFile(path)
startLine = allLines - lines
if startLine < 0 {
startLine = 0
}
count = allLines - startLine
log.Info("path=" + path)
reader, err := os.Open(path)
defer reader.Close()
if err == nil {
r := bufio.NewReader(reader)
for i := 0; i < allLines; i++ {
line, error := r.ReadString('\n')
if error == io.EOF {
log.Info("read file completed.")
break
}
if error != nil {
log.Info("read file error." + error.Error())
break
}
if error == nil {
if i >= startLine {
re = re + line
}
}
}
} else {
log.Info("error:" + err.Error())
}
break
}
}

return map[string]interface{}{
"JobName": jobName,
"Content": re,
"FileName": fileName,
"Lines": count,
"EndLine": allLines,
"StartLine": startLine,
}
}

func getLogFromModelDir(jobName string, startLine int, endLine int, resultPath string) map[string]interface{} {
prefix := "/" + setting.CBCodePathPrefix + jobName + resultPath
files, err := storage.GetOneLevelAllObjectUnderDirMinio(setting.Attachment.Minio.Bucket, prefix, "")
if err != nil {
log.Error("query cloudbrain model failed: %v", err)
return nil
}
if startLine == endLine {
return map[string]interface{}{
"JobName": jobName,
"Content": "",
"FileName": "",
"Lines": 0,
"EndLine": startLine,
"StartLine": startLine,
}
}
re := ""
fileName := ""
count := 0
@@ -458,7 +565,7 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
for _, file := range files {
if strings.HasSuffix(file.FileName, "log.txt") {
fileName = file.FileName
path := storage.GetMinioPath(jobName+"/model/", file.FileName)
path := storage.GetMinioPath(jobName+resultPath+"/", file.FileName)
log.Info("path=" + path)
reader, err := os.Open(path)
defer reader.Close()
@@ -467,7 +574,6 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
for i := 0; i < endLine; i++ {
line, error := r.ReadString('\n')
log.Info("line=" + line)
fileEndLine = i
if error == io.EOF {
log.Info("read file completed.")
break
@@ -478,11 +584,13 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
}
if error == nil {
if i >= startLine {
fileEndLine = i
re = re + line
count++
}
}
}
fileEndLine = fileEndLine + 1
} else {
log.Info("error:" + err.Error())
}
@@ -491,11 +599,12 @@ func getLogFromModelDir(jobName string, startLine int, endLine int) map[string]i
}

return map[string]interface{}{
"JobName": jobName,
"Content": re,
"FileName": fileName,
"lines": count,
"endLine": fileEndLine,
"JobName": jobName,
"Content": re,
"FileName": fileName,
"Lines": count,
"EndLine": fileEndLine,
"StartLine": startLine,
}
}



+ 5
- 3
routers/repo/cloudbrain.go View File

@@ -2431,7 +2431,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tplCloudBrainBenchmarkNew, &form)
return
}

log.Info("Command=" + command)
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"))
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -2560,7 +2561,8 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return
}

log.Info("Command=" + command)
log.Info("ModelPath=" + storage.GetMinioPath(jobName, cloudbrain.ModelMountPath+"/"))
req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx,
DisplayJobName: displayJobName,
@@ -2689,7 +2691,7 @@ func getInferenceJobCommand(form auth.CreateCloudBrainInferencForm) (string, err

param += " --modelname" + "=" + form.CkptName

command += "python /code/" + bootFile + param + " > " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile
command += "python /code/" + bootFile + param + " | tee " + cloudbrain.ResultPath + "/" + form.DisplayJobName + "-" + cloudbrain.LogFile

return command, nil
}


+ 39
- 15
templates/repo/cloudbrain/benchmark/show.tmpl View File

@@ -256,8 +256,9 @@
<div class="ui pointing secondary menu" style="border-bottom: 1px solid rgba(34,36,38,.15);">
<a class="active item"
data-tab="first{{$k}}">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second{{$k}}"
onclick="loadLog({{.VersionName}})">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item log_bottom" data-tab="second{{$k}}"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
</div>
<div class="ui tab active" data-tab="first{{$k}}">
<div style="padding-top: 10px;">
@@ -528,19 +529,42 @@
</div>
<div class="ui tab" data-tab="second{{$k}}">
<div>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>

</div>

</div>
<a id="{{.VersionName}}-log-down"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}'
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

</div>
</div>


+ 42
- 4
templates/repo/cloudbrain/inference/show.tmpl View File

@@ -228,7 +228,7 @@
</h4>
{{with .task}}
<div class="ui accordion border-according" id="accordion{{.VersionName}}"
data-repopath="{{$.RepoRelPath}}/cloudbrain/inference-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}">
data-repopath="{{$.RepoRelPath}}/cloudbrain" data-jobid="{{.ID}}" data-version="{{.VersionName}}">
<input type="hidden" id="jobId_input" name="jobId_input" value="{{.JobID}}">
<div class="active title padding0">
<div class="according-panel-heading">
@@ -264,7 +264,8 @@
data-tab="first">{{$.i18n.Tr "repo.modelarts.train_job.config"}}</a>
<a class="item" data-tab="second"
onclick="javascript:parseInfo()">{{$.i18n.Tr "repo.cloudbrain.runinfo"}}</a>
<a class="item log_bottom" data-tab="third"
data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a>
<a class="item load-model-file" data-tab="four"
data-gpu-flag="true" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/cloudbrain/inference-job/{{.JobID}}/result_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a>
</div>
@@ -524,7 +525,7 @@
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log" id="log{{.VersionName}}"
<div class="ui attached"
style="height: 390px !important; overflow: auto;">
<input type="hidden" id="json_value" value="{{$.result.JobStatus.AppExitDiagnostics}}">
<input type="hidden" id="ExitDiagnostics" value="{{$.ExitDiagnostics}}">
@@ -537,7 +538,44 @@

</div>

<div class="ui tab" data-tab="third">
<div>
<a id="{{.VersionName}}-log-down"
class='{{if $.canDownload}}ti-download-file{{else}}disabled{{end}}'
href="/api/v1/repos/{{$.RepoRelPath}}/cloudbrain/{{.ID}}/download_log_file">
<i class="ri-download-cloud-2-line"></i>
<span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span>
</a>
</div>
<div
style="position: relative;border: 1px solid rgba(0,0,0,.2);padding: 0 10px;margin-top: 10px;">
<span>
<a title="滚动到顶部" style="position: absolute; right: -32px;cursor: pointer;"
class="log_top" data-version="{{.VersionName}}"><i class="icon-to-top"></i></a>
</span>
<span class="log-info-{{.VersionName}}">
<a title="滚动到底部" style="position: absolute; bottom: 10px;right: -32px;cursor: pointer;"
class="log_bottom" data-version="{{.VersionName}}"><i
class="icon-to-bottom"></i></a>
</span>
<div class="ui message message{{.VersionName}}" style="display: none;">
<div id="header"></div>
</div>
<div class="ui attached log log-scroll" id="log{{.VersionName}}" data-version="{{.VersionName}}"
style="height: 300px !important; overflow: auto;">
<div class="ui inverted active dimmer">
<div class="ui loader"></div>
</div>
<input type="hidden" name="end_line" value>
<input type="hidden" name="start_line" value>
<pre id="log_file{{.VersionName}}"></pre>
</div>
</div>
</div>

<div class="ui tab" data-tab="four">
<input type="hidden" name="model{{.VersionName}}" value="-1">


Loading…
Cancel
Save