|
|
@@ -8,6 +8,7 @@ import ( |
|
|
|
"net/http" |
|
|
|
"os" |
|
|
|
"path" |
|
|
|
"strconv" |
|
|
|
"strings" |
|
|
|
"time" |
|
|
|
"unicode/utf8" |
|
|
@@ -1660,7 +1661,18 @@ func GrampusDownloadLog(ctx *context.Context) { |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
content, err := grampus.GetTrainJobLog(job.JobID) |
|
|
|
nodeIdStr := ctx.Params(":nodeId") |
|
|
|
var content string |
|
|
|
if nodeIdStr != "" { |
|
|
|
nodeId, _ := strconv.Atoi(nodeIdStr) |
|
|
|
if job.WorkServerNumber < 2 || nodeId > job.WorkServerNumber-1 { |
|
|
|
ctx.NotFound("query parameter is wrong", nil) |
|
|
|
return |
|
|
|
} |
|
|
|
content, err = grampus.GetTrainJobLog(job.JobID, nodeId) |
|
|
|
} else { |
|
|
|
content, err = grampus.GetTrainJobLog(job.JobID) |
|
|
|
} |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
content = "" |
|
|
@@ -1696,7 +1708,19 @@ func GrampusGetLog(ctx *context.Context) { |
|
|
|
exitDiagnostics = result.ExitDiagnostics |
|
|
|
} |
|
|
|
|
|
|
|
content, err := grampus.GetTrainJobLog(job.JobID) |
|
|
|
nodeIdStr := ctx.Params(":nodeId") |
|
|
|
var content string |
|
|
|
if nodeIdStr != "" { |
|
|
|
nodeId, _ := strconv.Atoi(nodeIdStr) |
|
|
|
if job.WorkServerNumber < 2 || nodeId > job.WorkServerNumber-1 { |
|
|
|
ctx.NotFound("query parameter is wrong", nil) |
|
|
|
return |
|
|
|
} |
|
|
|
content, err = grampus.GetTrainJobLog(job.JobID, nodeId) |
|
|
|
} else { |
|
|
|
content, err = grampus.GetTrainJobLog(job.JobID) |
|
|
|
} |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
ctx.JSON(http.StatusOK, map[string]interface{}{ |
|
|
@@ -1734,7 +1758,17 @@ func GrampusMetrics(ctx *context.Context) { |
|
|
|
} |
|
|
|
var result models.NewModelArtsMetricStatisticResult |
|
|
|
if job.IsNPUTask() { |
|
|
|
result, err = grampus.GetGrampusMetrics(job.JobID, 0, 0) |
|
|
|
nodeIdStr := ctx.Params(":nodeId") |
|
|
|
if nodeIdStr != "" { |
|
|
|
nodeId, _ := strconv.Atoi(nodeIdStr) |
|
|
|
if job.WorkServerNumber < 2 || nodeId > job.WorkServerNumber-1 { |
|
|
|
ctx.NotFound("query parameter is wrong", nil) |
|
|
|
return |
|
|
|
} |
|
|
|
result, err = grampus.GetGrampusMetrics(job.JobID, 0, 0, nodeId) |
|
|
|
} else { |
|
|
|
result, err = grampus.GetGrampusMetrics(job.JobID, 0, 0) |
|
|
|
} |
|
|
|
} else if job.IsGPUTask() { |
|
|
|
startTime := int64(job.StartTime) |
|
|
|
if startTime == 0 { |
|
|
|