1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-06-07 14:21:55 +00:00
This commit is contained in:
Newnius 2020-06-25 11:32:53 +08:00
parent 3032da0baf
commit b75488623f

View File

@ -391,6 +391,8 @@ func (optimizer *Optimizer) predict(job string, seq int) (OptimizerJobExecutionT
} }
func (optimizer *Optimizer) PredictReq(job Job, role string) MsgJobReq { func (optimizer *Optimizer) PredictReq(job Job, role string) MsgJobReq {
res := MsgJobReq{CPU: 4, Mem: 4096, UtilGPU: 100, MemGPU: 8192, BW: 0}
var jobName string var jobName string
str := strings.Split(job.Name, "-") str := strings.Split(job.Name, "-")
if len(str) == 2 { if len(str) == 2 {
@ -404,10 +406,12 @@ func (optimizer *Optimizer) PredictReq(job Job, role string) MsgJobReq {
psNumber := 0 psNumber := 0
workerNumber := 0 workerNumber := 0
flag := false
for _, task := range job.Tasks { for _, task := range job.Tasks {
if (role == "PS" && task.IsPS) || (role == "Worker" && !task.IsPS) { if (role == "PS" && task.IsPS) || (role == "Worker" && !task.IsPS) {
params["num_gpus"] = task.NumberGPU params["num_gpus"] = task.NumberGPU
cmd = task.Cmd cmd = task.Cmd
flag = true
} }
if task.IsPS { if task.IsPS {
psNumber++ psNumber++
@ -417,6 +421,9 @@ func (optimizer *Optimizer) PredictReq(job Job, role string) MsgJobReq {
} }
params["ps_number"] = psNumber params["ps_number"] = psNumber
params["worker_number"] = workerNumber params["worker_number"] = workerNumber
if !flag {
return res
}
exceptions := map[string]bool{} exceptions := map[string]bool{}
exceptions["train_dir"] = true exceptions["train_dir"] = true
@ -466,26 +473,25 @@ func (optimizer *Optimizer) PredictReq(job Job, role string) MsgJobReq {
return MsgJobReq{Code: 3, Error: err.Error()} return MsgJobReq{Code: 3, Error: err.Error()}
} }
req := MsgJobReq{CPU: 4, Mem: 4096, UtilGPU: 100, MemGPU: 8192, BW: 0}
var msg MsgJobReqPredict var msg MsgJobReqPredict
err = json.Unmarshal([]byte(string(body)), &msg) err = json.Unmarshal([]byte(string(body)), &msg)
if err == nil && msg.Code == 0 { if err == nil && msg.Code == 0 {
tmp := msg.Labels tmp := msg.Labels
if v, ok := tmp["cpu"]; ok { if v, ok := tmp["cpu"]; ok {
req.CPU = int(math.Ceil(v / 100)) res.CPU = int(math.Ceil(v / 100))
} }
if v, ok := tmp["mem"]; ok { if v, ok := tmp["mem"]; ok {
req.Mem = int(math.Ceil(v/1024)) * 1024 res.Mem = int(math.Ceil(v/1024)) * 1024
} }
if v, ok := tmp["gpu_util"]; ok { if v, ok := tmp["gpu_util"]; ok {
req.UtilGPU = int(math.Ceil(v)/10) * 10 res.UtilGPU = int(math.Ceil(v)/10) * 10
} }
if v, ok := tmp["gpu_mem"]; ok { if v, ok := tmp["gpu_mem"]; ok {
req.MemGPU = int(math.Ceil(v/1024)) * 1024 res.MemGPU = int(math.Ceil(v/1024)) * 1024
} }
if v, ok := tmp["bw"]; ok { if v, ok := tmp["bw"]; ok {
req.BW = int(math.Ceil(v/10)) * 10 res.BW = int(math.Ceil(v/10)) * 10
} }
} }
return req return res
} }