From 5da3555169d42646ba2c55549df6d1a237322c60 Mon Sep 17 00:00:00 2001 From: Newnius Date: Thu, 30 Apr 2020 16:45:43 +0800 Subject: [PATCH] update --- src/optimizer.go | 30 +++++++++++++----------------- src/scheduler_fair.go | 4 ++-- src/util.go | 5 +++++ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/optimizer.go b/src/optimizer.go index d2917b4..e5f5b29 100644 --- a/src/optimizer.go +++ b/src/optimizer.go @@ -12,8 +12,7 @@ type Optimizer struct { predicts map[string]*OptimizerJobExecutionTime - jobUtilsGPU map[string]int - versions map[string]int + jobUtilsGPU map[string]*OptimizerUtilGPU } var optimizerInstance *Optimizer @@ -26,8 +25,7 @@ func InstanceOfOptimizer() *Optimizer { if optimizerInstance == nil { optimizerInstance = &Optimizer{} optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{} - optimizerInstance.jobUtilsGPU = map[string]int{} - optimizerInstance.versions = map[string]int{} + optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{} } return optimizerInstance } @@ -53,11 +51,11 @@ func (optimizer *Optimizer) feed(job string, utils []int) { last := 0 version := 0 if t, err := optimizer.jobUtilsGPU[jobName]; !err { - last = t - version = optimizer.versions[jobName] + last = t.Util + version = t.Version } - optimizer.jobUtilsGPU[jobName] = (version*last + sum/len(utils)) / (version + 1) - optimizer.versions[jobName]++ + optimizer.jobUtilsGPU[jobName].Util = (version*last + sum/len(utils)) / (version + 1) + optimizer.jobUtilsGPU[jobName].Version++ for i := 0; i < len(utils); i++ { if utils[i] > 15 { @@ -82,6 +80,9 @@ func (optimizer *Optimizer) feed(job string, utils []int) { predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1) predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1) predict.Main = predict.Total - predict.Pre - predict.Post + if predict.Main < 0 { + predict.Main = 0 + } predict.Version++ } }() @@ -91,12 +92,8 @@ func (optimizer *Optimizer) predictUtilGPU(job string) (int, bool) { str := strings.Split(job, "-") if len(str) == 2 { jobName := str[0] - log.Info("predictUtilGPU, ", jobName) - if _, err := optimizer.jobUtilsGPU[jobName]; err { - return 100, false - } - if optimizer.versions[jobName] > 5 { - return optimizer.jobUtilsGPU[jobName], true + if _, ok := optimizer.jobUtilsGPU[jobName]; ok { + return optimizer.jobUtilsGPU[jobName].Util, optimizer.jobUtilsGPU[jobName].Version >= 5 } } return 100, false @@ -106,9 +103,8 @@ func (optimizer *Optimizer) predictTime(job string) (*OptimizerJobExecutionTime, str := strings.Split(job, "-") if len(str) == 2 { jobName := str[0] - log.Info("predictTime,", jobName) if _, ok := optimizer.predicts[jobName]; ok { - return optimizer.predicts[job], optimizer.predicts[jobName].Version > 5 + return optimizer.predicts[job], optimizer.predicts[jobName].Version >= 5 } } return &OptimizerJobExecutionTime{}, false @@ -118,6 +114,6 @@ func (optimizer *Optimizer) getAllPredicts() map[string]*OptimizerJobExecutionTi return optimizer.predicts } -func (optimizer *Optimizer) getAllGPUUtils() map[string]int { +func (optimizer *Optimizer) getAllGPUUtils() map[string]*OptimizerUtilGPU { return optimizer.jobUtilsGPU } diff --git a/src/scheduler_fair.go b/src/scheduler_fair.go index 58ea600..8ea85e5 100644 --- a/src/scheduler_fair.go +++ b/src/scheduler_fair.go @@ -233,7 +233,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus { for _, node := range pool.pools[(i+poolID)%pool.poolsCount] { var available []GPUStatus for _, status := range node.Status { - if status.MemoryTotal >= task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU { + if status.MemoryTotal > task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU { if jobs, ok := pool.bindings[status.UUID]; ok { totalUtil := util @@ -242,7 +242,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus { totalUtil += utilT } } - if totalUtil < 100 { + if totalUtil < 110 { available = append(available, status) availableGPUs[node.ClientID] = available } diff --git a/src/util.go b/src/util.go index 61a30ec..c4f0153 100644 --- a/src/util.go +++ b/src/util.go @@ -174,6 +174,11 @@ type OptimizerJobExecutionTime struct { Version int `json:"version"` } +type OptimizerUtilGPU struct { + Util int `json:"util"` + Version int `json:"version"` +} + func str2int(str string, defaultValue int) int { i, err := strconv.Atoi(str) if err == nil {