1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-13 07:46:43 +00:00
This commit is contained in:
2020-04-30 16:45:43 +08:00
parent beee8bb286
commit 5da3555169
3 changed files with 20 additions and 19 deletions

View File

@@ -12,8 +12,7 @@ type Optimizer struct {
predicts map[string]*OptimizerJobExecutionTime
jobUtilsGPU map[string]int
versions map[string]int
jobUtilsGPU map[string]*OptimizerUtilGPU
}
var optimizerInstance *Optimizer
@@ -26,8 +25,7 @@ func InstanceOfOptimizer() *Optimizer {
if optimizerInstance == nil {
optimizerInstance = &Optimizer{}
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
optimizerInstance.jobUtilsGPU = map[string]int{}
optimizerInstance.versions = map[string]int{}
optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
}
return optimizerInstance
}
@@ -53,11 +51,11 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
last := 0
version := 0
if t, err := optimizer.jobUtilsGPU[jobName]; !err {
last = t
version = optimizer.versions[jobName]
last = t.Util
version = t.Version
}
optimizer.jobUtilsGPU[jobName] = (version*last + sum/len(utils)) / (version + 1)
optimizer.versions[jobName]++
optimizer.jobUtilsGPU[jobName].Util = (version*last + sum/len(utils)) / (version + 1)
optimizer.jobUtilsGPU[jobName].Version++
for i := 0; i < len(utils); i++ {
if utils[i] > 15 {
@@ -82,6 +80,9 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1)
predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1)
predict.Main = predict.Total - predict.Pre - predict.Post
if predict.Main < 0 {
predict.Main = 0
}
predict.Version++
}
}()
@@ -91,12 +92,8 @@ func (optimizer *Optimizer) predictUtilGPU(job string) (int, bool) {
str := strings.Split(job, "-")
if len(str) == 2 {
jobName := str[0]
log.Info("predictUtilGPU, ", jobName)
if _, err := optimizer.jobUtilsGPU[jobName]; err {
return 100, false
}
if optimizer.versions[jobName] > 5 {
return optimizer.jobUtilsGPU[jobName], true
if _, ok := optimizer.jobUtilsGPU[jobName]; ok {
return optimizer.jobUtilsGPU[jobName].Util, optimizer.jobUtilsGPU[jobName].Version >= 5
}
}
return 100, false
@@ -106,9 +103,8 @@ func (optimizer *Optimizer) predictTime(job string) (*OptimizerJobExecutionTime,
str := strings.Split(job, "-")
if len(str) == 2 {
jobName := str[0]
log.Info("predictTime,", jobName)
if _, ok := optimizer.predicts[jobName]; ok {
return optimizer.predicts[job], optimizer.predicts[jobName].Version > 5
return optimizer.predicts[job], optimizer.predicts[jobName].Version >= 5
}
}
return &OptimizerJobExecutionTime{}, false
@@ -118,6 +114,6 @@ func (optimizer *Optimizer) getAllPredicts() map[string]*OptimizerJobExecutionTi
return optimizer.predicts
}
func (optimizer *Optimizer) getAllGPUUtils() map[string]int {
// getAllGPUUtils returns the optimizer's jobUtilsGPU map, keyed by job name.
// The map itself is returned, not a copy, so callers observe (and can cause)
// later modifications to the optimizer's state.
func (optimizer *Optimizer) getAllGPUUtils() map[string]*OptimizerUtilGPU {
return optimizer.jobUtilsGPU
}

View File

@@ -233,7 +233,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
for _, node := range pool.pools[(i+poolID)%pool.poolsCount] {
var available []GPUStatus
for _, status := range node.Status {
if status.MemoryTotal >= task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
if status.MemoryTotal > task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
if jobs, ok := pool.bindings[status.UUID]; ok {
totalUtil := util
@@ -242,7 +242,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
totalUtil += utilT
}
}
if totalUtil < 100 {
if totalUtil < 110 {
available = append(available, status)
availableGPUs[node.ClientID] = available
}

View File

@@ -174,6 +174,11 @@ type OptimizerJobExecutionTime struct {
Version int `json:"version"`
}
// OptimizerUtilGPU is the per-job GPU utilisation record kept by the
// Optimizer. It pairs the utilisation estimate with a counter of how many
// samples have been folded into it.
type OptimizerUtilGPU struct {
	// Util is the averaged GPU utilisation for the job; serialised as "util".
	Util int `json:"util"`
	// Version counts the updates applied to Util; serialised as "version".
	Version int `json:"version"`
}
func str2int(str string, defaultValue int) int {
i, err := strconv.Atoi(str)
if err == nil {