1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-15 08:16:43 +00:00
This commit is contained in:
2020-04-30 16:45:43 +08:00
parent beee8bb286
commit 5da3555169
3 changed files with 20 additions and 19 deletions

View File

@@ -12,8 +12,7 @@ type Optimizer struct {
predicts map[string]*OptimizerJobExecutionTime predicts map[string]*OptimizerJobExecutionTime
jobUtilsGPU map[string]int jobUtilsGPU map[string]*OptimizerUtilGPU
versions map[string]int
} }
var optimizerInstance *Optimizer var optimizerInstance *Optimizer
@@ -26,8 +25,7 @@ func InstanceOfOptimizer() *Optimizer {
if optimizerInstance == nil { if optimizerInstance == nil {
optimizerInstance = &Optimizer{} optimizerInstance = &Optimizer{}
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{} optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
optimizerInstance.jobUtilsGPU = map[string]int{} optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
optimizerInstance.versions = map[string]int{}
} }
return optimizerInstance return optimizerInstance
} }
@@ -53,11 +51,11 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
last := 0 last := 0
version := 0 version := 0
if t, err := optimizer.jobUtilsGPU[jobName]; !err { if t, err := optimizer.jobUtilsGPU[jobName]; !err {
last = t last = t.Util
version = optimizer.versions[jobName] version = t.Version
} }
optimizer.jobUtilsGPU[jobName] = (version*last + sum/len(utils)) / (version + 1) optimizer.jobUtilsGPU[jobName].Util = (version*last + sum/len(utils)) / (version + 1)
optimizer.versions[jobName]++ optimizer.jobUtilsGPU[jobName].Version++
for i := 0; i < len(utils); i++ { for i := 0; i < len(utils); i++ {
if utils[i] > 15 { if utils[i] > 15 {
@@ -82,6 +80,9 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1) predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1)
predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1) predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1)
predict.Main = predict.Total - predict.Pre - predict.Post predict.Main = predict.Total - predict.Pre - predict.Post
if predict.Main < 0 {
predict.Main = 0
}
predict.Version++ predict.Version++
} }
}() }()
@@ -91,12 +92,8 @@ func (optimizer *Optimizer) predictUtilGPU(job string) (int, bool) {
str := strings.Split(job, "-") str := strings.Split(job, "-")
if len(str) == 2 { if len(str) == 2 {
jobName := str[0] jobName := str[0]
log.Info("predictUtilGPU, ", jobName) if _, ok := optimizer.jobUtilsGPU[jobName]; ok {
if _, err := optimizer.jobUtilsGPU[jobName]; err { return optimizer.jobUtilsGPU[jobName].Util, optimizer.jobUtilsGPU[jobName].Version >= 5
return 100, false
}
if optimizer.versions[jobName] > 5 {
return optimizer.jobUtilsGPU[jobName], true
} }
} }
return 100, false return 100, false
@@ -106,9 +103,8 @@ func (optimizer *Optimizer) predictTime(job string) (*OptimizerJobExecutionTime,
str := strings.Split(job, "-") str := strings.Split(job, "-")
if len(str) == 2 { if len(str) == 2 {
jobName := str[0] jobName := str[0]
log.Info("predictTime,", jobName)
if _, ok := optimizer.predicts[jobName]; ok { if _, ok := optimizer.predicts[jobName]; ok {
return optimizer.predicts[job], optimizer.predicts[jobName].Version > 5 return optimizer.predicts[job], optimizer.predicts[jobName].Version >= 5
} }
} }
return &OptimizerJobExecutionTime{}, false return &OptimizerJobExecutionTime{}, false
@@ -118,6 +114,6 @@ func (optimizer *Optimizer) getAllPredicts() map[string]*OptimizerJobExecutionTi
return optimizer.predicts return optimizer.predicts
} }
func (optimizer *Optimizer) getAllGPUUtils() map[string]int { func (optimizer *Optimizer) getAllGPUUtils() map[string]*OptimizerUtilGPU {
return optimizer.jobUtilsGPU return optimizer.jobUtilsGPU
} }

View File

@@ -233,7 +233,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
for _, node := range pool.pools[(i+poolID)%pool.poolsCount] { for _, node := range pool.pools[(i+poolID)%pool.poolsCount] {
var available []GPUStatus var available []GPUStatus
for _, status := range node.Status { for _, status := range node.Status {
if status.MemoryTotal >= task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU { if status.MemoryTotal > task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
if jobs, ok := pool.bindings[status.UUID]; ok { if jobs, ok := pool.bindings[status.UUID]; ok {
totalUtil := util totalUtil := util
@@ -242,7 +242,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
totalUtil += utilT totalUtil += utilT
} }
} }
if totalUtil < 100 { if totalUtil < 110 {
available = append(available, status) available = append(available, status)
availableGPUs[node.ClientID] = available availableGPUs[node.ClientID] = available
} }

View File

@@ -174,6 +174,11 @@ type OptimizerJobExecutionTime struct {
Version int `json:"version"` Version int `json:"version"`
} }
// OptimizerUtilGPU is the per-job GPU utilization record kept by the
// optimizer, keyed by job name in its jobUtilsGPU map.
type OptimizerUtilGPU struct {
// Util is the running average of the fed utilization samples, weighted by
// Version: (Version*Util + mean(samples)) / (Version + 1), using integer
// arithmetic. NOTE(review): presumably a percentage (0-100) — confirm
// against the code that produces the samples.
Util int `json:"util"`
// Version counts how many times Util has been updated. predictUtilGPU
// reports the prediction as valid only once Version >= 5.
Version int `json:"version"`
}
func str2int(str string, defaultValue int) int { func str2int(str string, defaultValue int) int {
i, err := strconv.Atoi(str) i, err := strconv.Atoi(str)
if err == nil { if err == nil {