mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-12-13 07:46:43 +00:00
update
This commit is contained in:
@@ -12,8 +12,7 @@ type Optimizer struct {
|
||||
|
||||
predicts map[string]*OptimizerJobExecutionTime
|
||||
|
||||
jobUtilsGPU map[string]int
|
||||
versions map[string]int
|
||||
jobUtilsGPU map[string]*OptimizerUtilGPU
|
||||
}
|
||||
|
||||
var optimizerInstance *Optimizer
|
||||
@@ -26,8 +25,7 @@ func InstanceOfOptimizer() *Optimizer {
|
||||
if optimizerInstance == nil {
|
||||
optimizerInstance = &Optimizer{}
|
||||
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
|
||||
optimizerInstance.jobUtilsGPU = map[string]int{}
|
||||
optimizerInstance.versions = map[string]int{}
|
||||
optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
|
||||
}
|
||||
return optimizerInstance
|
||||
}
|
||||
@@ -53,11 +51,11 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
|
||||
last := 0
|
||||
version := 0
|
||||
if t, err := optimizer.jobUtilsGPU[jobName]; !err {
|
||||
last = t
|
||||
version = optimizer.versions[jobName]
|
||||
last = t.Util
|
||||
version = t.Version
|
||||
}
|
||||
optimizer.jobUtilsGPU[jobName] = (version*last + sum/len(utils)) / (version + 1)
|
||||
optimizer.versions[jobName]++
|
||||
optimizer.jobUtilsGPU[jobName].Util = (version*last + sum/len(utils)) / (version + 1)
|
||||
optimizer.jobUtilsGPU[jobName].Version++
|
||||
|
||||
for i := 0; i < len(utils); i++ {
|
||||
if utils[i] > 15 {
|
||||
@@ -82,6 +80,9 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
|
||||
predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1)
|
||||
predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1)
|
||||
predict.Main = predict.Total - predict.Pre - predict.Post
|
||||
if predict.Main < 0 {
|
||||
predict.Main = 0
|
||||
}
|
||||
predict.Version++
|
||||
}
|
||||
}()
|
||||
@@ -91,12 +92,8 @@ func (optimizer *Optimizer) predictUtilGPU(job string) (int, bool) {
|
||||
str := strings.Split(job, "-")
|
||||
if len(str) == 2 {
|
||||
jobName := str[0]
|
||||
log.Info("predictUtilGPU, ", jobName)
|
||||
if _, err := optimizer.jobUtilsGPU[jobName]; err {
|
||||
return 100, false
|
||||
}
|
||||
if optimizer.versions[jobName] > 5 {
|
||||
return optimizer.jobUtilsGPU[jobName], true
|
||||
if _, ok := optimizer.jobUtilsGPU[jobName]; ok {
|
||||
return optimizer.jobUtilsGPU[jobName].Util, optimizer.jobUtilsGPU[jobName].Version >= 5
|
||||
}
|
||||
}
|
||||
return 100, false
|
||||
@@ -106,9 +103,8 @@ func (optimizer *Optimizer) predictTime(job string) (*OptimizerJobExecutionTime,
|
||||
str := strings.Split(job, "-")
|
||||
if len(str) == 2 {
|
||||
jobName := str[0]
|
||||
log.Info("predictTime,", jobName)
|
||||
if _, ok := optimizer.predicts[jobName]; ok {
|
||||
return optimizer.predicts[job], optimizer.predicts[jobName].Version > 5
|
||||
return optimizer.predicts[job], optimizer.predicts[jobName].Version >= 5
|
||||
}
|
||||
}
|
||||
return &OptimizerJobExecutionTime{}, false
|
||||
@@ -118,6 +114,6 @@ func (optimizer *Optimizer) getAllPredicts() map[string]*OptimizerJobExecutionTi
|
||||
return optimizer.predicts
|
||||
}
|
||||
|
||||
func (optimizer *Optimizer) getAllGPUUtils() map[string]int {
|
||||
func (optimizer *Optimizer) getAllGPUUtils() map[string]*OptimizerUtilGPU {
|
||||
return optimizer.jobUtilsGPU
|
||||
}
|
||||
|
||||
@@ -233,7 +233,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
|
||||
for _, node := range pool.pools[(i+poolID)%pool.poolsCount] {
|
||||
var available []GPUStatus
|
||||
for _, status := range node.Status {
|
||||
if status.MemoryTotal >= task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
|
||||
if status.MemoryTotal > task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
|
||||
|
||||
if jobs, ok := pool.bindings[status.UUID]; ok {
|
||||
totalUtil := util
|
||||
@@ -242,7 +242,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
|
||||
totalUtil += utilT
|
||||
}
|
||||
}
|
||||
if totalUtil < 100 {
|
||||
if totalUtil < 110 {
|
||||
available = append(available, status)
|
||||
availableGPUs[node.ClientID] = available
|
||||
}
|
||||
|
||||
@@ -174,6 +174,11 @@ type OptimizerJobExecutionTime struct {
|
||||
Version int `json:"version"`
|
||||
}
|
||||
|
||||
type OptimizerUtilGPU struct {
|
||||
Util int `json:"util"`
|
||||
Version int `json:"version"`
|
||||
}
|
||||
|
||||
func str2int(str string, defaultValue int) int {
|
||||
i, err := strconv.Atoi(str)
|
||||
if err == nil {
|
||||
|
||||
Reference in New Issue
Block a user