mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-12-15 08:16:43 +00:00
update
This commit is contained in:
@@ -12,8 +12,7 @@ type Optimizer struct {
|
|||||||
|
|
||||||
predicts map[string]*OptimizerJobExecutionTime
|
predicts map[string]*OptimizerJobExecutionTime
|
||||||
|
|
||||||
jobUtilsGPU map[string]int
|
jobUtilsGPU map[string]*OptimizerUtilGPU
|
||||||
versions map[string]int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var optimizerInstance *Optimizer
|
var optimizerInstance *Optimizer
|
||||||
@@ -26,8 +25,7 @@ func InstanceOfOptimizer() *Optimizer {
|
|||||||
if optimizerInstance == nil {
|
if optimizerInstance == nil {
|
||||||
optimizerInstance = &Optimizer{}
|
optimizerInstance = &Optimizer{}
|
||||||
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
|
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
|
||||||
optimizerInstance.jobUtilsGPU = map[string]int{}
|
optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
|
||||||
optimizerInstance.versions = map[string]int{}
|
|
||||||
}
|
}
|
||||||
return optimizerInstance
|
return optimizerInstance
|
||||||
}
|
}
|
||||||
@@ -53,11 +51,11 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
|
|||||||
last := 0
|
last := 0
|
||||||
version := 0
|
version := 0
|
||||||
if t, err := optimizer.jobUtilsGPU[jobName]; !err {
|
if t, err := optimizer.jobUtilsGPU[jobName]; !err {
|
||||||
last = t
|
last = t.Util
|
||||||
version = optimizer.versions[jobName]
|
version = t.Version
|
||||||
}
|
}
|
||||||
optimizer.jobUtilsGPU[jobName] = (version*last + sum/len(utils)) / (version + 1)
|
optimizer.jobUtilsGPU[jobName].Util = (version*last + sum/len(utils)) / (version + 1)
|
||||||
optimizer.versions[jobName]++
|
optimizer.jobUtilsGPU[jobName].Version++
|
||||||
|
|
||||||
for i := 0; i < len(utils); i++ {
|
for i := 0; i < len(utils); i++ {
|
||||||
if utils[i] > 15 {
|
if utils[i] > 15 {
|
||||||
@@ -82,6 +80,9 @@ func (optimizer *Optimizer) feed(job string, utils []int) {
|
|||||||
predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1)
|
predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1)
|
||||||
predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1)
|
predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1)
|
||||||
predict.Main = predict.Total - predict.Pre - predict.Post
|
predict.Main = predict.Total - predict.Pre - predict.Post
|
||||||
|
if predict.Main < 0 {
|
||||||
|
predict.Main = 0
|
||||||
|
}
|
||||||
predict.Version++
|
predict.Version++
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -91,12 +92,8 @@ func (optimizer *Optimizer) predictUtilGPU(job string) (int, bool) {
|
|||||||
str := strings.Split(job, "-")
|
str := strings.Split(job, "-")
|
||||||
if len(str) == 2 {
|
if len(str) == 2 {
|
||||||
jobName := str[0]
|
jobName := str[0]
|
||||||
log.Info("predictUtilGPU, ", jobName)
|
if _, ok := optimizer.jobUtilsGPU[jobName]; ok {
|
||||||
if _, err := optimizer.jobUtilsGPU[jobName]; err {
|
return optimizer.jobUtilsGPU[jobName].Util, optimizer.jobUtilsGPU[jobName].Version >= 5
|
||||||
return 100, false
|
|
||||||
}
|
|
||||||
if optimizer.versions[jobName] > 5 {
|
|
||||||
return optimizer.jobUtilsGPU[jobName], true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 100, false
|
return 100, false
|
||||||
@@ -106,9 +103,8 @@ func (optimizer *Optimizer) predictTime(job string) (*OptimizerJobExecutionTime,
|
|||||||
str := strings.Split(job, "-")
|
str := strings.Split(job, "-")
|
||||||
if len(str) == 2 {
|
if len(str) == 2 {
|
||||||
jobName := str[0]
|
jobName := str[0]
|
||||||
log.Info("predictTime,", jobName)
|
|
||||||
if _, ok := optimizer.predicts[jobName]; ok {
|
if _, ok := optimizer.predicts[jobName]; ok {
|
||||||
return optimizer.predicts[job], optimizer.predicts[jobName].Version > 5
|
return optimizer.predicts[job], optimizer.predicts[jobName].Version >= 5
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &OptimizerJobExecutionTime{}, false
|
return &OptimizerJobExecutionTime{}, false
|
||||||
@@ -118,6 +114,6 @@ func (optimizer *Optimizer) getAllPredicts() map[string]*OptimizerJobExecutionTi
|
|||||||
return optimizer.predicts
|
return optimizer.predicts
|
||||||
}
|
}
|
||||||
|
|
||||||
func (optimizer *Optimizer) getAllGPUUtils() map[string]int {
|
func (optimizer *Optimizer) getAllGPUUtils() map[string]*OptimizerUtilGPU {
|
||||||
return optimizer.jobUtilsGPU
|
return optimizer.jobUtilsGPU
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -233,7 +233,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
|
|||||||
for _, node := range pool.pools[(i+poolID)%pool.poolsCount] {
|
for _, node := range pool.pools[(i+poolID)%pool.poolsCount] {
|
||||||
var available []GPUStatus
|
var available []GPUStatus
|
||||||
for _, status := range node.Status {
|
for _, status := range node.Status {
|
||||||
if status.MemoryTotal >= task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
|
if status.MemoryTotal > task.MemoryGPU+status.MemoryAllocated && status.MemoryFree > task.MemoryGPU {
|
||||||
|
|
||||||
if jobs, ok := pool.bindings[status.UUID]; ok {
|
if jobs, ok := pool.bindings[status.UUID]; ok {
|
||||||
totalUtil := util
|
totalUtil := util
|
||||||
@@ -242,7 +242,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
|
|||||||
totalUtil += utilT
|
totalUtil += utilT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if totalUtil < 100 {
|
if totalUtil < 110 {
|
||||||
available = append(available, status)
|
available = append(available, status)
|
||||||
availableGPUs[node.ClientID] = available
|
availableGPUs[node.ClientID] = available
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -174,6 +174,11 @@ type OptimizerJobExecutionTime struct {
|
|||||||
Version int `json:"version"`
|
Version int `json:"version"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type OptimizerUtilGPU struct {
|
||||||
|
Util int `json:"util"`
|
||||||
|
Version int `json:"version"`
|
||||||
|
}
|
||||||
|
|
||||||
func str2int(str string, defaultValue int) int {
|
func str2int(str string, defaultValue int) int {
|
||||||
i, err := strconv.Atoi(str)
|
i, err := strconv.Atoi(str)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user