1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-12 23:36:44 +00:00
This commit is contained in:
2020-04-13 01:30:25 +08:00
parent 4ce9378bf6
commit 533099c14b
4 changed files with 55 additions and 5 deletions

View File

@@ -3,11 +3,14 @@ package main
import (
log "github.com/sirupsen/logrus"
"sync"
"strings"
)
type Optimizer struct {
scheduler Scheduler
killedFlag bool
predicts map[string]OptimizerJobExecutionTime
}
var optimizerInstance *Optimizer
@@ -19,11 +22,42 @@ func InstanceOfOptimizer() *Optimizer {
if optimizerInstance == nil {
optimizerInstance = &Optimizer{}
optimizerInstance.predicts = map[string]OptimizerJobExecutionTime{}
}
return optimizerInstance
}
func (jhl *Optimizer) feed(job string, utils []int) {
func (optimizer *Optimizer) feed(job string, utils []int) {
log.Info("optimizer feed")
log.Info(job, utils)
go func() {
str := strings.Split(job, "-")
if len(str) == 2 {
preCnt := 0
for i := 0; i < len(utils); i++ {
if utils[i] > 15 {
break
}
preCnt++
}
postCnt := 0
for i := len(utils); i >= 0; i-- {
if utils[i] > 15 {
break
}
postCnt++
}
if _, ok := optimizer.predicts[str[0]]; !ok {
optimizer.predicts[str[0]] = OptimizerJobExecutionTime{}
}
predict := optimizer.predicts[str[0]]
predict.Pre = ((predict.Pre * predict.Version) + preCnt) / (predict.Version + 1)
predict.Post = ((predict.Post * predict.Version) + postCnt) / (predict.Version + 1)
predict.Total = ((predict.Total * predict.Version) + len(utils)) / (predict.Version + 1)
predict.Version++
}
}()
}

View File

@@ -27,8 +27,9 @@ type ResourcePool struct {
counter int
counterTotal int
bindings map[string]map[string]bool
utils map[string][]int
bindings map[string]map[string]bool
utils map[string][]int
bindingMu sync.Mutex
}
func (pool *ResourcePool) start() {
@@ -210,6 +211,8 @@ func (pool *ResourcePool) releaseNetwork(network string) {
}
func (pool *ResourcePool) attach(GPU string, job string) {
pool.bindingMu.Lock()
defer pool.bindingMu.Unlock()
if _, ok := pool.bindings[GPU]; !ok {
pool.bindings[GPU] = map[string]bool{}
}
@@ -221,6 +224,8 @@ func (pool *ResourcePool) attach(GPU string, job string) {
}
func (pool *ResourcePool) detach(GPU string, jobName string) {
pool.bindingMu.Lock()
defer pool.bindingMu.Unlock()
if _, ok := pool.bindings[GPU]; ok {
if len(pool.bindings[GPU]) == 1 {
InstanceOfOptimizer().feed(jobName, pool.utils[GPU])

View File

@@ -206,6 +206,10 @@ func (scheduler *SchedulerFair) ReleaseResource(job Job, agent NodeStatus) {
for j := range nodes.Status {
if gpu.UUID == nodes.Status[j].UUID {
nodes.Status[j].MemoryAllocated -= gpu.MemoryTotal
if nodes.Status[j].MemoryAllocated < 0 {
// in case error
nodes.Status[j].MemoryAllocated = 0
}
}
}
}
@@ -376,4 +380,4 @@ func (scheduler *SchedulerFair) Attach(GPU string, job string) {
func (scheduler *SchedulerFair) Detach(GPU string, job string) {
pool.detach(GPU, job)
}
}

View File

@@ -111,7 +111,7 @@ type GPUStatus struct {
type NodeStatus struct {
ClientID string `json:"id"`
ClientHost string `json:"host"`
Version float64 `json:"version"`
Version float64 `json:"version"`
NumCPU int `json:"cpu_num"`
UtilCPU float64 `json:"cpu_load"`
MemTotal int `json:"mem_total"`
@@ -164,6 +164,13 @@ type MsgGroupList struct {
Groups []Group `json:"groups"`
}
type OptimizerJobExecutionTime struct {
Pre int `json:"pre"`
Post int `json:"post"`
Total int `json:"total"`
Version int `json:"version"`
}
func str2int(str string, defaultValue int) int {
i, err := strconv.Atoi(str)
if err == nil {