1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-12 23:36:44 +00:00
This commit is contained in:
2020-05-27 13:33:40 +08:00
parent 10cac9adc8
commit c3f05141ee
2 changed files with 34 additions and 10 deletions

View File

@@ -5,17 +5,15 @@ type Evaluator struct {
racks map[string]map[string]int
nodes map[string]map[string]int
upstreams map[string]string
cost float64
totalPS int
totalWorker int
costNetwork float64
costLoad float64
factorNode float64
factorRack float64
factorDomain float64
costLoad float64
}
func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
@@ -28,7 +26,6 @@ func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
eva.factorNode = 1.0
eva.factorRack = 4.0
eva.factorDomain = 40.0
eva.cost = 0.0
eva.costNetwork = 0.0
eva.costLoad = 0.0
}
@@ -65,7 +62,6 @@ func (eva *Evaluator) add(node NodeStatus, task Task) {
eva.domains[node.Domain]["Worker"]++
eva.totalWorker++
}
eva.cost = eva.costNetwork
if task.IsPS {
//eva.costLoad += 1
@@ -104,7 +100,6 @@ func (eva *Evaluator) remove(node NodeStatus, task Task) {
eva.domains[node.Domain]["Worker"]--
eva.totalWorker--
}
eva.cost = eva.costNetwork
if task.IsPS {
//eva.costLoad -= 1
@@ -121,7 +116,10 @@ func (eva *Evaluator) remove(node NodeStatus, task Task) {
}
func (eva *Evaluator) calculate() float64 {
return eva.cost + eva.costLoad/float64(eva.totalPS+eva.totalWorker)
/* factor to determine spread or pack */
/* 1.0 spread, -1.0 pack */
factor := -1.0
return eva.costNetwork + factor*eva.costLoad/float64(eva.totalPS+eva.totalWorker)
}
func evaluate(allocation Allocation) float64 {

View File

@@ -709,7 +709,8 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
for _, node := range cur.Nodes {
var available []GPUStatus
for _, status := range node.Status {
if status.MemoryAllocated == 0 && status.MemoryUsed < 10 {
/* make sure GPU is not used by in-system and outer-system */
if status.MemoryAllocated == 0 && status.MemoryUsed < 100 {
available = append(available, status)
}
}
@@ -720,7 +721,6 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
}
}
}
log.Info(candidates, cur)
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
@@ -795,7 +795,33 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
nodesT = append(nodesT, node.Copy())
}
allocation := fastBestFit(nodesT, job.Tasks)
tasks := make([]Task, len(job.Tasks))
var tasksPS []Task
var tasksWorker []Task
for _, taskT := range job.Tasks {
if taskT.IsPS {
tasksPS = append(tasksPS, taskT)
} else {
tasksWorker = append(tasksWorker, taskT)
}
}
idxPS := 0
idxWorker := 0
factor := float64(len(tasksWorker)) / (float64(len(tasksPS)) + 0.001)
for i := range tasks {
if float64(idxPS)*factor <= float64(idxWorker) && idxPS < len(tasksPS) {
tasks[i] = tasksPS[idxPS]
idxPS++
} else if idxWorker < len(tasksWorker) {
tasks[i] = tasksWorker[idxWorker]
idxWorker++
} else {
tasks[i] = tasksPS[idxPS]
idxPS++
}
}
allocation := fastBestFit(nodesT, tasks)
if allocation.Flags["valid"] {
for range job.Tasks { //append would cause uncertain order