mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-12-15 08:16:43 +00:00
update
This commit is contained in:
@@ -5,17 +5,15 @@ type Evaluator struct {
|
|||||||
racks map[string]map[string]int
|
racks map[string]map[string]int
|
||||||
nodes map[string]map[string]int
|
nodes map[string]map[string]int
|
||||||
upstreams map[string]string
|
upstreams map[string]string
|
||||||
cost float64
|
|
||||||
totalPS int
|
totalPS int
|
||||||
totalWorker int
|
totalWorker int
|
||||||
|
|
||||||
costNetwork float64
|
costNetwork float64
|
||||||
|
costLoad float64
|
||||||
|
|
||||||
factorNode float64
|
factorNode float64
|
||||||
factorRack float64
|
factorRack float64
|
||||||
factorDomain float64
|
factorDomain float64
|
||||||
|
|
||||||
costLoad float64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
|
func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
|
||||||
@@ -28,7 +26,6 @@ func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
|
|||||||
eva.factorNode = 1.0
|
eva.factorNode = 1.0
|
||||||
eva.factorRack = 4.0
|
eva.factorRack = 4.0
|
||||||
eva.factorDomain = 40.0
|
eva.factorDomain = 40.0
|
||||||
eva.cost = 0.0
|
|
||||||
eva.costNetwork = 0.0
|
eva.costNetwork = 0.0
|
||||||
eva.costLoad = 0.0
|
eva.costLoad = 0.0
|
||||||
}
|
}
|
||||||
@@ -65,7 +62,6 @@ func (eva *Evaluator) add(node NodeStatus, task Task) {
|
|||||||
eva.domains[node.Domain]["Worker"]++
|
eva.domains[node.Domain]["Worker"]++
|
||||||
eva.totalWorker++
|
eva.totalWorker++
|
||||||
}
|
}
|
||||||
eva.cost = eva.costNetwork
|
|
||||||
|
|
||||||
if task.IsPS {
|
if task.IsPS {
|
||||||
//eva.costLoad += 1
|
//eva.costLoad += 1
|
||||||
@@ -104,7 +100,6 @@ func (eva *Evaluator) remove(node NodeStatus, task Task) {
|
|||||||
eva.domains[node.Domain]["Worker"]--
|
eva.domains[node.Domain]["Worker"]--
|
||||||
eva.totalWorker--
|
eva.totalWorker--
|
||||||
}
|
}
|
||||||
eva.cost = eva.costNetwork
|
|
||||||
|
|
||||||
if task.IsPS {
|
if task.IsPS {
|
||||||
//eva.costLoad -= 1
|
//eva.costLoad -= 1
|
||||||
@@ -121,7 +116,10 @@ func (eva *Evaluator) remove(node NodeStatus, task Task) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (eva *Evaluator) calculate() float64 {
|
func (eva *Evaluator) calculate() float64 {
|
||||||
return eva.cost + eva.costLoad/float64(eva.totalPS+eva.totalWorker)
|
/* factor to determine spread or pack */
|
||||||
|
/* 1.0 spread, -1.0 pack */
|
||||||
|
factor := -1.0
|
||||||
|
return eva.costNetwork + factor*eva.costLoad/float64(eva.totalPS+eva.totalWorker)
|
||||||
}
|
}
|
||||||
|
|
||||||
func evaluate(allocation Allocation) float64 {
|
func evaluate(allocation Allocation) float64 {
|
||||||
|
|||||||
@@ -709,7 +709,8 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
|
|||||||
for _, node := range cur.Nodes {
|
for _, node := range cur.Nodes {
|
||||||
var available []GPUStatus
|
var available []GPUStatus
|
||||||
for _, status := range node.Status {
|
for _, status := range node.Status {
|
||||||
if status.MemoryAllocated == 0 && status.MemoryUsed < 10 {
|
/* make sure GPU is not used by in-system and outer-system */
|
||||||
|
if status.MemoryAllocated == 0 && status.MemoryUsed < 100 {
|
||||||
available = append(available, status)
|
available = append(available, status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -720,7 +721,6 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.Info(candidates, cur)
|
|
||||||
if len(candidates) >= len(job.Tasks)*3+5 {
|
if len(candidates) >= len(job.Tasks)*3+5 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@@ -795,7 +795,33 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
|
|||||||
nodesT = append(nodesT, node.Copy())
|
nodesT = append(nodesT, node.Copy())
|
||||||
}
|
}
|
||||||
|
|
||||||
allocation := fastBestFit(nodesT, job.Tasks)
|
tasks := make([]Task, len(job.Tasks))
|
||||||
|
var tasksPS []Task
|
||||||
|
var tasksWorker []Task
|
||||||
|
for _, taskT := range job.Tasks {
|
||||||
|
if taskT.IsPS {
|
||||||
|
tasksPS = append(tasksPS, taskT)
|
||||||
|
} else {
|
||||||
|
tasksWorker = append(tasksWorker, taskT)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idxPS := 0
|
||||||
|
idxWorker := 0
|
||||||
|
factor := float64(len(tasksWorker)) / (float64(len(tasksPS)) + 0.001)
|
||||||
|
for i := range tasks {
|
||||||
|
if float64(idxPS)*factor <= float64(idxWorker) && idxPS < len(tasksPS) {
|
||||||
|
tasks[i] = tasksPS[idxPS]
|
||||||
|
idxPS++
|
||||||
|
} else if idxWorker < len(tasksWorker) {
|
||||||
|
tasks[i] = tasksWorker[idxWorker]
|
||||||
|
idxWorker++
|
||||||
|
} else {
|
||||||
|
tasks[i] = tasksPS[idxPS]
|
||||||
|
idxPS++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
allocation := fastBestFit(nodesT, tasks)
|
||||||
if allocation.Flags["valid"] {
|
if allocation.Flags["valid"] {
|
||||||
|
|
||||||
for range job.Tasks { //append would cause uncertain order
|
for range job.Tasks { //append would cause uncertain order
|
||||||
|
|||||||
Reference in New Issue
Block a user