2020-05-14 12:52:39 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
|
|
|
"github.com/MaxHalford/eaopt"
|
|
|
|
"time"
|
|
|
|
"strconv"
|
|
|
|
"math"
|
|
|
|
)
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
type Evaluator struct {
|
|
|
|
domains map[string]map[string]int
|
|
|
|
racks map[string]map[string]int
|
|
|
|
nodes map[string]map[string]int
|
|
|
|
upstreams map[string]string
|
|
|
|
cost float64
|
|
|
|
totalPS int
|
|
|
|
totalWorker int
|
|
|
|
|
|
|
|
costNetwork float64
|
|
|
|
|
|
|
|
factorNode float64
|
|
|
|
factorRack float64
|
|
|
|
factorDomain float64
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eva *Evaluator) init(nodes []Node, tasks []Task) {
|
|
|
|
eva.domains = map[string]map[string]int{}
|
|
|
|
eva.racks = map[string]map[string]int{}
|
|
|
|
eva.nodes = map[string]map[string]int{}
|
|
|
|
eva.upstreams = map[string]string{}
|
|
|
|
eva.totalPS = 0
|
|
|
|
eva.totalWorker = 0
|
|
|
|
eva.factorNode = 1.0
|
|
|
|
eva.factorRack = 4.0
|
|
|
|
eva.factorDomain = 40.0
|
|
|
|
eva.cost = 0.0
|
|
|
|
eva.costNetwork = 0.0
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eva *Evaluator) add(node Node, task Task) {
|
|
|
|
/* update node load cost */
|
|
|
|
|
|
|
|
/* update network cost */
|
|
|
|
if _, ok := eva.nodes[node.ClientID]; !ok {
|
|
|
|
eva.nodes[node.ClientID] = map[string]int{"PS": 0, "Worker": 0}
|
|
|
|
}
|
|
|
|
if _, ok := eva.racks[node.Rack]; !ok {
|
|
|
|
eva.racks[node.Rack] = map[string]int{"PS": 0, "Worker": 0}
|
|
|
|
}
|
|
|
|
if _, ok := eva.domains[node.Domain]; !ok {
|
|
|
|
eva.domains[node.Domain] = map[string]int{"PS": 0, "Worker": 0}
|
|
|
|
}
|
|
|
|
if task.IsPS {
|
|
|
|
eva.costNetwork += eva.factorNode * float64(eva.racks[node.Rack]["Worker"]-eva.nodes[node.ClientID]["Worker"])
|
|
|
|
eva.costNetwork += eva.factorRack * float64(eva.domains[node.Domain]["Worker"]-eva.racks[node.Rack]["Worker"])
|
|
|
|
eva.costNetwork += eva.factorDomain * float64(eva.totalWorker-eva.domains[node.Domain]["Worker"])
|
|
|
|
|
|
|
|
eva.nodes[node.ClientID]["PS"]++
|
|
|
|
eva.racks[node.Rack]["PS"]++
|
|
|
|
eva.domains[node.Domain]["PS"]++
|
|
|
|
eva.totalPS++
|
|
|
|
} else {
|
|
|
|
eva.costNetwork += eva.factorNode * float64(eva.racks[node.Rack]["PS"]-eva.nodes[node.ClientID]["PS"])
|
|
|
|
eva.costNetwork += eva.factorRack * float64(eva.domains[node.Domain]["PS"]-eva.racks[node.Rack]["PS"])
|
|
|
|
eva.costNetwork += eva.factorDomain * float64(eva.totalPS-eva.domains[node.Domain]["PS"])
|
|
|
|
|
|
|
|
eva.nodes[node.ClientID]["Worker"]++
|
|
|
|
eva.racks[node.Rack]["Worker"]++
|
|
|
|
eva.domains[node.Domain]["Worker"]++
|
|
|
|
eva.totalWorker++
|
|
|
|
}
|
|
|
|
eva.cost = eva.costNetwork
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eva *Evaluator) remove(node Node, task Task) {
|
|
|
|
if task.IsPS {
|
|
|
|
eva.costNetwork -= eva.factorNode * float64(eva.racks[node.Rack]["Worker"]-eva.nodes[node.ClientID]["Worker"])
|
|
|
|
eva.costNetwork -= eva.factorRack * float64(eva.domains[node.Domain]["Worker"]-eva.racks[node.Rack]["Worker"])
|
|
|
|
eva.costNetwork -= eva.factorDomain * float64(eva.totalWorker-eva.domains[node.Domain]["Worker"])
|
|
|
|
|
|
|
|
eva.nodes[node.ClientID]["PS"]--
|
|
|
|
eva.racks[node.Rack]["PS"]--
|
|
|
|
eva.domains[node.Domain]["PS"]--
|
|
|
|
eva.totalPS--
|
|
|
|
} else {
|
|
|
|
eva.costNetwork -= eva.factorNode * float64(eva.racks[node.Rack]["PS"]-eva.nodes[node.ClientID]["PS"])
|
|
|
|
eva.costNetwork -= eva.factorRack * float64(eva.domains[node.Domain]["PS"]-eva.racks[node.Rack]["PS"])
|
|
|
|
eva.costNetwork -= eva.factorDomain * float64(eva.totalPS-eva.domains[node.Domain]["PS"])
|
|
|
|
|
|
|
|
//fmt.Println(eva.totalWorker, eva.domains[node.Domain])
|
|
|
|
|
|
|
|
eva.nodes[node.ClientID]["Worker"]--
|
|
|
|
eva.racks[node.Rack]["Worker"]--
|
|
|
|
eva.domains[node.Domain]["Worker"]--
|
|
|
|
eva.totalWorker--
|
|
|
|
}
|
|
|
|
eva.cost = eva.costNetwork
|
|
|
|
}
|
|
|
|
|
|
|
|
func (eva *Evaluator) calculate() float64 {
|
|
|
|
return eva.cost
|
|
|
|
}
|
|
|
|
|
2020-05-14 12:52:39 +00:00
|
|
|
var nodesMap map[string]Node
|
|
|
|
var tasksMap map[string]Task
|
|
|
|
|
|
|
|
type Node struct {
|
2020-05-18 00:52:33 +00:00
|
|
|
ClientID string `json:"id"`
|
|
|
|
Domain string `json:"domain"`
|
|
|
|
Rack string `json:"rack"`
|
|
|
|
Version float64 `json:"version"`
|
|
|
|
NumCPU int `json:"cpu_num"`
|
|
|
|
UtilCPU float64 `json:"cpu_load"`
|
|
|
|
MemTotal int `json:"mem_total"`
|
|
|
|
MemAvailable int `json:"mem_available"`
|
|
|
|
UsingBW float64 `json:"bw_using"`
|
|
|
|
TotalBW float64 `json:"bw_total"`
|
|
|
|
numberGPU int
|
|
|
|
//Status []GPUStatus `json:"status"`
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-23 13:06:31 +00:00
|
|
|
type Task3 struct {
|
2020-05-18 00:52:33 +00:00
|
|
|
Name string `json:"name"`
|
|
|
|
Image string `json:"image"`
|
|
|
|
Cmd string `json:"cmd"`
|
|
|
|
NumberCPU int `json:"cpu_number"`
|
|
|
|
Memory int `json:"memory"`
|
|
|
|
NumberGPU int `json:"gpu_number"`
|
|
|
|
MemoryGPU int `json:"gpu_memory"`
|
|
|
|
IsPS bool `json:"is_ps"`
|
|
|
|
ModelGPU string `json:"gpu_model"`
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// An valid allocation
|
|
|
|
type Allocation struct {
|
|
|
|
TasksOnNode map[string][]Task // tasks on nodes[id]
|
|
|
|
Nodes map[string]Node
|
2020-05-18 00:52:33 +00:00
|
|
|
NodeIDs []string
|
|
|
|
Flags map[string]bool
|
|
|
|
Evaluator Evaluator
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
func randomFit(allocation Allocation, task Task) (string, bool) {
|
|
|
|
flag := false
|
|
|
|
nodeID := ""
|
|
|
|
for nodeID = range allocation.Nodes {
|
|
|
|
if node, ok := allocation.Nodes[nodeID]; ok && len(allocation.TasksOnNode[nodeID]) < node.numberGPU {
|
|
|
|
flag = true
|
|
|
|
break
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
return nodeID, flag
|
|
|
|
}
|
|
|
|
|
|
|
|
func firstFit(allocation Allocation, task Task) (string, bool) {
|
|
|
|
flag := false
|
|
|
|
nodeID := ""
|
|
|
|
for _, nodeID = range allocation.NodeIDs {
|
|
|
|
if node, ok := allocation.Nodes[nodeID]; ok && len(allocation.TasksOnNode[nodeID]) < node.numberGPU {
|
|
|
|
flag = true
|
|
|
|
break
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
return nodeID, flag
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
func fastBestFit(nodes []Node, tasks []Task) Allocation {
|
|
|
|
eva := Evaluator{}
|
|
|
|
eva.init(nodes, tasks)
|
|
|
|
|
|
|
|
allocation := Allocation{Flags: map[string]bool{"valid": true}}
|
|
|
|
allocation.TasksOnNode = map[string][]Task{}
|
|
|
|
for _, task := range tasks {
|
|
|
|
minCost := math.MaxFloat64
|
|
|
|
nodeID := ""
|
|
|
|
for _, node := range nodes {
|
|
|
|
if _, ok := allocation.TasksOnNode[node.ClientID]; !ok {
|
|
|
|
allocation.TasksOnNode[node.ClientID] = []Task{}
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
if len(allocation.TasksOnNode[node.ClientID]) >= node.numberGPU {
|
2020-05-14 12:52:39 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
eva.add(node, task)
|
|
|
|
cost := eva.calculate()
|
|
|
|
eva.remove(node, task)
|
|
|
|
if cost < minCost || nodeID == "" {
|
|
|
|
minCost = cost
|
|
|
|
nodeID = node.ClientID
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-23 13:06:31 +00:00
|
|
|
//fmt.Println(cost)
|
2020-05-18 00:52:33 +00:00
|
|
|
}
|
|
|
|
if nodeID == "" {
|
|
|
|
allocation.Flags["valid"] = false
|
|
|
|
break
|
|
|
|
} else {
|
2020-05-23 13:06:31 +00:00
|
|
|
//fmt.Println(task, nodeID, allocation.TasksOnNode, minCost)
|
2020-05-18 00:52:33 +00:00
|
|
|
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
|
|
|
|
eva.add(nodesMap[nodeID], task)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
fmt.Println(eva.calculate())
|
|
|
|
return allocation
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
func bestFit(allocation Allocation, task Task) (string, bool) {
|
|
|
|
flag := false
|
|
|
|
nodeID := ""
|
|
|
|
minCost := math.MaxFloat64
|
|
|
|
for _, id := range allocation.NodeIDs {
|
|
|
|
if node, ok := allocation.Nodes[id]; ok && len(allocation.TasksOnNode[id]) < node.numberGPU {
|
|
|
|
/* add */
|
|
|
|
allocation.TasksOnNode[id] = append(allocation.TasksOnNode[id], task)
|
|
|
|
|
|
|
|
/* evaluate */
|
|
|
|
cost := evaluatue(allocation)
|
|
|
|
|
|
|
|
/* revert */
|
|
|
|
idx := -1
|
|
|
|
for i, task2 := range allocation.TasksOnNode[id] {
|
|
|
|
if task2.Name == task.Name {
|
|
|
|
idx = i
|
|
|
|
}
|
|
|
|
}
|
|
|
|
copy(allocation.TasksOnNode[id][idx:], allocation.TasksOnNode[id][idx+1:])
|
|
|
|
allocation.TasksOnNode[id] = allocation.TasksOnNode[id][:len(allocation.TasksOnNode[id])-1]
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
if cost < minCost || !flag {
|
|
|
|
nodeID = id
|
|
|
|
minCost = cost
|
|
|
|
}
|
|
|
|
flag = true
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
return nodeID, flag
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
func evaluatue(allocation Allocation) float64 {
|
|
|
|
/* Calculate cost for network */
|
|
|
|
costNetwork := 0.0
|
|
|
|
domains := map[string]map[string]int{}
|
|
|
|
racks := map[string]map[string]int{}
|
|
|
|
upstreams := map[string]string{}
|
|
|
|
totalPS := 0
|
|
|
|
totalWorker := 0
|
|
|
|
|
|
|
|
taskToNode := map[string]string{}
|
|
|
|
for nodeID, tasks := range allocation.TasksOnNode {
|
|
|
|
numPS := 0
|
|
|
|
numWorker := 0
|
|
|
|
node := allocation.Nodes[nodeID]
|
|
|
|
for _, task := range tasks {
|
|
|
|
taskToNode[task.Name] = nodeID
|
|
|
|
|
|
|
|
if _, ok := domains[node.Domain]; !ok {
|
|
|
|
domains[node.Domain] = map[string]int{"PS": 0, "Worker": 0}
|
|
|
|
}
|
|
|
|
if _, ok := racks[node.Rack]; !ok {
|
|
|
|
racks[node.Rack] = map[string]int{"PS": 0, "Worker": 0}
|
|
|
|
}
|
|
|
|
|
|
|
|
if task.IsPS {
|
|
|
|
domains[node.Domain]["PS"]++
|
|
|
|
racks[node.Rack]["PS"]++
|
|
|
|
numPS++
|
|
|
|
totalPS++
|
|
|
|
} else {
|
|
|
|
domains[node.Domain]["Worker"]++
|
|
|
|
racks[node.Rack]["Worker"]++
|
|
|
|
numWorker++
|
|
|
|
totalWorker++
|
|
|
|
}
|
|
|
|
upstreams[node.Rack] = node.Domain
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
costNetwork -= float64(numPS * numWorker)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
/* in the same domain */
|
|
|
|
for rackID, pair := range racks {
|
|
|
|
// in the same rack
|
|
|
|
costNetwork += float64(pair["PS"]*pair["Worker"]) * 1.0
|
|
|
|
// cross rack, but in the same domain
|
|
|
|
costNetwork += float64(pair["PS"]*(domains[upstreams[rackID]]["Worker"]-pair["Worker"])) * 4.0
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
/* across domain */
|
|
|
|
for _, pair := range domains {
|
|
|
|
costNetwork += float64(pair["PS"]*(totalWorker-pair["Worker"])) * 40.0
|
|
|
|
}
|
|
|
|
|
|
|
|
/* calculate cost for node fitness */
|
|
|
|
//cpu, memory, bw
|
|
|
|
costLB := 0.0
|
|
|
|
for nodeID, tasks := range allocation.TasksOnNode {
|
|
|
|
costCPU := 0.0
|
|
|
|
costMem := 0.0
|
|
|
|
costBW := 0.0
|
|
|
|
costGPU := 0.0
|
|
|
|
requestCPU := 0
|
|
|
|
requestMem := 0
|
|
|
|
requestBW := 0.0
|
|
|
|
requestGPU := 0
|
|
|
|
numberPS := 0
|
|
|
|
numberWorker := 0
|
|
|
|
for _, task := range tasks {
|
|
|
|
requestCPU += task.NumberCPU
|
|
|
|
requestMem += task.Memory
|
|
|
|
requestGPU += task.NumberGPU
|
|
|
|
if task.IsPS {
|
|
|
|
numberPS++
|
|
|
|
} else {
|
|
|
|
numberWorker++
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
requestBW = float64(numberPS*(totalWorker-numberWorker) + numberWorker*(totalPS-numberPS))
|
|
|
|
node := allocation.Nodes[nodeID]
|
|
|
|
costCPU += (float64(requestCPU) + node.UtilCPU) / float64(node.NumCPU) * 1.0
|
|
|
|
costMem += (float64(requestMem + (node.MemTotal - node.MemAvailable))) / float64(node.MemTotal) * 1.0
|
|
|
|
costBW += (float64(requestBW) + (node.TotalBW - node.UsingBW)) / node.TotalBW * 2.0
|
|
|
|
costGPU += (float64(requestGPU + node.numberGPU)) / float64(node.numberGPU) * 3.0
|
|
|
|
costLB += (costCPU + costMem + costBW + costGPU) / (1.0 + 1.0 + 2.0 + 3.0)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
costLB /= float64(len(allocation.TasksOnNode))
|
|
|
|
costLB *= 100
|
|
|
|
//fmt.Println(costLB)
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
cost := 0.0*costLB + 1.0*costNetwork
|
|
|
|
return cost
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
/* Evaluate the allocation */
|
|
|
|
func (X Allocation) Evaluate() (float64, error) {
|
|
|
|
if !X.Flags["valid"] {
|
|
|
|
//fmt.Println("Invalid allocation")
|
|
|
|
return math.MaxFloat64, nil
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
|
|
|
|
costNetwork := evaluatue(X)
|
|
|
|
|
|
|
|
cost := costNetwork
|
|
|
|
//fmt.Println(taskToNode, cost, len(X.Nodes))
|
|
|
|
return float64(cost), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mutate a Vector by resampling each element from a normal distribution with
|
|
|
|
// probability 0.8.
|
|
|
|
func (X Allocation) Mutate(rng *rand.Rand) {
|
|
|
|
/* remove a node randomly */
|
|
|
|
// make sure rng.Intn != 0 && cnt >0
|
|
|
|
cnt := rng.Intn(1+len(X.Nodes)/100)%50 + 1
|
|
|
|
for i := 0; i < cnt; i++ {
|
|
|
|
if !X.Flags["valid"] {
|
|
|
|
//fmt.Println("Invalid allocation")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
//fmt.Println("Mutate")
|
|
|
|
//fmt.Println("Before", X)
|
|
|
|
|
|
|
|
var nodeIDs []string
|
|
|
|
for nodeID := range X.Nodes {
|
|
|
|
nodeIDs = append(nodeIDs, nodeID)
|
|
|
|
}
|
|
|
|
randIndex := rng.Intn(len(X.Nodes))
|
|
|
|
nodeID := nodeIDs[randIndex]
|
|
|
|
|
|
|
|
/* reschedule tasks on tgt node */
|
|
|
|
var tasks []Task
|
|
|
|
if _, ok := X.TasksOnNode[nodeID]; ok {
|
|
|
|
for _, task := range X.TasksOnNode[nodeID] {
|
|
|
|
tasks = append(tasks, task)
|
|
|
|
}
|
|
|
|
delete(X.TasksOnNode, nodeID)
|
|
|
|
}
|
|
|
|
delete(X.Nodes, nodeID)
|
|
|
|
|
|
|
|
//fmt.Println(tasks)
|
|
|
|
|
|
|
|
/* first-fit */
|
2020-05-14 12:52:39 +00:00
|
|
|
for _, task := range tasks {
|
2020-05-18 00:52:33 +00:00
|
|
|
if nodeID, ok := firstFit(X, task); ok {
|
|
|
|
X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
|
|
|
|
} else {
|
|
|
|
X.Flags["valid"] = false
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
//fmt.Println("After", X)
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-23 13:06:31 +00:00
|
|
|
/* move tasks */
|
2020-05-18 00:52:33 +00:00
|
|
|
if !X.Flags["valid"] {
|
|
|
|
//fmt.Println("Invalid allocation")
|
|
|
|
return
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
var nodeIDs []string
|
2020-05-18 00:52:33 +00:00
|
|
|
for nodeID := range X.Nodes {
|
2020-05-14 12:52:39 +00:00
|
|
|
nodeIDs = append(nodeIDs, nodeID)
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
randIndex1 := rng.Intn(len(nodeIDs))
|
|
|
|
nodeID1 := nodeIDs[randIndex1]
|
2020-05-23 13:06:31 +00:00
|
|
|
if tasks, ok := X.TasksOnNode[nodeID1]; ok && len(tasks) > 0 {
|
|
|
|
idx := rng.Intn(len(tasks))
|
|
|
|
task := tasks[idx]
|
|
|
|
copy(X.TasksOnNode[nodeID1][idx:], X.TasksOnNode[nodeID1][idx+1:])
|
|
|
|
X.TasksOnNode[nodeID1] = X.TasksOnNode[nodeID1][:len(X.TasksOnNode[nodeID1])-1]
|
|
|
|
|
|
|
|
if nodeID, ok := firstFit(X, task); ok {
|
|
|
|
X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
|
|
|
|
} else {
|
|
|
|
X.Flags["valid"] = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
// Crossover a Vector with another Vector by applying uniform crossover.
|
|
|
|
func (X Allocation) Crossover(Y eaopt.Genome, rng *rand.Rand) {
|
|
|
|
// make sure rng.Intn != 0 && cnt >0
|
|
|
|
cnt := rng.Intn(1+len(X.Nodes)/100)%10 + 1
|
|
|
|
for i := 0; i < cnt; i++ {
|
|
|
|
if !Y.(Allocation).Flags["valid"] || !X.Flags["valid"] {
|
|
|
|
return
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
//fmt.Println("Crossover")
|
|
|
|
taskToNode := map[string]string{}
|
|
|
|
for nodeID, tasks := range X.TasksOnNode {
|
|
|
|
for _, task := range tasks {
|
|
|
|
taskToNode[task.Name] = nodeID
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
|
|
|
|
var nodeIDs []string
|
|
|
|
for nodeID := range Y.(Allocation).Nodes {
|
|
|
|
nodeIDs = append(nodeIDs, nodeID)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
|
|
|
|
//fmt.Println(nodeIDs, Y.(Allocation))
|
|
|
|
randIndex := rng.Intn(len(nodeIDs))
|
|
|
|
nodeID := nodeIDs[randIndex]
|
|
|
|
|
|
|
|
for _, task := range Y.(Allocation).TasksOnNode[nodeID] {
|
|
|
|
//fmt.Println(Y.(Allocation).TasksOnNode[nodeID])
|
|
|
|
idx := -1
|
|
|
|
nodeID2, ok := taskToNode[task.Name]
|
|
|
|
if !ok {
|
|
|
|
fmt.Println("Error", taskToNode, X.TasksOnNode, task.Name)
|
|
|
|
}
|
|
|
|
for i, task2 := range X.TasksOnNode[nodeID2] {
|
|
|
|
if task2.Name == task.Name {
|
|
|
|
idx = i
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if idx == -1 {
|
|
|
|
fmt.Println("Error 2", taskToNode, X.TasksOnNode, task.Name)
|
|
|
|
}
|
|
|
|
//fmt.Println(X.TasksOnNode)
|
|
|
|
copy(X.TasksOnNode[nodeID2][idx:], X.TasksOnNode[nodeID2][idx+1:])
|
|
|
|
X.TasksOnNode[nodeID2] = X.TasksOnNode[nodeID2][:len(X.TasksOnNode[nodeID2])-1]
|
|
|
|
//fmt.Println(X.TasksOnNode)
|
|
|
|
}
|
|
|
|
/* reschedule tasks on tgt node */
|
|
|
|
var tasks []Task
|
|
|
|
if _, ok := X.TasksOnNode[nodeID]; ok {
|
|
|
|
for _, task := range X.TasksOnNode[nodeID] {
|
|
|
|
tasks = append(tasks, task)
|
|
|
|
}
|
|
|
|
delete(X.TasksOnNode, nodeID)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
if _, ok := X.Nodes[nodeID]; ok {
|
|
|
|
delete(X.Nodes, nodeID)
|
|
|
|
}
|
|
|
|
X.Nodes[nodeID] = Y.(Allocation).Nodes[nodeID]
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
var newTasksOnNode []Task
|
|
|
|
for _, task := range Y.(Allocation).TasksOnNode[nodeID] {
|
|
|
|
newTasksOnNode = append(newTasksOnNode, task)
|
|
|
|
}
|
|
|
|
X.TasksOnNode[nodeID] = newTasksOnNode
|
2020-05-14 12:52:39 +00:00
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
/* first-fit */
|
|
|
|
for _, task := range tasks {
|
|
|
|
if nodeID, ok := firstFit(X, task); ok {
|
|
|
|
X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
|
|
|
|
} else {
|
|
|
|
X.Flags["valid"] = false
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
//fmt.Println()
|
|
|
|
//fmt.Println("crossover", X.TasksOnNode)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Clone a Vector to produce a new one that points to a different slice.
|
|
|
|
func (X Allocation) Clone() eaopt.Genome {
|
2020-05-18 00:52:33 +00:00
|
|
|
if !X.Flags["valid"] {
|
2020-05-14 12:52:39 +00:00
|
|
|
//fmt.Println(X.Valid)
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
Y := Allocation{TasksOnNode: map[string][]Task{}, Nodes: map[string]Node{}, Flags: map[string]bool{"valid": X.Flags["valid"]}}
|
2020-05-14 12:52:39 +00:00
|
|
|
for id, node := range X.Nodes {
|
|
|
|
Y.Nodes[id] = node
|
2020-05-18 00:52:33 +00:00
|
|
|
Y.NodeIDs = append(Y.NodeIDs, node.ClientID)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
for id, tasks := range X.TasksOnNode {
|
|
|
|
var t []Task
|
|
|
|
for _, task := range tasks {
|
|
|
|
t = append(t, task)
|
|
|
|
}
|
|
|
|
Y.TasksOnNode[id] = t
|
|
|
|
}
|
|
|
|
return Y
|
|
|
|
}
|
|
|
|
|
|
|
|
func VectorFactory(rng *rand.Rand) eaopt.Genome {
|
2020-05-18 00:52:33 +00:00
|
|
|
allocation := Allocation{TasksOnNode: map[string][]Task{}, Nodes: map[string]Node{}, Flags: map[string]bool{"valid": true}}
|
2020-05-14 12:52:39 +00:00
|
|
|
|
|
|
|
var nodes []Node
|
|
|
|
var tasks []Task
|
|
|
|
|
|
|
|
for _, node := range nodesMap {
|
|
|
|
nodes = append(nodes, node)
|
|
|
|
}
|
|
|
|
for _, task := range tasksMap {
|
|
|
|
tasks = append(tasks, task)
|
|
|
|
}
|
|
|
|
|
|
|
|
/* shuffle */
|
|
|
|
for n := len(nodes); n > 0; n-- {
|
|
|
|
randIndex := rng.Intn(n)
|
|
|
|
nodes[n-1], nodes[randIndex] = nodes[randIndex], nodes[n-1]
|
|
|
|
}
|
|
|
|
for n := len(tasks); n > 0; n-- {
|
|
|
|
randIndex := rng.Intn(n)
|
|
|
|
tasks[n-1], tasks[randIndex] = tasks[randIndex], tasks[n-1]
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pick nodes */
|
|
|
|
for _, node := range nodesMap {
|
|
|
|
allocation.Nodes[node.ClientID] = node
|
2020-05-18 00:52:33 +00:00
|
|
|
allocation.NodeIDs = append(allocation.NodeIDs, node.ClientID)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
t := rng.Int() % 10
|
2020-05-23 13:06:31 +00:00
|
|
|
if t == -1 {
|
2020-05-18 00:52:33 +00:00
|
|
|
/* best-fit */
|
|
|
|
ts := time.Now()
|
|
|
|
|
|
|
|
/*
|
|
|
|
for _, task := range tasks {
|
|
|
|
if nodeID, ok := bestFit(allocation, task); ok {
|
|
|
|
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
|
|
|
|
} else {
|
|
|
|
allocation.Flags["valid"] = false
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
allocation.TasksOnNode = fastBestFit(nodes, tasks).TasksOnNode
|
|
|
|
fmt.Println(time.Since(ts))
|
|
|
|
//fmt.Println("Best Fit")
|
|
|
|
} else if t%2 == 0 {
|
|
|
|
/* first-fit */
|
|
|
|
for _, task := range tasks {
|
|
|
|
if nodeID, ok := randomFit(allocation, task); ok {
|
|
|
|
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
|
|
|
|
} else {
|
|
|
|
allocation.Flags["valid"] = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* random-fit */
|
|
|
|
for _, task := range tasks {
|
|
|
|
if nodeID, ok := randomFit(allocation, task); ok {
|
|
|
|
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
|
|
|
|
} else {
|
|
|
|
allocation.Flags["valid"] = false
|
|
|
|
}
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
//fmt.Println(evaluatue(allocation))
|
2020-05-14 12:52:39 +00:00
|
|
|
//fmt.Println(allocation)
|
|
|
|
return allocation
|
|
|
|
}
|
|
|
|
|
2020-05-23 13:06:31 +00:00
|
|
|
func main3() {
|
2020-05-18 00:52:33 +00:00
|
|
|
numTask := 5
|
2020-05-14 12:52:39 +00:00
|
|
|
|
|
|
|
nodesMap = map[string]Node{}
|
|
|
|
tasksMap = map[string]Task{}
|
|
|
|
|
|
|
|
for i := 0; i < numTask*3; i++ {
|
2020-05-18 00:52:33 +00:00
|
|
|
node := Node{ClientID: strconv.Itoa(i), Rack: strconv.Itoa(i % 40), Domain: strconv.Itoa(i % 4)}
|
|
|
|
node.NumCPU = 24
|
|
|
|
node.MemTotal = 188
|
|
|
|
node.TotalBW = 100
|
2020-05-23 13:06:31 +00:00
|
|
|
node.numberGPU = rand.Intn(3) + 1
|
2020-05-18 00:52:33 +00:00
|
|
|
nodesMap[strconv.Itoa(i)] = node
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
for i := 0; i < numTask; i++ {
|
|
|
|
isPS := false
|
2020-05-23 13:06:31 +00:00
|
|
|
if i>= 3 {
|
2020-05-14 12:52:39 +00:00
|
|
|
isPS = true
|
|
|
|
}
|
2020-05-18 00:52:33 +00:00
|
|
|
task := Task{Name: strconv.Itoa(i), IsPS: isPS}
|
|
|
|
task.Memory = 4
|
|
|
|
task.NumberCPU = 2
|
|
|
|
task.NumberGPU = 1
|
|
|
|
tasksMap[strconv.Itoa(i)] = task
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
var nodes []Node
|
|
|
|
var tasks []Task
|
|
|
|
|
|
|
|
for _, node := range nodesMap {
|
|
|
|
nodes = append(nodes, node)
|
|
|
|
}
|
|
|
|
for _, task := range tasksMap {
|
|
|
|
tasks = append(tasks, task)
|
|
|
|
}
|
|
|
|
s := time.Now()
|
2020-05-23 13:06:31 +00:00
|
|
|
allocation := fastBestFit(nodes, tasks)
|
2020-05-18 00:52:33 +00:00
|
|
|
fmt.Println(time.Since(s))
|
|
|
|
|
2020-05-14 12:52:39 +00:00
|
|
|
// Instantiate a GA with a GAConfig
|
|
|
|
var ga, err = eaopt.NewDefaultGAConfig().NewGA()
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set the number of generations to run for
|
2020-05-18 00:52:33 +00:00
|
|
|
ga.NGenerations = math.MaxInt32
|
2020-05-14 12:52:39 +00:00
|
|
|
ga.NPops = 1
|
2020-05-18 00:52:33 +00:00
|
|
|
ga.PopSize = 30 + uint(numTask/2)
|
2020-05-14 12:52:39 +00:00
|
|
|
|
|
|
|
// Add a custom print function to track progress
|
|
|
|
ga.Callback = func(ga *eaopt.GA) {
|
|
|
|
fmt.Printf("Best fitness at generation %d: %f\n", ga.Generations, ga.HallOfFame[0].Fitness)
|
|
|
|
}
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
bestFitness := math.MaxFloat64
|
2020-05-14 12:52:39 +00:00
|
|
|
count := 0
|
|
|
|
|
2020-05-18 00:52:33 +00:00
|
|
|
ts := time.Now()
|
|
|
|
|
2020-05-14 12:52:39 +00:00
|
|
|
ga.EarlyStop = func(ga *eaopt.GA) bool {
|
|
|
|
gap := math.Abs(ga.HallOfFame[0].Fitness - bestFitness)
|
2020-05-23 13:06:31 +00:00
|
|
|
if gap <= 0.000001 || ga.HallOfFame[0].Fitness >= bestFitness {
|
|
|
|
if count >= 50 || time.Since(ts) > time.Second*30 {
|
2020-05-14 12:52:39 +00:00
|
|
|
fmt.Println("Early Stop")
|
|
|
|
return true
|
|
|
|
} else {
|
|
|
|
count++
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
bestFitness = ga.HallOfFame[0].Fitness
|
|
|
|
count = 1
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find the minimum
|
|
|
|
err = ga.Minimize(VectorFactory)
|
|
|
|
fmt.Println(time.Since(ts))
|
2020-05-23 13:06:31 +00:00
|
|
|
fmt.Println(ga.HallOfFame[0].Genome.(Allocation).TasksOnNode)
|
2020-05-18 00:52:33 +00:00
|
|
|
//fmt.Println(ga.HallOfFame[0].Genome.(Allocation).Nodes)
|
2020-05-14 12:52:39 +00:00
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err)
|
|
|
|
return
|
|
|
|
}
|
2020-05-23 13:06:31 +00:00
|
|
|
|
|
|
|
fmt.Println(allocation)
|
2020-05-14 12:52:39 +00:00
|
|
|
}
|