1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-06-08 23:01:55 +00:00
YAO-scheduler/src/ga.go

487 lines
12 KiB
Go
Raw Normal View History

2020-05-14 12:52:39 +00:00
package main
import (
"math/rand"
"github.com/MaxHalford/eaopt"
"time"
"strconv"
"math"
2020-05-25 03:35:44 +00:00
log "github.com/sirupsen/logrus"
2020-05-14 12:52:39 +00:00
)
2020-05-25 03:35:44 +00:00
var nodesMap map[string]NodeStatus
2020-05-14 12:52:39 +00:00
var tasksMap map[string]Task
2020-05-25 03:35:44 +00:00
// A resource allocation
2020-05-14 12:52:39 +00:00
type Allocation struct {
TasksOnNode map[string][]Task // tasks on nodes[id]
2020-05-25 03:35:44 +00:00
Nodes map[string]NodeStatus
2020-05-18 00:52:33 +00:00
NodeIDs []string
Flags map[string]bool
Evaluator Evaluator
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
func randomFit(allocation Allocation, task Task) (string, bool) {
flag := false
nodeID := ""
for nodeID = range allocation.Nodes {
2020-05-25 03:35:44 +00:00
numberGPU := 0
for _, gpu := range allocation.Nodes[nodeID].Status {
if gpu.MemoryAllocated == 0 {
numberGPU += 0
}
}
if _, ok := allocation.Nodes[nodeID]; ok && len(allocation.TasksOnNode[nodeID]) < numberGPU {
2020-05-18 00:52:33 +00:00
flag = true
break
}
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
return nodeID, flag
}
func firstFit(allocation Allocation, task Task) (string, bool) {
flag := false
nodeID := ""
for _, nodeID = range allocation.NodeIDs {
2020-05-25 03:35:44 +00:00
numberGPU := 0
for _, gpu := range allocation.Nodes[nodeID].Status {
if gpu.MemoryAllocated == 0 {
numberGPU += 0
}
}
if _, ok := allocation.Nodes[nodeID]; ok && len(allocation.TasksOnNode[nodeID]) < numberGPU {
2020-05-18 00:52:33 +00:00
flag = true
break
2020-05-14 12:52:39 +00:00
}
}
2020-05-18 00:52:33 +00:00
return nodeID, flag
}
2020-05-14 12:52:39 +00:00
2020-05-25 03:35:44 +00:00
func fastBestFit(nodes []NodeStatus, tasks []Task) Allocation {
2020-05-18 00:52:33 +00:00
eva := Evaluator{}
eva.init(nodes, tasks)
allocation := Allocation{Flags: map[string]bool{"valid": true}}
allocation.TasksOnNode = map[string][]Task{}
for _, task := range tasks {
minCost := math.MaxFloat64
nodeID := ""
for _, node := range nodes {
if _, ok := allocation.TasksOnNode[node.ClientID]; !ok {
allocation.TasksOnNode[node.ClientID] = []Task{}
2020-05-14 12:52:39 +00:00
}
2020-05-25 03:35:44 +00:00
numberGPU := 0
for _, gpu := range allocation.Nodes[nodeID].Status {
if gpu.MemoryAllocated == 0 {
numberGPU += 0
}
}
if len(allocation.TasksOnNode[node.ClientID]) >= numberGPU {
2020-05-14 12:52:39 +00:00
continue
}
2020-05-18 00:52:33 +00:00
eva.add(node, task)
cost := eva.calculate()
eva.remove(node, task)
if cost < minCost || nodeID == "" {
minCost = cost
nodeID = node.ClientID
2020-05-14 12:52:39 +00:00
}
2020-05-23 13:06:31 +00:00
//fmt.Println(cost)
2020-05-18 00:52:33 +00:00
}
if nodeID == "" {
allocation.Flags["valid"] = false
break
} else {
2020-05-23 13:06:31 +00:00
//fmt.Println(task, nodeID, allocation.TasksOnNode, minCost)
2020-05-18 00:52:33 +00:00
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
eva.add(nodesMap[nodeID], task)
2020-05-14 12:52:39 +00:00
}
}
2020-05-25 03:35:44 +00:00
log.Println(eva.calculate())
2020-05-18 00:52:33 +00:00
return allocation
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
func bestFit(allocation Allocation, task Task) (string, bool) {
flag := false
nodeID := ""
minCost := math.MaxFloat64
for _, id := range allocation.NodeIDs {
2020-05-25 03:35:44 +00:00
numberGPU := 0
for _, gpu := range allocation.Nodes[id].Status {
if gpu.MemoryAllocated == 0 {
numberGPU += 0
}
}
if _, ok := allocation.Nodes[id]; ok && len(allocation.TasksOnNode[id]) < numberGPU {
2020-05-18 00:52:33 +00:00
/* add */
allocation.TasksOnNode[id] = append(allocation.TasksOnNode[id], task)
/* evaluate */
2020-05-25 03:35:44 +00:00
cost := evaluate(allocation)
2020-05-18 00:52:33 +00:00
/* revert */
idx := -1
for i, task2 := range allocation.TasksOnNode[id] {
if task2.Name == task.Name {
idx = i
}
}
copy(allocation.TasksOnNode[id][idx:], allocation.TasksOnNode[id][idx+1:])
allocation.TasksOnNode[id] = allocation.TasksOnNode[id][:len(allocation.TasksOnNode[id])-1]
2020-05-14 12:52:39 +00:00
2020-05-18 00:52:33 +00:00
if cost < minCost || !flag {
nodeID = id
minCost = cost
}
flag = true
}
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
return nodeID, flag
}
2020-05-14 12:52:39 +00:00
2020-05-18 00:52:33 +00:00
/* Evaluate the allocation */
func (X Allocation) Evaluate() (float64, error) {
if !X.Flags["valid"] {
//fmt.Println("Invalid allocation")
return math.MaxFloat64, nil
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
2020-05-25 03:35:44 +00:00
costNetwork := evaluate(X)
2020-05-18 00:52:33 +00:00
cost := costNetwork
//fmt.Println(taskToNode, cost, len(X.Nodes))
return float64(cost), nil
}
// Mutate a Vector by resampling each element from a normal distribution with
// probability 0.8.
func (X Allocation) Mutate(rng *rand.Rand) {
/* remove a node randomly */
// make sure rng.Intn != 0 && cnt >0
cnt := rng.Intn(1+len(X.Nodes)/100)%50 + 1
for i := 0; i < cnt; i++ {
if !X.Flags["valid"] {
//fmt.Println("Invalid allocation")
return
}
//fmt.Println("Mutate")
//fmt.Println("Before", X)
var nodeIDs []string
for nodeID := range X.Nodes {
nodeIDs = append(nodeIDs, nodeID)
}
randIndex := rng.Intn(len(X.Nodes))
nodeID := nodeIDs[randIndex]
/* reschedule tasks on tgt node */
var tasks []Task
if _, ok := X.TasksOnNode[nodeID]; ok {
for _, task := range X.TasksOnNode[nodeID] {
tasks = append(tasks, task)
}
delete(X.TasksOnNode, nodeID)
}
delete(X.Nodes, nodeID)
//fmt.Println(tasks)
/* first-fit */
2020-05-14 12:52:39 +00:00
for _, task := range tasks {
2020-05-18 00:52:33 +00:00
if nodeID, ok := firstFit(X, task); ok {
X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
} else {
X.Flags["valid"] = false
}
2020-05-14 12:52:39 +00:00
}
}
2020-05-18 00:52:33 +00:00
//fmt.Println("After", X)
2020-05-14 12:52:39 +00:00
2020-05-23 13:06:31 +00:00
/* move tasks */
2020-05-18 00:52:33 +00:00
if !X.Flags["valid"] {
//fmt.Println("Invalid allocation")
return
}
2020-05-14 12:52:39 +00:00
var nodeIDs []string
2020-05-18 00:52:33 +00:00
for nodeID := range X.Nodes {
2020-05-14 12:52:39 +00:00
nodeIDs = append(nodeIDs, nodeID)
}
2020-05-18 00:52:33 +00:00
randIndex1 := rng.Intn(len(nodeIDs))
nodeID1 := nodeIDs[randIndex1]
2020-05-23 13:06:31 +00:00
if tasks, ok := X.TasksOnNode[nodeID1]; ok && len(tasks) > 0 {
idx := rng.Intn(len(tasks))
task := tasks[idx]
copy(X.TasksOnNode[nodeID1][idx:], X.TasksOnNode[nodeID1][idx+1:])
X.TasksOnNode[nodeID1] = X.TasksOnNode[nodeID1][:len(X.TasksOnNode[nodeID1])-1]
if nodeID, ok := firstFit(X, task); ok {
X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
} else {
X.Flags["valid"] = false
}
}
2020-05-18 00:52:33 +00:00
}
2020-05-14 12:52:39 +00:00
2020-05-18 00:52:33 +00:00
// Crossover a Vector with another Vector by applying uniform crossover.
func (X Allocation) Crossover(Y eaopt.Genome, rng *rand.Rand) {
// make sure rng.Intn != 0 && cnt >0
cnt := rng.Intn(1+len(X.Nodes)/100)%10 + 1
for i := 0; i < cnt; i++ {
if !Y.(Allocation).Flags["valid"] || !X.Flags["valid"] {
return
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
//fmt.Println("Crossover")
taskToNode := map[string]string{}
for nodeID, tasks := range X.TasksOnNode {
for _, task := range tasks {
taskToNode[task.Name] = nodeID
2020-05-14 12:52:39 +00:00
}
}
2020-05-18 00:52:33 +00:00
var nodeIDs []string
for nodeID := range Y.(Allocation).Nodes {
nodeIDs = append(nodeIDs, nodeID)
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
//fmt.Println(nodeIDs, Y.(Allocation))
randIndex := rng.Intn(len(nodeIDs))
nodeID := nodeIDs[randIndex]
for _, task := range Y.(Allocation).TasksOnNode[nodeID] {
//fmt.Println(Y.(Allocation).TasksOnNode[nodeID])
idx := -1
nodeID2, ok := taskToNode[task.Name]
if !ok {
2020-05-25 03:35:44 +00:00
log.Println("Error", taskToNode, X.TasksOnNode, task.Name)
2020-05-18 00:52:33 +00:00
}
for i, task2 := range X.TasksOnNode[nodeID2] {
if task2.Name == task.Name {
idx = i
}
}
if idx == -1 {
2020-05-25 03:35:44 +00:00
log.Println("Error 2", taskToNode, X.TasksOnNode, task.Name)
2020-05-18 00:52:33 +00:00
}
//fmt.Println(X.TasksOnNode)
copy(X.TasksOnNode[nodeID2][idx:], X.TasksOnNode[nodeID2][idx+1:])
X.TasksOnNode[nodeID2] = X.TasksOnNode[nodeID2][:len(X.TasksOnNode[nodeID2])-1]
//fmt.Println(X.TasksOnNode)
}
/* reschedule tasks on tgt node */
var tasks []Task
if _, ok := X.TasksOnNode[nodeID]; ok {
for _, task := range X.TasksOnNode[nodeID] {
tasks = append(tasks, task)
}
delete(X.TasksOnNode, nodeID)
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
if _, ok := X.Nodes[nodeID]; ok {
delete(X.Nodes, nodeID)
}
X.Nodes[nodeID] = Y.(Allocation).Nodes[nodeID]
2020-05-14 12:52:39 +00:00
2020-05-18 00:52:33 +00:00
var newTasksOnNode []Task
for _, task := range Y.(Allocation).TasksOnNode[nodeID] {
newTasksOnNode = append(newTasksOnNode, task)
}
X.TasksOnNode[nodeID] = newTasksOnNode
2020-05-14 12:52:39 +00:00
2020-05-18 00:52:33 +00:00
/* first-fit */
for _, task := range tasks {
if nodeID, ok := firstFit(X, task); ok {
X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
} else {
X.Flags["valid"] = false
2020-05-14 12:52:39 +00:00
}
}
}
//fmt.Println()
//fmt.Println("crossover", X.TasksOnNode)
}
// Clone a Vector to produce a new one that points to a different slice.
func (X Allocation) Clone() eaopt.Genome {
2020-05-18 00:52:33 +00:00
if !X.Flags["valid"] {
2020-05-14 12:52:39 +00:00
//fmt.Println(X.Valid)
}
2020-05-25 03:35:44 +00:00
Y := Allocation{TasksOnNode: map[string][]Task{}, Nodes: map[string]NodeStatus{}, Flags: map[string]bool{"valid": X.Flags["valid"]}}
2020-05-14 12:52:39 +00:00
for id, node := range X.Nodes {
Y.Nodes[id] = node
2020-05-18 00:52:33 +00:00
Y.NodeIDs = append(Y.NodeIDs, node.ClientID)
2020-05-14 12:52:39 +00:00
}
for id, tasks := range X.TasksOnNode {
var t []Task
for _, task := range tasks {
t = append(t, task)
}
Y.TasksOnNode[id] = t
}
return Y
}
func VectorFactory(rng *rand.Rand) eaopt.Genome {
2020-05-25 03:35:44 +00:00
allocation := Allocation{TasksOnNode: map[string][]Task{}, Nodes: map[string]NodeStatus{}, Flags: map[string]bool{"valid": true}}
2020-05-14 12:52:39 +00:00
2020-05-25 03:35:44 +00:00
var nodes []NodeStatus
2020-05-14 12:52:39 +00:00
var tasks []Task
for _, node := range nodesMap {
nodes = append(nodes, node)
}
for _, task := range tasksMap {
tasks = append(tasks, task)
}
/* shuffle */
for n := len(nodes); n > 0; n-- {
randIndex := rng.Intn(n)
nodes[n-1], nodes[randIndex] = nodes[randIndex], nodes[n-1]
}
for n := len(tasks); n > 0; n-- {
randIndex := rng.Intn(n)
tasks[n-1], tasks[randIndex] = tasks[randIndex], tasks[n-1]
}
/* pick nodes */
for _, node := range nodesMap {
allocation.Nodes[node.ClientID] = node
2020-05-18 00:52:33 +00:00
allocation.NodeIDs = append(allocation.NodeIDs, node.ClientID)
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
t := rng.Int() % 10
2020-05-23 13:06:31 +00:00
if t == -1 {
2020-05-18 00:52:33 +00:00
/* best-fit */
ts := time.Now()
/*
for _, task := range tasks {
if nodeID, ok := bestFit(allocation, task); ok {
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
} else {
allocation.Flags["valid"] = false
2020-05-14 12:52:39 +00:00
}
}
2020-05-18 00:52:33 +00:00
*/
allocation.TasksOnNode = fastBestFit(nodes, tasks).TasksOnNode
2020-05-25 03:35:44 +00:00
log.Println(time.Since(ts))
2020-05-18 00:52:33 +00:00
//fmt.Println("Best Fit")
} else if t%2 == 0 {
/* first-fit */
for _, task := range tasks {
if nodeID, ok := randomFit(allocation, task); ok {
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
} else {
allocation.Flags["valid"] = false
}
}
} else {
/* random-fit */
for _, task := range tasks {
if nodeID, ok := randomFit(allocation, task); ok {
allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
} else {
allocation.Flags["valid"] = false
}
2020-05-14 12:52:39 +00:00
}
}
2020-05-18 00:52:33 +00:00
//fmt.Println(evaluatue(allocation))
2020-05-14 12:52:39 +00:00
//fmt.Println(allocation)
return allocation
}
2020-05-25 03:35:44 +00:00
func testGA() {
2020-05-23 18:22:05 +00:00
numTask := 20
2020-05-14 12:52:39 +00:00
2020-05-25 03:35:44 +00:00
nodesMap = map[string]NodeStatus{}
2020-05-14 12:52:39 +00:00
tasksMap = map[string]Task{}
for i := 0; i < numTask*3; i++ {
2020-05-25 03:35:44 +00:00
node := NodeStatus{ClientID: strconv.Itoa(i), Rack: strconv.Itoa(i % 40), Domain: strconv.Itoa(i % 4)}
2020-05-18 00:52:33 +00:00
node.NumCPU = 24
node.MemTotal = 188
node.TotalBW = 100
2020-05-25 03:35:44 +00:00
cnt := rand.Intn(3) + 1
for i := 0; i < cnt; i++ {
node.Status = append(node.Status, GPUStatus{MemoryTotal: 11439, MemoryAllocated: 0, UUID: node.ClientID + strconv.Itoa(i)})
}
2020-05-18 00:52:33 +00:00
nodesMap[strconv.Itoa(i)] = node
2020-05-14 12:52:39 +00:00
}
for i := 0; i < numTask; i++ {
isPS := false
2020-05-25 03:35:44 +00:00
if i >= 3 {
2020-05-14 12:52:39 +00:00
isPS = true
}
2020-05-18 00:52:33 +00:00
task := Task{Name: strconv.Itoa(i), IsPS: isPS}
task.Memory = 4
task.NumberCPU = 2
task.NumberGPU = 1
tasksMap[strconv.Itoa(i)] = task
2020-05-14 12:52:39 +00:00
}
2020-05-25 03:35:44 +00:00
var nodes []NodeStatus
2020-05-18 00:52:33 +00:00
var tasks []Task
for _, node := range nodesMap {
nodes = append(nodes, node)
}
for _, task := range tasksMap {
tasks = append(tasks, task)
}
s := time.Now()
2020-05-23 13:06:31 +00:00
allocation := fastBestFit(nodes, tasks)
2020-05-25 03:35:44 +00:00
log.Println(time.Since(s))
2020-05-18 00:52:33 +00:00
2020-05-14 12:52:39 +00:00
// Instantiate a GA with a GAConfig
var ga, err = eaopt.NewDefaultGAConfig().NewGA()
if err != nil {
2020-05-25 03:35:44 +00:00
log.Println(err)
2020-05-14 12:52:39 +00:00
return
}
// Set the number of generations to run for
2020-05-18 00:52:33 +00:00
ga.NGenerations = math.MaxInt32
2020-05-14 12:52:39 +00:00
ga.NPops = 1
2020-05-18 00:52:33 +00:00
ga.PopSize = 30 + uint(numTask/2)
2020-05-14 12:52:39 +00:00
// Add a custom print function to track progress
ga.Callback = func(ga *eaopt.GA) {
2020-05-25 03:35:44 +00:00
log.Printf("Best fitness at generation %d: %f\n", ga.Generations, ga.HallOfFame[0].Fitness)
2020-05-14 12:52:39 +00:00
}
2020-05-18 00:52:33 +00:00
bestFitness := math.MaxFloat64
2020-05-14 12:52:39 +00:00
count := 0
2020-05-18 00:52:33 +00:00
ts := time.Now()
2020-05-14 12:52:39 +00:00
ga.EarlyStop = func(ga *eaopt.GA) bool {
gap := math.Abs(ga.HallOfFame[0].Fitness - bestFitness)
2020-05-23 13:06:31 +00:00
if gap <= 0.000001 || ga.HallOfFame[0].Fitness >= bestFitness {
2020-05-23 18:22:05 +00:00
if count >= 30 || time.Since(ts) > time.Second*30 {
2020-05-25 03:35:44 +00:00
log.Println("Early Stop")
2020-05-14 12:52:39 +00:00
return true
} else {
count++
}
} else {
bestFitness = ga.HallOfFame[0].Fitness
count = 1
}
return false
}
// Find the minimum
err = ga.Minimize(VectorFactory)
2020-05-25 03:35:44 +00:00
log.Println(time.Since(ts))
log.Println(ga.HallOfFame[0].Genome.(Allocation).TasksOnNode)
2020-05-18 00:52:33 +00:00
//fmt.Println(ga.HallOfFame[0].Genome.(Allocation).Nodes)
2020-05-14 12:52:39 +00:00
if err != nil {
2020-05-25 03:35:44 +00:00
log.Println(err)
2020-05-14 12:52:39 +00:00
return
}
2020-05-23 13:06:31 +00:00
2020-05-25 03:35:44 +00:00
log.Println(allocation)
2020-05-14 12:52:39 +00:00
}