diff --git a/src/evaluator.go b/src/evaluator.go
index 5c151ca..065229c 100644
--- a/src/evaluator.go
+++ b/src/evaluator.go
@@ -14,6 +14,8 @@ type Evaluator struct {
     factorNode   float64
     factorRack   float64
     factorDomain float64
+
+    costLoad float64
 }
 
 func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
@@ -28,6 +30,7 @@ func (eva *Evaluator) init(nodes []NodeStatus, tasks []Task) {
     eva.factorDomain = 40.0
     eva.cost = 0.0
     eva.costNetwork = 0.0
+    eva.costLoad = 0.0
 }
 
 func (eva *Evaluator) add(node NodeStatus, task Task) {
@@ -63,6 +66,20 @@ func (eva *Evaluator) add(node NodeStatus, task Task) {
         eva.totalWorker++
     }
     eva.cost = eva.costNetwork
+
+    if task.IsPS {
+        //eva.costLoad += 1
+    } else {
+        //eva.costLoad += 0.5
+    }
+    numberGPU := 1
+    for _, gpu := range node.Status {
+        if gpu.MemoryAllocated != 0 {
+            numberGPU += 1
+        }
+    }
+    eva.costLoad += float64(numberGPU) / float64(len(node.Status))
+
 }
 
 func (eva *Evaluator) remove(node NodeStatus, task Task) {
@@ -88,10 +105,23 @@ func (eva *Evaluator) remove(node NodeStatus, task Task) {
         eva.totalWorker--
     }
     eva.cost = eva.costNetwork
+
+    if task.IsPS {
+        //eva.costLoad -= 1
+    } else {
+        //eva.costLoad -= 0.5
+    }
+    numberGPU := 1
+    for _, gpu := range node.Status {
+        if gpu.MemoryAllocated != 0 {
+            numberGPU += 1
+        }
+    }
+    eva.costLoad -= float64(numberGPU) / float64(len(node.Status))
 }
 
 func (eva *Evaluator) calculate() float64 {
-    return eva.cost
+    return eva.cost + eva.costLoad/float64(eva.totalPS+eva.totalWorker)
 }
 
 func evaluate(allocation Allocation) float64 {
@@ -189,6 +219,7 @@ func evaluate(allocation Allocation) float64 {
     costLB *= 100
     //fmt.Println(costLB)
 
-    cost := 0.0*costLB + 1.0*costNetwork
+    cost := costNetwork
+    //cost := 0.0*costLB + 1.0*costNetwork
     return cost
 }
diff --git a/src/ga.go b/src/ga.go
index 472b700..3d4db1b 100644
--- a/src/ga.go
+++ b/src/ga.go
@@ -3,14 +3,10 @@ package main
 import (
     "math/rand"
     "github.com/MaxHalford/eaopt"
-    "time"
-    "math"
+    "math"
     log "github.com/sirupsen/logrus"
 )
 
-var nodesMap map[string]NodeStatus
-var tasksMap map[string]Task
-
 // A resource allocation
 type Allocation struct {
     TasksOnNode map[string][]Task // tasks on nodes[id]
@@ -18,6 +14,7 @@ type Allocation struct {
     NodeIDs     []string
     Flags       map[string]bool
     Evaluator   Evaluator
+    Tasks       []Task
 }
 
 func randomFit(allocation Allocation, task Task) (string, bool) {
@@ -27,10 +24,10 @@ func randomFit(allocation Allocation, task Task) (string, bool) {
         numberGPU := 0
         for _, gpu := range allocation.Nodes[nodeID].Status {
             if gpu.MemoryAllocated == 0 {
-                numberGPU += 0
+                numberGPU += 1
             }
         }
-        if _, ok := allocation.Nodes[nodeID]; ok && len(allocation.TasksOnNode[nodeID]) < numberGPU {
+        if task.NumberGPU <= numberGPU {
             flag = true
             break
         }
@@ -42,13 +39,16 @@ func firstFit(allocation Allocation, task Task) (string, bool) {
     flag := false
     nodeID := ""
     for _, nodeID = range allocation.NodeIDs {
+        if _, ok := allocation.Nodes[nodeID]; !ok {
+            continue
+        }
         numberGPU := 0
         for _, gpu := range allocation.Nodes[nodeID].Status {
             if gpu.MemoryAllocated == 0 {
-                numberGPU += 0
+                numberGPU += 1
             }
         }
-        if _, ok := allocation.Nodes[nodeID]; ok && len(allocation.TasksOnNode[nodeID]) < numberGPU {
+        if task.NumberGPU <= numberGPU {
             flag = true
             break
         }
@@ -57,6 +57,8 @@
 }
 
 func fastBestFit(nodes []NodeStatus, tasks []Task) Allocation {
+    //log.Info(nodes)
+    //log.Info(tasks)
     eva := Evaluator{}
     eva.init(nodes, tasks)
 
@@ -64,90 +66,76 @@
     allocation.TasksOnNode = map[string][]Task{}
     for _, task := range tasks {
         minCost := math.MaxFloat64
-        nodeID := ""
-        for _, node := range nodes {
+        var best *NodeStatus
+        for i, node := range nodes {
             if _, ok := allocation.TasksOnNode[node.ClientID]; !ok {
                 allocation.TasksOnNode[node.ClientID] = []Task{}
             }
             numberGPU := 0
-            for _, gpu := range allocation.Nodes[nodeID].Status {
+            for _, gpu := range node.Status {
                 if gpu.MemoryAllocated == 0 {
-                    numberGPU += 0
+                    numberGPU += 1
                 }
             }
-            if len(allocation.TasksOnNode[node.ClientID]) >= numberGPU {
+            if task.NumberGPU > numberGPU {
                 continue
             }
             eva.add(node, task)
             cost := eva.calculate()
             eva.remove(node, task)
-            if cost < minCost || nodeID == "" {
+            //log.Info(node, cost)
+            if cost < minCost || best == nil {
                 minCost = cost
-                nodeID = node.ClientID
+                best = &nodes[i]
             }
-            //fmt.Println(cost)
         }
-        if nodeID == "" {
+        log.Info(task, " choose ", best.ClientID)
+        if best == nil {
             allocation.Flags["valid"] = false
             break
         } else {
             //fmt.Println(task, nodeID, allocation.TasksOnNode, minCost)
-            allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
-            eva.add(nodesMap[nodeID], task)
-        }
-    }
-    log.Println(eva.calculate())
-    return allocation
-}
-
-func bestFit(allocation Allocation, task Task) (string, bool) {
-    flag := false
-    nodeID := ""
-    minCost := math.MaxFloat64
-    for _, id := range allocation.NodeIDs {
-        numberGPU := 0
-        for _, gpu := range allocation.Nodes[id].Status {
-            if gpu.MemoryAllocated == 0 {
-                numberGPU += 0
-            }
-        }
-        if _, ok := allocation.Nodes[id]; ok && len(allocation.TasksOnNode[id]) < numberGPU {
-            /* add */
-            allocation.TasksOnNode[id] = append(allocation.TasksOnNode[id], task)
-
-            /* evaluate */
-            cost := evaluate(allocation)
-
-            /* revert */
-            idx := -1
-            for i, task2 := range allocation.TasksOnNode[id] {
-                if task2.Name == task.Name {
-                    idx = i
+            allocation.TasksOnNode[best.ClientID] = append(allocation.TasksOnNode[best.ClientID], task)
+            eva.add(*best, task)
+            for i := range best.Status {
+                //allocate more than 1
+                if best.Status[i].MemoryAllocated == 0 {
+                    best.Status[i].MemoryAllocated += task.MemoryGPU
+                    break
                 }
             }
-            copy(allocation.TasksOnNode[id][idx:], allocation.TasksOnNode[id][idx+1:])
-            allocation.TasksOnNode[id] = allocation.TasksOnNode[id][:len(allocation.TasksOnNode[id])-1]
-
-            if cost < minCost || !flag {
-                nodeID = id
-                minCost = cost
-            }
-            flag = true
         }
     }
-    return nodeID, flag
+    //log.Info(allocation.TasksOnNode)
+    log.Println("BestFit Cost:", eva.calculate())
+    return allocation
 }
 
 /* Evaluate the allocation */
 func (X Allocation) Evaluate() (float64, error) {
+    //log.Info(X)
     if !X.Flags["valid"] {
         //fmt.Println("Invalid allocation")
         return math.MaxFloat64, nil
     }
-    costNetwork := evaluate(X)
+    //costNetwork := evaluate(X)
 
-    cost := costNetwork
+    var nodes []NodeStatus
+    for _, node := range X.Nodes {
+        nodes = append(nodes, node)
+    }
+
+    eva := Evaluator{}
+    eva.init(nodes, X.Tasks)
+    for node, tasks := range X.TasksOnNode {
+        for _, task := range tasks {
+            eva.add(X.Nodes[node], task)
+        }
+    }
+
+    cost := eva.calculate()
+    //log.Info(cost)
     //fmt.Println(taskToNode, cost, len(X.Nodes))
     return float64(cost), nil
 }
@@ -189,6 +177,12 @@ func (X Allocation) Mutate(rng *rand.Rand) {
     for _, task := range tasks {
         if nodeID, ok := firstFit(X, task); ok {
             X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
+            for i := range X.Nodes[nodeID].Status {
+                if X.Nodes[nodeID].Status[i].MemoryAllocated == 0 {
+                    X.Nodes[nodeID].Status[i].MemoryAllocated += task.MemoryGPU
+                    break
+                }
+            }
         } else {
             X.Flags["valid"] = false
         }
@@ -196,6 +190,7 @@
     }
 
     //fmt.Println("After", X)
+    return
     /* move tasks */
     if !X.Flags["valid"] {
         //fmt.Println("Invalid allocation")
@@ -230,7 +225,6 @@
     if !Y.(Allocation).Flags["valid"] || !X.Flags["valid"] {
         return
     }
-    //fmt.Println("Crossover")
     taskToNode := map[string]string{}
     for nodeID, tasks := range X.TasksOnNode {
         for _, task := range tasks {
@@ -265,6 +259,12 @@
             //fmt.Println(X.TasksOnNode)
             copy(X.TasksOnNode[nodeID2][idx:], X.TasksOnNode[nodeID2][idx+1:])
             X.TasksOnNode[nodeID2] = X.TasksOnNode[nodeID2][:len(X.TasksOnNode[nodeID2])-1]
+            for i := range X.Nodes[nodeID].Status {
+                if X.Nodes[nodeID].Status[i].MemoryAllocated == 0 {
+                    X.Nodes[nodeID].Status[i].MemoryAllocated -= task.MemoryGPU
+                    break
+                }
+            }
             //fmt.Println(X.TasksOnNode)
         }
         /* reschedule tasks on tgt node */
@@ -291,6 +291,12 @@
     for _, task := range tasks {
         if nodeID, ok := firstFit(X, task); ok {
             X.TasksOnNode[nodeID] = append(X.TasksOnNode[nodeID], task)
+            for i := range X.Nodes[nodeID].Status {
+                if X.Nodes[nodeID].Status[i].MemoryAllocated == 0 {
+                    X.Nodes[nodeID].Status[i].MemoryAllocated += task.MemoryGPU
+                    break
+                }
+            }
         } else {
             X.Flags["valid"] = false
         }
@@ -319,74 +325,3 @@
     }
     return Y
 }
-
-func VectorFactory(rng *rand.Rand) eaopt.Genome {
-    allocation := Allocation{TasksOnNode: map[string][]Task{}, Nodes: map[string]NodeStatus{}, Flags: map[string]bool{"valid": true}}
-
-    var nodes []NodeStatus
-    var tasks []Task
-
-    for _, node := range nodesMap {
-        nodes = append(nodes, node)
-    }
-    for _, task := range tasksMap {
-        tasks = append(tasks, task)
-    }
-
-    /* shuffle */
-    for n := len(nodes); n > 0; n-- {
-        randIndex := rng.Intn(n)
-        nodes[n-1], nodes[randIndex] = nodes[randIndex], nodes[n-1]
-    }
-    for n := len(tasks); n > 0; n-- {
-        randIndex := rng.Intn(n)
-        tasks[n-1], tasks[randIndex] = tasks[randIndex], tasks[n-1]
-    }
-
-    /* pick nodes */
-    for _, node := range nodesMap {
-        allocation.Nodes[node.ClientID] = node
-        allocation.NodeIDs = append(allocation.NodeIDs, node.ClientID)
-    }
-
-    t := rng.Int() % 10
-    if t == -1 {
-        /* best-fit */
-        ts := time.Now()
-
-        /*
-        for _, task := range tasks {
-            if nodeID, ok := bestFit(allocation, task); ok {
-                allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
-            } else {
-                allocation.Flags["valid"] = false
-            }
-        }
-        */
-
-        allocation.TasksOnNode = fastBestFit(nodes, tasks).TasksOnNode
-        log.Println(time.Since(ts))
-        //fmt.Println("Best Fit")
-    } else if t%2 == 0 {
-        /* first-fit */
-        for _, task := range tasks {
-            if nodeID, ok := randomFit(allocation, task); ok {
-                allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
-            } else {
-                allocation.Flags["valid"] = false
-            }
-        }
-    } else {
-        /* random-fit */
-        for _, task := range tasks {
-            if nodeID, ok := randomFit(allocation, task); ok {
-                allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
-            } else {
-                allocation.Flags["valid"] = false
-            }
-        }
-    }
-    //fmt.Println(evaluatue(allocation))
-    //fmt.Println(allocation)
-    return allocation
-}
diff --git a/src/ga_test.go b/src/ga_test.go
index a3ae128..78afa67 100644
--- a/src/ga_test.go
+++ b/src/ga_test.go
@@ -4,53 +4,61 @@ import (
     "strconv"
     "math/rand"
     "time"
-    "log"
+    log "github.com/sirupsen/logrus"
     "github.com/MaxHalford/eaopt"
     "math"
     "testing"
 )
 
-func TestGA(t *testing.T) {
-    numTask := 20
-
-    nodesMap = map[string]NodeStatus{}
-    tasksMap = map[string]Task{}
-
-    for i := 0; i < numTask*3; i++ {
-        node := NodeStatus{ClientID: strconv.Itoa(i), Rack: strconv.Itoa(i % 40), Domain: strconv.Itoa(i % 4)}
-        node.NumCPU = 24
-        node.MemTotal = 188
-        node.TotalBW = 100
-        cnt := rand.Intn(3) + 1
-        for i := 0; i < cnt; i++ {
-            node.Status = append(node.Status, GPUStatus{MemoryTotal: 11439, MemoryAllocated: 0, UUID: node.ClientID + strconv.Itoa(i)})
-        }
-        nodesMap[strconv.Itoa(i)] = node
-    }
-    for i := 0; i < numTask; i++ {
-        isPS := false
-        if i >= 3 {
-            isPS = true
-        }
-        task := Task{Name: strconv.Itoa(i), IsPS: isPS}
-        task.Memory = 4
-        task.NumberCPU = 2
-        task.NumberGPU = 1
-        tasksMap[strconv.Itoa(i)] = task
-    }
+func TgenerateCase() ([]NodeStatus, []Task) {
+    numTask := 6
 
     var nodes []NodeStatus
     var tasks []Task
-    for _, node := range nodesMap {
+    for i := 0; i < numTask*3; i++ {
+        node := NodeStatus{ClientID: strconv.Itoa(i), Rack: "Rack-" + strconv.Itoa(i%40), Domain: "Domain-" + strconv.Itoa(i%4)}
+        node.NumCPU = 24
+        node.UtilCPU = 2.0
+        node.MemTotal = 188
+        node.MemAvailable = 20
+        node.TotalBW = 100
+        //cnt := 4
+        cnt := rand.Intn(3) + 1
+        for i := 0; i < cnt; i++ {
+            node.Status = append(node.Status, GPUStatus{MemoryTotal: 11439, MemoryAllocated: 0, UUID: node.ClientID + "-" + strconv.Itoa(i)})
+        }
         nodes = append(nodes, node)
     }
-    for _, task := range tasksMap {
+    for i := 0; i < numTask; i++ {
+        isPS := false
+        if i%4 == 0 {
+            isPS = true
+        }
+        task := Task{Name: "task-" + strconv.Itoa(i), IsPS: isPS}
+        task.Memory = 4
+        task.NumberCPU = 2
+        task.NumberGPU = 1
+        task.MemoryGPU = 4096
         tasks = append(tasks, task)
     }
+    return nodes, tasks
+}
+
+func TestBestFit(t *testing.T) {
+    nodes, tasks := TgenerateCase()
+    for _, node := range nodes {
+        log.Info(node)
+    }
 
     s := time.Now()
     allocation := fastBestFit(nodes, tasks)
     log.Println(time.Since(s))
+    log.Println(allocation)
+}
+
+func TestGA(t *testing.T) {
+    return
+    nodes, tasks := TgenerateCase()
 
     // Instantiate a GA with a GAConfig
     var ga, err = eaopt.NewDefaultGAConfig().NewGA()
@@ -62,7 +70,7 @@ func TestGA(t *testing.T) {
     // Set the number of generations to run for
     ga.NGenerations = math.MaxInt32
     ga.NPops = 1
-    ga.PopSize = 30 + uint(numTask/2)
+    ga.PopSize = 30 + uint(len(tasks)/2)
 
     // Add a custom print function to track progress
     ga.Callback = func(ga *eaopt.GA) {
@@ -90,15 +98,92 @@ func TestGA(t *testing.T) {
         return false
     }
 
+    var f = func(rng *rand.Rand) eaopt.Genome {
+        allocation := Allocation{TasksOnNode: map[string][]Task{}, Nodes: map[string]NodeStatus{}, Flags: map[string]bool{"valid": true}}
+
+        //log.Println(nodes)
+        var nodesT []NodeStatus
+        for _, node := range nodes {
+            nodesT = append(nodesT, node.Copy())
+        }
+
+        //nodesT[0].Status[0].MemoryAllocated = 100
+        //log.Println(nodes[0].Status[0].MemoryAllocated)
+
+        //log.Println(&nodesT[0])
+        //log.Println(&nodes[0])
+
+        for _, node := range nodesT {
+            allocation.Nodes[node.ClientID] = node
+        }
+        for _, task := range tasks {
+            allocation.Tasks = append(allocation.Tasks, task)
+        }
+
+        /* shuffle */
+        for n := len(tasks); n > 0; n-- {
+            randIndex := rng.Intn(n)
+            allocation.Tasks[n-1], allocation.Tasks[randIndex] = allocation.Tasks[randIndex], allocation.Tasks[n-1]
+        }
+
+        /* pick nodes */
+        for _, node := range nodesT {
+            allocation.Nodes[node.ClientID] = node
+            allocation.NodeIDs = append(allocation.NodeIDs, node.ClientID)
+        }
+
+        t := rng.Int() % 10
+        if t == 0 {
+            /* best-fit */
+            ts := time.Now()
+            allocation.TasksOnNode = fastBestFit(nodesT, tasks).TasksOnNode
+            log.Println(time.Since(ts))
+            //fmt.Println("Best Fit")
+        } else if t%2 == 0 {
+            /* first-fit */
+            for _, task := range tasks {
+                if nodeID, ok := randomFit(allocation, task); ok {
+                    allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
+                    for i := range allocation.Nodes[nodeID].Status {
+                        if allocation.Nodes[nodeID].Status[i].MemoryAllocated == 0 {
+                            allocation.Nodes[nodeID].Status[i].MemoryAllocated += task.MemoryGPU
+                            break
+                        }
+                    }
+                } else {
+                    allocation.Flags["valid"] = false
+                }
+            }
+        } else {
+            /* random-fit */
+            for _, task := range tasks {
+                if nodeID, ok := randomFit(allocation, task); ok {
+                    allocation.TasksOnNode[nodeID] = append(allocation.TasksOnNode[nodeID], task)
+                    for i := range allocation.Nodes[nodeID].Status {
+                        if allocation.Nodes[nodeID].Status[i].MemoryAllocated == 0 {
+                            allocation.Nodes[nodeID].Status[i].MemoryAllocated += task.MemoryGPU
+                            break
+                        }
+                    }
+                } else {
+                    allocation.Flags["valid"] = false
+                }
+            }
+        }
+        //fmt.Println(evaluatue(allocation))
+        //fmt.Println(allocation)
+        return allocation
+
+    }
+
     // Find the minimum
-    err = ga.Minimize(VectorFactory)
+    err = ga.Minimize(f)
     log.Println(time.Since(ts))
     log.Println(ga.HallOfFame[0].Genome.(Allocation).TasksOnNode)
-    //fmt.Println(ga.HallOfFame[0].Genome.(Allocation).Nodes)
+    //log.Println(ga.HallOfFame[0].Genome.(Allocation).Flags)
+    //log.Println(ga.HallOfFame[0].Genome.(Allocation).Nodes)
     if err != nil {
         log.Println(err)
         return
     }
-
-    log.Println(allocation)
 }
diff --git a/src/pool_status.go b/src/pool_status.go
index bd1bac3..4cd0d2f 100644
--- a/src/pool_status.go
+++ b/src/pool_status.go
@@ -40,3 +40,10 @@ type NodeStatus struct {
     TotalBW float64     `json:"bw_total"`
     Status  []GPUStatus `json:"status"`
 }
+
+func (X *NodeStatus) Copy() NodeStatus {
+    res := *X
+    res.Status = make([]GPUStatus, len(X.Status))
+    copy(res.Status, X.Status)
+    return res
+}
diff --git a/src/resource_pool.go b/src/resource_pool.go
index 93b4bd4..086daa7 100644
--- a/src/resource_pool.go
+++ b/src/resource_pool.go
@@ -158,14 +158,14 @@ func (pool *ResourcePool) checkDeadNodes() {
 
 func (pool *ResourcePool) GPUModelToPower(model string) int {
     mapper := map[string]int{
-        "K40": 1, "Tesla K40": 1,
-        "K80": 2, "Tesla K80": 2,
-        "P100": 3, "Tesla P100": 3,
+        "K40": 2, "Tesla K40": 2,
+        "K80": 3, "Tesla K80": 3,
+        "P100": 4, "Tesla P100": 4,
     }
     if power, err := mapper[model]; !err {
         return power
     }
-    return 0
+    return 1
 }
 
 func (pool *ResourcePool) getNodePool(name string) int {
@@ -639,12 +639,16 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
     locks := map[int]*sync.Mutex{}
 
     allocationType := 0
-    availableGPUs := map[string][]GPUStatus{}
 
-    var candidates []*NodeStatus
+    var candidates []NodeStatus
+
+    if pool.TotalGPU == 0 {
+        return []NodeStatus{}
+    }
+    loadRatio := float64(pool.UsingGPU) / float64(pool.TotalGPU)
 
     /* first, choose sharable GPUs */
-    if pool.enableShare && (pool.TotalGPU != 0 && len(job.Tasks) == 1 && task.NumberGPU == 1 && float64(pool.UsingGPU)/float64(pool.TotalGPU) >= pool.enableShareRatio) {
+    if pool.enableShare && len(job.Tasks) == 1 && task.NumberGPU == 1 && loadRatio >= pool.enableShareRatio {
         // check sharable
         allocationType = 1
         if util, valid := InstanceOfOptimizer().predictUtilGPU(job.Name); valid {
@@ -671,13 +675,12 @@ func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
                     }
                     if totalUtil < 100 {
                         available = append(available, status)
-                        availableGPUs[node.ClientID] = available
                     }
                 }
             }
         }
         if len(available) >= task.NumberGPU {
-            candidates = append(candidates, node)
+            candidates = append(candidates, *node)
             if len(candidates) >= len(job.Tasks)*3+5 {
                 break
             }
         }
@@ -711,8 +714,7 @@
             }
         }
         if len(available) >= task.NumberGPU {
-            candidates = append(candidates, node)
-            availableGPUs[node.ClientID] = available
+            candidates = append(candidates, *node)
             if len(candidates) >= len(job.Tasks)*3+5 {
                 break
             }
         }
@@ -733,11 +735,7 @@
     if len(candidates) == 0 && len(job.Tasks) == 1 && task.NumberGPU == 1 && pool.enablePreSchedule {
         estimate, valid := InstanceOfOptimizer().predictTime(job.Name)
 
-        //log.Info(pool.TotalGPU)
-        //log.Info(estimate, valid)
-        //log.Info(scheduler.UsingGPU)
-
-        if pool.TotalGPU != 0 && float64(pool.UsingGPU)/float64(pool.TotalGPU) >= pool.enablePreScheduleRatio && valid {
+        if loadRatio >= pool.enablePreScheduleRatio && valid {
             allocationType = 3
             for cur := start; ; {
                 if _, ok := locks[cur.ID]; !ok {
@@ -765,8 +763,7 @@
                 }
             }
             if len(available) >= task.NumberGPU {
-                candidates = append(candidates, node)
-                availableGPUs[node.ClientID] = available
+                candidates = append(candidates, *node)
                 if len(candidates) >= len(job.Tasks)*3+5 {
                     break
                 }
             }
@@ -792,44 +789,69 @@
     /* assign */
     var ress []NodeStatus
     if len(candidates) > 0 {
-        /*
-        for range job.Tasks { //append would cause uncertain order
-            resources = append(resources, NodeStatus{ClientID: "null"})
-        }
-        */
-
-        var nodes []NodeStatus
-        if len(job.Tasks) == 1 {
-            node := pool.pickNode(candidates, availableGPUs, task, job, []NodeStatus{})
-            nodes = append(nodes, *node)
+        for range job.Tasks { //append would cause uncertain order
+            ress = append(ress, NodeStatus{ClientID: "null"})
         }
 
-        for _, node := range nodes {
-            res := NodeStatus{}
-            res.ClientID = node.ClientID
-            res.ClientHost = node.ClientHost
-            res.Status = availableGPUs[node.ClientID][0:task.NumberGPU]
-            res.NumCPU = task.NumberCPU
-            res.MemTotal = task.Memory
+        var nodesT []NodeStatus
+        for _, node := range candidates {
+            nodesT = append(nodesT, node.Copy())
+        }
 
-            for i := range res.Status {
-                for j := range node.Status {
-                    if res.Status[i].UUID == node.Status[j].UUID {
-                        if node.Status[j].MemoryAllocated == 0 {
-                            pool.UsingGPUMu.Lock()
-                            pool.UsingGPU ++
-                            pool.UsingGPUMu.Unlock()
+        allocation := fastBestFit(nodesT, job.Tasks)
+        if !allocation.Flags["valid"] {
+            return []NodeStatus{}
+        }
+
+        for nodeID, tasks := range allocation.TasksOnNode {
+            var node *NodeStatus
+            for i := range candidates {
+                if candidates[i].ClientID == nodeID {
+                    node = &candidates[i]
+                }
+            }
+
+            var available []GPUStatus
+            for _, gpu := range node.Status {
+                if gpu.MemoryAllocated == 0 {
+                    available = append(available, gpu)
+                }
+            }
+            for _, task := range tasks {
+                res := NodeStatus{}
+                res.ClientID = node.ClientID
+                res.ClientHost = node.ClientHost
+                res.NumCPU = task.NumberCPU
+                res.MemTotal = task.Memory
+                res.Status = available[0:task.NumberGPU]
+                available = available[task.NumberGPU:]
+
+                for i := range res.Status {
+                    for j := range node.Status {
+                        if res.Status[i].UUID == node.Status[j].UUID {
+                            if node.Status[j].MemoryAllocated == 0 {
+                                pool.UsingGPUMu.Lock()
+                                pool.UsingGPU ++
+                                pool.UsingGPUMu.Unlock()
+                            }
+                            node.Status[j].MemoryAllocated += task.MemoryGPU
+                            res.Status[i].MemoryTotal = task.MemoryGPU
                         }
-                        node.Status[j].MemoryAllocated += task.MemoryGPU
-                        res.Status[i].MemoryTotal = task.MemoryGPU
                     }
                 }
+                for _, t := range res.Status {
+                    pool.attach(t.UUID, job.Name)
+                }
+
+                for i := range job.Tasks {
+                    if job.Tasks[i].Name == task.Name {
+                        ress[i] = res
+                    }
+                }
+            }
-            for _, t := range res.Status {
-                pool.attach(t.UUID, job.Name)
-            }
-            ress = append(ress, res)
         }
+
     }
 
     for segID, lock := range locks {