// YAO-scheduler/src/resource_pool.go
package main
import (
"sync"
"time"
"net/url"
"strings"
"math/rand"
"strconv"
"sort"
"hash/fnv"
)
var resourcePoolInstance *ResourcePool
var resourcePoolInstanceLock sync.Mutex
func InstanceOfResourcePool() *ResourcePool {
resourcePoolInstanceLock.Lock()
defer resourcePoolInstanceLock.Unlock()
if resourcePoolInstance == nil {
resourcePoolInstance = &ResourcePool{}
}
return resourcePoolInstance
}
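// Usage sketch (illustrative only, not part of the original flow), assuming a
// Configuration value `conf` has been loaded elsewhere in the scheduler:
//
//	pool := InstanceOfResourcePool()
//	pool.init(conf)
//	free, using := pool.countGPU()
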
2019-03-04 09:19:55 +00:00
type ResourcePool struct {
2020-04-13 14:35:17 +00:00
poolsCount int
2020-05-03 16:19:50 +00:00
pools []PoolSeg
2020-05-03 15:32:38 +00:00
poolsMu sync.Mutex
2019-04-29 09:05:15 +00:00
history []PoolStatus
historyMu sync.Mutex
2019-06-04 03:08:49 +00:00
2020-05-25 12:50:41 +00:00
heartBeat map[string]time.Time
heartBeatMu sync.Mutex
versions map[string]float64
versionsMu sync.Mutex
counter int
counterTotal int
subscriptions map[string]map[string]int
subscriptionsMu sync.Mutex
2019-06-05 09:09:22 +00:00
networks map[string]bool
networksFree map[string]bool
networkMu sync.Mutex
2020-03-29 13:12:44 +00:00
bindings map[string]map[string]Job
bindingsMu sync.Mutex
exclusiveJobs map[string]bool
2020-04-30 09:52:52 +00:00
2020-05-28 06:26:08 +00:00
TotalGPU int
TotalGPUMu sync.Mutex
TotalCPU int
TotalMemory int
UsingGPU int
UsingGPUMu sync.Mutex
2020-05-24 13:07:02 +00:00
2020-06-08 12:49:50 +00:00
enableBatch bool
2020-06-10 16:08:42 +00:00
batchJobs map[string]Job
2020-06-08 12:49:50 +00:00
batchMu sync.Mutex
batchAllocations map[string][]NodeStatus
2020-06-11 03:36:52 +00:00
batchInterval int
2019-04-29 09:05:15 +00:00
}
2020-05-24 13:07:02 +00:00
func (pool *ResourcePool) init(conf Configuration) {
log.Info("RM started ")
pool.networks = map[string]bool{}
pool.networksFree = map[string]bool{}
pool.bindings = map[string]map[string]Job{}
pool.exclusiveJobs = map[string]bool{}
pool.TotalGPU = 0
pool.UsingGPU = 0
pool.TotalCPU = 0
pool.TotalMemory = 0
pool.enableBatch = false
pool.batchAllocations = map[string][]NodeStatus{}
pool.batchJobs = map[string]Job{}
pool.batchInterval = 15
/* init pools */
pool.poolsCount = 300
for i := 0; i < pool.poolsCount; i++ {
pool.pools = append(pool.pools, PoolSeg{Lock: sync.Mutex{}, ID: i})
}
/* generate working segs */
for i := 0; i < 10; i++ {
pool.pools[rand.Intn(pool.poolsCount)].Nodes = map[string]*NodeStatus{}
}
/* init Next pointer */
var pre *PoolSeg
for i := pool.poolsCount*2 - 1; ; i-- {
if pool.pools[i%pool.poolsCount].Next != nil {
break
}
pool.pools[i%pool.poolsCount].Next = pre
if pool.pools[i%pool.poolsCount].Nodes != nil {
pre = &pool.pools[i%pool.poolsCount]
}
}
pool.versions = map[string]float64{}
pool.subscriptions = map[string]map[string]int{}
pool.heartBeat = map[string]time.Time{}
go func() {
pool.checkDeadNodes()
}()
pool.history = []PoolStatus{}
go func() {
pool.saveStatusHistory()
}()
go func() {
/* batch allocation */
for {
time.Sleep(time.Second * time.Duration(pool.batchInterval))
if !pool.enableBatch {
continue
}
pool.batchMu.Lock()
var nodes []NodeStatus
var left []Job
for {
var tasks []Task
for _, job := range pool.batchJobs {
for _, task := range job.Tasks {
tasks = append(tasks, task)
}
}
//log.Info(tasks)
job := Job{Tasks: tasks}
if len(tasks) == 0 {
break
}
nodes = pool.doAcquireResource(job)
if len(nodes) == 0 {
for jobName := range pool.batchJobs {
left = append(left, pool.batchJobs[jobName])
delete(pool.batchJobs, jobName)
log.Info("cannot find a valid allocation, remove a job randomly: ", jobName)
break
}
continue
}
for i, task := range job.Tasks {
if _, ok := pool.batchAllocations[task.Job]; !ok {
pool.batchAllocations[task.Job] = []NodeStatus{}
}
pool.batchAllocations[task.Job] = append(pool.batchAllocations[task.Job], nodes[i])
}
break
}
pool.batchJobs = map[string]Job{}
for _, job := range left {
pool.batchJobs[job.Name] = job
}
pool.batchMu.Unlock()
}
}()
}
/* check dead nodes periodically */
func (pool *ResourcePool) checkDeadNodes() {
for {
pool.heartBeatMu.Lock()
var nodesToDel []string
for k, v := range pool.heartBeat {
if v.Add(time.Second * 30).Before(time.Now()) {
segID := pool.getNodePool(k)
seg := &pool.pools[segID]
if seg.Nodes == nil {
seg = seg.Next
}
seg.Lock.Lock()
pool.TotalGPUMu.Lock()
if _, ok := seg.Nodes[k]; ok {
pool.TotalGPU -= len(seg.Nodes[k].Status)
pool.TotalCPU -= seg.Nodes[k].NumCPU
pool.TotalMemory -= seg.Nodes[k].MemTotal
}
pool.TotalGPUMu.Unlock()
delete(seg.Nodes, k)
seg.Lock.Unlock()
pool.versionsMu.Lock()
delete(pool.versions, k)
pool.versionsMu.Unlock()
nodesToDel = append(nodesToDel, k)
log.Warn("node ", k, " is offline")
}
}
for _, v := range nodesToDel {
segID := pool.getNodePool(v)
seg := &pool.pools[segID]
if seg.Nodes == nil {
seg = seg.Next
}
seg.Lock.Lock()
delete(seg.Nodes, v)
seg.Lock.Unlock()
delete(pool.heartBeat, v)
}
pool.heartBeatMu.Unlock()
time.Sleep(time.Second * 10)
}
}
func (pool *ResourcePool) GPUModelToPower(model string) int {
mapper := map[string]int{
"K40": 2, "Tesla K40": 2,
"K80": 3, "Tesla K80": 3,
"P100": 4, "Tesla P100": 4,
}
if power, ok := mapper[model]; ok {
return power
}
return 1
}
func (pool *ResourcePool) getNodePool(name string) int {
h := fnv.New32a()
h.Write([]byte(name))
return int(h.Sum32()) % pool.poolsCount
}
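// Hedged sketch of segment lookup: a node name hashes to a slot, and slots
// without a Nodes map delegate to the next active segment via Next (the same
// pattern used in update/getByID below). "node-1" is only an example name.
//
//	segID := pool.getNodePool("node-1")
//	seg := &pool.pools[segID]
//	if seg.Nodes == nil {
//	    seg = seg.Next
//	}
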
/* save pool status periodically */
func (pool *ResourcePool) saveStatusHistory() {
/* waiting for nodes */
time.Sleep(time.Second * 30)
for {
summary := PoolStatus{}
UtilCPU := 0.0
TotalCPU := 0
TotalMem := 0
AvailableMem := 0
TotalGPU := 0
UtilGPU := 0
TotalMemGPU := 0
AvailableMemGPU := 0
nodesCount := 0
start := pool.pools[0]
if start.Nodes == nil {
start = *start.Next
}
for cur := start; ; {
cur.Lock.Lock()
for _, node := range cur.Nodes {
UtilCPU += node.UtilCPU
TotalCPU += node.NumCPU
TotalMem += node.MemTotal
AvailableMem += node.MemAvailable
for _, GPU := range node.Status {
UtilGPU += GPU.UtilizationGPU
TotalGPU++
TotalMemGPU += GPU.MemoryTotal
AvailableMemGPU += GPU.MemoryFree
}
}
nodesCount += len(cur.Nodes)
cur.Lock.Unlock()
cur = *cur.Next
if cur.ID == start.ID {
break
}
}
summary.TimeStamp = time.Now().Format("2006-01-02 15:04:05")
summary.UtilCPU = UtilCPU / (float64(nodesCount) + 0.001)
summary.TotalCPU = TotalCPU
summary.TotalMem = TotalMem
summary.AvailableMem = AvailableMem
summary.TotalGPU = TotalGPU
if TotalGPU == 0 {
summary.UtilGPU = 0.0
} else {
summary.UtilGPU = UtilGPU / TotalGPU
}
summary.TotalMemGPU = TotalMemGPU
summary.AvailableMemGPU = AvailableMemGPU
pool.historyMu.Lock()
pool.history = append(pool.history, summary)
if len(pool.history) > 60 {
pool.history = pool.history[len(pool.history)-60:]
}
pool.historyMu.Unlock()
pool.TotalGPUMu.Lock()
pool.TotalGPU = TotalGPU
pool.TotalCPU = TotalCPU
pool.TotalMemory = TotalMem
pool.TotalGPUMu.Unlock()
time.Sleep(time.Second * 60)
}
}
/* update node info */
func (pool *ResourcePool) update(node NodeStatus) {
pool.poolsMu.Lock()
defer pool.poolsMu.Unlock()
segID := pool.getNodePool(node.ClientID)
seg := &pool.pools[segID]
if seg.Nodes == nil {
seg = seg.Next
}
seg.Lock.Lock()
defer seg.Lock.Unlock()
/* notify subscribers & refresh heartbeat */
go func(node NodeStatus) {
pool.subscriptionsMu.Lock()
defer pool.subscriptionsMu.Unlock()
pool.bindingsMu.Lock()
defer pool.bindingsMu.Unlock()
for _, gpu := range node.Status {
if _, ok := pool.subscriptions[gpu.UUID]; ok {
for jobName := range pool.subscriptions[gpu.UUID] {
go func(name string) {
/* ask to update job status */
scheduler.QueryState(name)
}(jobName)
}
}
}
pool.heartBeatMu.Lock()
pool.heartBeat[node.ClientID] = time.Now()
pool.heartBeatMu.Unlock()
}(node)
pool.counterTotal++
pool.versionsMu.Lock()
if version, ok := pool.versions[node.ClientID]; ok && version == node.Version {
//pool.versionsMu.Unlock()
//return
}
pool.versionsMu.Unlock()
pool.counter++
log.Debug(node.Version, "!=", pool.versions[node.ClientID])
status, ok := seg.Nodes[node.ClientID]
if ok {
/* keep allocation info */
for i, GPU := range status.Status {
if GPU.UUID == node.Status[i].UUID {
node.Status[i].MemoryAllocated = GPU.MemoryAllocated
2019-03-20 03:14:07 +00:00
}
}
} else {
/* TODO: double-check that the node belongs to this seg */
pool.TotalGPUMu.Lock()
pool.TotalGPU += len(node.Status)
pool.TotalCPU += node.NumCPU
pool.TotalMemory += node.MemTotal
pool.TotalGPUMu.Unlock()
log.Info("node ", node.ClientID, " is online")
}
seg.Nodes[node.ClientID] = &node
if len(seg.Nodes) > 10 {
go func() {
pool.scaleSeg(seg)
}()
}
pool.versions[node.ClientID] = node.Version
}
/* split seg */
func (pool *ResourcePool) scaleSeg(seg *PoolSeg) {
log.Info("Scaling seg ", seg.ID)
pool.poolsMu.Lock()
defer pool.poolsMu.Unlock()
var segIDs []int
segIDs = append(segIDs, seg.ID)
/* find previous seg */
var pre *PoolSeg
for i := seg.ID + pool.poolsCount - 1; i >= 0; i-- {
segIDs = append(segIDs, i%pool.poolsCount)
if pool.pools[i%pool.poolsCount].Next.ID != seg.ID {
break
}
pre = &pool.pools[i%pool.poolsCount]
}
distance := seg.ID - pre.ID
if distance < 0 {
distance += pool.poolsCount
}
if distance <= 1 {
log.Warn("Unable to scale, ", seg.ID, ", already full")
return
}
candidate := pre
/* walk to the nearest middle */
if pre.ID < seg.ID {
candidate = &pool.pools[(pre.ID+seg.ID)/2]
} else {
candidate = &pool.pools[(pre.ID+seg.ID+pool.poolsCount)/2%pool.poolsCount]
}
candidate.Next = seg
candidate.Nodes = map[string]*NodeStatus{}
/* lock in asc sequence to avoid deadlock */
sort.Ints(segIDs)
for _, id := range segIDs {
pool.pools[id].Lock.Lock()
}
//log.Println(segIDs)
/* update Next */
for i := 0; ; i++ {
id := (pre.ID + i) % pool.poolsCount
if id == candidate.ID {
break
}
pool.pools[id].Next = candidate
}
/* move nodes */
nodesToMove := map[string]*NodeStatus{}
for _, node := range seg.Nodes {
seg2ID := pool.getNodePool(node.ClientID)
seg2 := &pool.pools[seg2ID]
if seg2.Nodes == nil {
seg2 = seg2.Next
}
if seg2.ID != seg.ID {
nodesToMove[node.ClientID] = node
}
}
for _, node := range nodesToMove {
delete(seg.Nodes, node.ClientID)
}
candidate.Nodes = nodesToMove
//log.Info("pre=", pre.ID, " active=", candidate.ID, " seg=", seg.ID)
for _, id := range segIDs {
pool.pools[id].Lock.Unlock()
}
}
/* get node by ClientID */
func (pool *ResourcePool) getByID(id string) NodeStatus {
poolID := pool.getNodePool(id)
seg := &pool.pools[poolID]
if seg.Nodes == nil {
seg = seg.Next
}
seg.Lock.Lock()
defer seg.Lock.Unlock()
status, ok := seg.Nodes[id]
if ok {
return *status
}
return NodeStatus{}
}
/* get all nodes */
func (pool *ResourcePool) list() MsgResource {
nodes := map[string]NodeStatus{}
start := pool.pools[0]
if start.Nodes == nil {
start = *start.Next
}
for cur := start; ; {
cur.Lock.Lock()
for k, node := range cur.Nodes {
nodes[k] = *node
}
cur.Lock.Unlock()
cur = *cur.Next
if cur.ID == start.ID {
break
}
}
return MsgResource{Code: 0, Resource: nodes}
}
func (pool *ResourcePool) statusHistory() MsgPoolStatusHistory {
pool.historyMu.Lock()
defer pool.historyMu.Unlock()
history := pool.history
return MsgPoolStatusHistory{Code: 0, Data: history}
}
func (pool *ResourcePool) getCounter() map[string]int {
return map[string]int{"counter": pool.counter, "counterTotal": pool.counterTotal}
}
func (pool *ResourcePool) acquireNetwork() string {
pool.networkMu.Lock()
defer pool.networkMu.Unlock()
var network string
log.Debug(pool.networksFree)
if len(pool.networksFree) == 0 {
for {
for {
network = "yao-net-" + strconv.Itoa(rand.Intn(999999))
if _, ok := pool.networks[network]; !ok {
break
}
}
v := url.Values{}
v.Set("name", network)
resp, err := doRequest("POST", "http://yao-agent-master:8000/create", strings.NewReader(v.Encode()), "application/x-www-form-urlencoded", "")
if err != nil {
log.Warn(err.Error())
continue
}
resp.Body.Close()
pool.networksFree[network] = true
pool.networks[network] = true
break
}
}
for k := range pool.networksFree {
network = k
delete(pool.networksFree, k)
break
}
return network
}
func (pool *ResourcePool) releaseNetwork(network string) {
pool.networkMu.Lock()
pool.networksFree[network] = true
pool.networkMu.Unlock()
}
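// Hedged sketch of the intended pairing: a network is acquired before a job's
// containers start and released after the job finishes; the `job` here is
// hypothetical.
//
//	network := pool.acquireNetwork()
//	// ... launch the job's containers on `network` ...
//	pool.releaseNetwork(network)
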
func (pool *ResourcePool) attach(GPU string, job Job) {
pool.subscriptionsMu.Lock()
defer pool.subscriptionsMu.Unlock()
pool.bindingsMu.Lock()
defer pool.bindingsMu.Unlock()
if _, ok := pool.subscriptions[GPU]; !ok {
pool.subscriptions[GPU] = map[string]int{}
}
pool.subscriptions[GPU][job.Name] = int(time.Now().Unix())
if _, ok := pool.bindings[GPU]; !ok {
pool.bindings[GPU] = map[string]Job{}
}
pool.bindings[GPU][job.Name] = job
}
func (pool *ResourcePool) detach(GPU string, job Job) {
pool.subscriptionsMu.Lock()
defer pool.subscriptionsMu.Unlock()
pool.bindingsMu.Lock()
defer pool.bindingsMu.Unlock()
if _, ok := pool.subscriptions[GPU]; ok {
delete(pool.subscriptions[GPU], job.Name)
}
if list, ok := pool.bindings[GPU]; ok {
delete(list, job.Name)
}
}
/* return free & using GPUs */
func (pool *ResourcePool) countGPU() (int, int) {
return pool.TotalGPU - pool.UsingGPU, pool.UsingGPU
}
func (pool *ResourcePool) pickNode(candidates []*NodeStatus, availableGPUs map[string][]GPUStatus, task Task, job Job, nodes []NodeStatus) *NodeStatus {
/* shuffle */
r := rand.New(rand.NewSource(time.Now().Unix()))
for n := len(candidates); n > 0; n-- {
randIndex := r.Intn(n)
candidates[n-1], candidates[randIndex] = candidates[randIndex], candidates[n-1]
}
/* sort */
// single node, single GPU
sort.Slice(candidates, func(a, b int) bool {
diffA := pool.GPUModelToPower(candidates[a].Status[0].ProductName) - pool.GPUModelToPower(task.ModelGPU)
diffB := pool.GPUModelToPower(candidates[b].Status[0].ProductName) - pool.GPUModelToPower(task.ModelGPU)
if diffA > 0 && diffB >= 0 && diffA > diffB {
return false //b
}
if diffA < 0 && diffB < 0 && diffA > diffB {
return false
}
if diffA < 0 && diffB >= 0 {
return false
}
if diffA == diffB {
if len(availableGPUs[candidates[a].ClientID]) == len(availableGPUs[candidates[b].ClientID]) {
return candidates[a].UtilCPU > candidates[b].UtilCPU
}
return len(availableGPUs[candidates[a].ClientID]) < len(availableGPUs[candidates[b].ClientID])
}
return true //a
})
var t []*NodeStatus
bestGPU := candidates[0].Status[0].ProductName
for _, node := range candidates {
if node.Status[0].ProductName != bestGPU {
break
}
t = append(t, node)
}
candidates = t
if (len(job.Tasks) == 1) && task.NumberGPU > 1 { //single node, multi GPUs
sort.Slice(candidates, func(a, b int) bool {
if len(availableGPUs[candidates[a].ClientID]) == len(availableGPUs[candidates[b].ClientID]) {
return candidates[a].UtilCPU > candidates[b].UtilCPU
}
return len(availableGPUs[candidates[a].ClientID]) < len(availableGPUs[candidates[b].ClientID])
})
}
if len(job.Tasks) > 1 { //multi nodes, multi GPUs
sort.Slice(candidates, func(a, b int) bool {
distanceA := 0
distanceB := 0
for _, node := range nodes {
if node.Rack != candidates[a].Rack {
distanceA += 10
}
if node.ClientID != candidates[a].ClientID {
distanceA += 1
}
if node.Rack != candidates[b].Rack {
distanceB += 10
}
if node.ClientID != candidates[b].ClientID {
distanceB += 1
}
}
if distanceA == distanceB {
return len(availableGPUs[candidates[a].ClientID]) > len(availableGPUs[candidates[b].ClientID])
}
return distanceA*job.Locality < distanceB*job.Locality
})
}
return candidates[0]
}
func (pool *ResourcePool) acquireResource(job Job) []NodeStatus {
for i := range job.Tasks {
job.Tasks[i].Job = job.Name
}
if !pool.enableBatch {
return pool.doAcquireResource(job)
}
pool.batchMu.Lock()
pool.batchJobs[job.Name] = job
pool.batchMu.Unlock()
for {
pool.batchMu.Lock()
if _, ok := pool.batchAllocations[job.Name]; ok {
pool.batchMu.Unlock()
break
} else {
pool.batchMu.Unlock()
time.Sleep(time.Millisecond * 100)
}
}
pool.batchMu.Lock()
nodes := pool.batchAllocations[job.Name]
delete(pool.batchAllocations, job.Name)
pool.batchMu.Unlock()
return nodes
}
func (pool *ResourcePool) doAcquireResource(job Job) []NodeStatus {
if len(job.Tasks) == 0 {
return []NodeStatus{}
}
segID := rand.Intn(pool.poolsCount)
if pool.TotalGPU < 100 {
segID = 0
}
start := &pool.pools[segID]
if start.Nodes == nil {
start = start.Next
}
config := InstanceOfConfiguration()
locks := map[int]*sync.Mutex{}
/* 1-Share, 2-Vacant, 3-PreSchedule */
allocationType := 0
var candidates []NodeStatus
if pool.TotalGPU == 0 {
return []NodeStatus{}
}
var ress []NodeStatus
loadRatio := float64(pool.UsingGPU) / float64(pool.TotalGPU)
/* first, choose sharable GPUs */
task := job.Tasks[0]
if len(job.Tasks) == 1 && task.NumberGPU == 1 && loadRatio >= config.EnableShareRatio {
// check sharable
allocationType = 1
pred := InstanceOfOptimizer().PredictReq(job, "Worker")
availables := map[string][]GPUStatus{}
for cur := start; ; {
if _, ok := locks[cur.ID]; !ok {
cur.Lock.Lock()
locks[cur.ID] = &cur.Lock
}
for _, node := range cur.Nodes {
var available []GPUStatus
for _, status := range node.Status {
if status.MemoryAllocated > 0 && status.MemoryTotal > task.MemoryGPU+status.MemoryAllocated {
pool.bindingsMu.Lock()
if jobs, ok := pool.bindings[status.UUID]; ok {
totalUtil := pred.UtilGPU
for _, job := range jobs {
utilT := InstanceOfOptimizer().PredictReq(job, "Worker").UtilGPU
totalUtil += utilT
}
if totalUtil < int(InstanceOfConfiguration().ShareMaxUtilization*100) {
available = append(available, status)
}
}
pool.bindingsMu.Unlock()
}
}
if len(available) >= task.NumberGPU {
candidates = append(candidates, *node)
availables[node.ClientHost] = available
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
}
}
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
if cur.ID > cur.Next.ID {
break
}
cur = cur.Next
}
if len(candidates) > 0 {
node := candidates[0]
res := NodeStatus{}
res.ClientID = node.ClientID
res.ClientHost = node.ClientHost
res.NumCPU = task.NumberCPU
res.MemTotal = task.Memory
res.Status = availables[node.ClientHost][0:task.NumberGPU]
for i := range res.Status {
pool.bindingsMu.Lock()
if jobsT, okT := pool.bindings[res.Status[i].UUID]; okT {
for jobT := range jobsT {
delete(pool.exclusiveJobs, jobT)
}
}
pool.bindingsMu.Unlock()
for j := range node.Status {
if res.Status[i].UUID == node.Status[j].UUID {
if node.Status[j].MemoryAllocated == 0 {
pool.UsingGPUMu.Lock()
pool.UsingGPU++
pool.UsingGPUMu.Unlock()
}
node.Status[j].MemoryAllocated += task.MemoryGPU
res.Status[i].MemoryTotal = task.MemoryGPU
}
}
}
for _, t := range res.Status {
pool.attach(t.UUID, job)
}
ress = append(ress, res)
}
}
//log.Info(candidates)
/* second round, find vacant gpu */
if len(candidates) == 0 {
allocationType = 2
for cur := start; ; {
if _, ok := locks[cur.ID]; !ok {
cur.Lock.Lock()
locks[cur.ID] = &cur.Lock
}
for _, node := range cur.Nodes {
var available []GPUStatus
for _, status := range node.Status {
/* make sure the GPU is not in use, either by this system or by external processes */
if status.MemoryAllocated == 0 { //} && status.MemoryUsed < 100 {
available = append(available, status)
}
}
if len(available) >= task.NumberGPU {
candidates = append(candidates, *node)
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
}
}
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
if cur.ID > cur.Next.ID {
break
}
cur = cur.Next
}
//log.Info(candidates)
}
/* third round, find gpu to be released */
if len(candidates) == 0 && len(job.Tasks) == 1 && task.NumberGPU == 1 {
estimate := InstanceOfOptimizer().PredictTime(job)
log.Debug(estimate)
if loadRatio >= config.EnablePreScheduleRatio {
allocationType = 3
availables := map[string][]GPUStatus{}
for cur := start; ; {
if _, ok := locks[cur.ID]; !ok {
cur.Lock.Lock()
locks[cur.ID] = &cur.Lock
}
for _, node := range cur.Nodes {
var available []GPUStatus
for _, status := range node.Status {
if jobs, ok := pool.bindings[status.UUID]; ok {
if len(jobs) > 1 || status.MemoryAllocated == 0 {
continue
}
for _, jobT := range jobs {
est := InstanceOfOptimizer().PredictTime(jobT)
now := time.Now().Unix()
if int(now-jobT.StartedAt) > est.Total-est.Post-estimate.Pre-InstanceOfConfiguration().PreScheduleExtraTime {
available = append(available, status)
}
}
}
}
if len(available) >= task.NumberGPU {
candidates = append(candidates, *node)
availables[node.ClientHost] = available
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
}
}
if len(candidates) >= len(job.Tasks)*3+5 {
break
}
if cur.ID > cur.Next.ID {
break
}
cur = cur.Next
}
//log.Info(candidates)
if len(candidates) > 0 {
node := candidates[0]
res := NodeStatus{}
res.ClientID = node.ClientID
res.ClientHost = node.ClientHost
res.NumCPU = task.NumberCPU
res.MemTotal = task.Memory
res.Status = availables[node.ClientHost][0:task.NumberGPU]
for i := range res.Status {
for j := range node.Status {
if res.Status[i].UUID == node.Status[j].UUID {
if node.Status[j].MemoryAllocated == 0 {
pool.UsingGPUMu.Lock()
pool.UsingGPU++
pool.UsingGPUMu.Unlock()
}
node.Status[j].MemoryAllocated += task.MemoryGPU
res.Status[i].MemoryTotal = task.MemoryGPU
/* mark the GPU memory as fully used: it is allocated ahead, before the running job releases it */
res.Status[i].MemoryUsed = res.Status[i].MemoryTotal
}
}
}
for _, t := range res.Status {
pool.attach(t.UUID, job)
}
ress = append(ress, res)
}
}
}
if len(candidates) > 0 {
log.Info("allocationType is ", allocationType)
//log.Info(candidates)
}
/* assign */
if len(candidates) > 0 && len(ress) == 0 {
var nodesT []NodeStatus
for _, node := range candidates {
nodesT = append(nodesT, node.Copy())
2020-05-24 13:07:02 +00:00
}
tasks := make([]Task, len(job.Tasks))
var tasksPS []Task
var tasksWorker []Task
for _, taskT := range job.Tasks {
if taskT.IsPS {
tasksPS = append(tasksPS, taskT)
} else {
tasksWorker = append(tasksWorker, taskT)
}
}
idxPS := 0
idxWorker := 0
factor := float64(len(tasksWorker)) / (float64(len(tasksPS)) + 0.001)
for i := range tasks {
if float64(idxPS)*factor <= float64(idxWorker) && idxPS < len(tasksPS) {
tasks[i] = tasksPS[idxPS]
idxPS++
} else if idxWorker < len(tasksWorker) {
tasks[i] = tasksWorker[idxWorker]
idxWorker++
} else {
tasks[i] = tasksPS[idxPS]
idxPS++
}
}
//log.Info(tasks, factor)
allocation := InstanceOfAllocator().allocate(nodesT, tasks)
//log.Info(allocation)
if allocation.Flags["valid"] {
for range job.Tasks { //append would cause uncertain order
ress = append(ress, NodeStatus{ClientID: "null"})
2020-05-26 12:46:11 +00:00
}
cnt := 0
for nodeID, tasks := range allocation.TasksOnNode {
var node *NodeStatus
for i := range candidates {
if candidates[i].ClientID == nodeID {
node = &candidates[i]
}
}
var available []GPUStatus
for _, gpu := range node.Status {
if gpu.MemoryAllocated == 0 {
available = append(available, gpu)
}
}
for _, task := range tasks {
cnt++
res := NodeStatus{}
res.ClientID = node.ClientID
res.ClientHost = node.ClientHost
res.NumCPU = task.NumberCPU
res.MemTotal = task.Memory
res.Status = available[0:task.NumberGPU]
available = available[task.NumberGPU:]
for i := range res.Status {
for j := range node.Status {
if res.Status[i].UUID == node.Status[j].UUID {
if node.Status[j].MemoryAllocated == 0 {
pool.UsingGPUMu.Lock()
pool.UsingGPU ++
pool.UsingGPUMu.Unlock()
}
node.Status[j].MemoryAllocated += task.MemoryGPU
res.Status[i].MemoryTotal = task.MemoryGPU
}
}
}
for _, t := range res.Status {
pool.attach(t.UUID, job)
}
flag := false
for i := range job.Tasks {
if job.Tasks[i].ID == task.ID {
ress[i] = res
flag = true
break
}
}
if !flag {
log.Warn("Unable to find task, ", res)
}
}
}
if cnt != len(job.Tasks) {
log.Warn("Allocation is invalid")
log.Warn(cnt, len(job.Tasks))
log.Warn(job.Tasks)
log.Warn(allocation.TasksOnNode)
}
}
}
pool.bindingsMu.Lock()
if allocationType == 2 {
pool.exclusiveJobs[job.Name] = true
}
pool.bindingsMu.Unlock()
for segID, lock := range locks {
log.Debug("Unlock ", segID)
lock.Unlock()
}
return ress
}
func (pool *ResourcePool) releaseResource(job Job, agent NodeStatus) {
segID := pool.getNodePool(agent.ClientID)
seg := &pool.pools[segID]
if seg.Nodes == nil {
seg = seg.Next
}
seg.Lock.Lock()
defer seg.Lock.Unlock()
node, ok := seg.Nodes[agent.ClientID]
/* in case node is offline */
if !ok {
/* TODO, update usingTotalGPU correctly */
log.Warn("node ", agent.ClientID, " not present")
return
}
for _, gpu := range agent.Status {
for j := range node.Status {
if gpu.UUID == node.Status[j].UUID {
node.Status[j].MemoryAllocated -= gpu.MemoryTotal
if node.Status[j].MemoryAllocated < 0 {
// in case of error
/* Case 0: a node goes offline and then online again, so its allocation info is lost */
log.Warn(node.ClientID, " UUID=", gpu.UUID, " released more GPU memory than was allocated")
node.Status[j].MemoryAllocated = 0
}
if node.Status[j].MemoryAllocated == 0 {
pool.UsingGPUMu.Lock()
pool.UsingGPU--
pool.UsingGPUMu.Unlock()
log.Info(node.Status[j].UUID, " is released")
}
//log.Info(node.Status[j].MemoryAllocated)
}
}
}
}
func (pool *ResourcePool) SetBatchEnabled(enabled bool) bool {
pool.enableBatch = enabled
log.Info("enableBatch is set to ", enabled)
return true
}
func (pool *ResourcePool) SetBatchInterval(interval int) bool {
if interval < 1 {
interval = 1
}
pool.batchInterval = interval
log.Info("batchInterval is updated to ", interval)
return true
}
func (pool *ResourcePool) isExclusive(jobName string) bool {
pool.bindingsMu.Lock()
defer pool.bindingsMu.Unlock()
_, ok := pool.exclusiveJobs[jobName]
/* clear after called */
delete(pool.exclusiveJobs, jobName)
return ok
}
func (pool *ResourcePool) Dump() map[string]interface{} {
res := map[string]interface{}{}
res["batchJobs"] = pool.batchJobs
res["bindings"] = pool.bindings
return res
}
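
// Hedged end-to-end sketch of batch mode (the `job` and returned nodes are
// illustrative): allocations requested while batching is enabled block inside
// acquireResource until the background loop started in init() assigns them.
//
//	pool.SetBatchEnabled(true)
//	pool.SetBatchInterval(15)
//	nodes := pool.acquireResource(job) // blocks until the batch loop allocates
//	// ... run the job ...
//	for _, n := range nodes {
//	    pool.releaseResource(job, n)
//	}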