mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-12-15 08:16:43 +00:00
update
This commit is contained in:
@@ -38,6 +38,9 @@ type ResourcePool struct {
|
||||
|
||||
TotalGPU int
|
||||
TotalGPUMu sync.Mutex
|
||||
|
||||
subscriptions map[string]map[string]int
|
||||
subscriptionsMu sync.Mutex
|
||||
}
|
||||
|
||||
func (pool *ResourcePool) start() {
|
||||
@@ -50,6 +53,8 @@ func (pool *ResourcePool) start() {
|
||||
pool.bindings = map[string]map[string]int{}
|
||||
pool.utils = map[string][]UtilGPUTimeSeries{}
|
||||
|
||||
pool.subscriptions = map[string]map[string]int{}
|
||||
|
||||
pool.TotalGPU = 0
|
||||
|
||||
/* init pools */
|
||||
@@ -233,6 +238,8 @@ func (pool *ResourcePool) update(node NodeStatus) {
|
||||
|
||||
/* init bindings */
|
||||
go func(node NodeStatus) {
|
||||
pool.subscriptionsMu.Lock()
|
||||
defer pool.subscriptionsMu.Unlock()
|
||||
pool.bindingsMu.Lock()
|
||||
defer pool.bindingsMu.Unlock()
|
||||
for _, gpu := range node.Status {
|
||||
@@ -242,6 +249,12 @@ func (pool *ResourcePool) update(node NodeStatus) {
|
||||
UtilGPUTimeSeries{Time: (int)(time.Now().Unix()), Util: gpu.UtilizationGPU})
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := pool.subscriptions[gpu.UUID]; ok {
|
||||
for jobName := range pool.subscriptions[gpu.UUID] {
|
||||
scheduler.QueryState(jobName)
|
||||
}
|
||||
}
|
||||
}
|
||||
pool.heartBeatMu.Lock()
|
||||
pool.heartBeat[node.ClientID] = time.Now()
|
||||
@@ -438,8 +451,16 @@ func (pool *ResourcePool) releaseNetwork(network string) {
|
||||
}
|
||||
|
||||
func (pool *ResourcePool) attach(GPU string, job string) {
|
||||
pool.subscriptionsMu.Lock()
|
||||
defer pool.subscriptionsMu.Unlock()
|
||||
pool.bindingsMu.Lock()
|
||||
defer pool.bindingsMu.Unlock()
|
||||
|
||||
if _, ok := pool.subscriptions[GPU]; !ok {
|
||||
pool.subscriptions[GPU] = map[string]int{}
|
||||
}
|
||||
pool.subscriptions[GPU][job] = int(time.Now().Unix())
|
||||
|
||||
if _, ok := pool.bindings[GPU]; !ok {
|
||||
pool.bindings[GPU] = map[string]int{}
|
||||
}
|
||||
@@ -455,8 +476,15 @@ func (pool *ResourcePool) attach(GPU string, job string) {
|
||||
}
|
||||
|
||||
func (pool *ResourcePool) detach(GPU string, job Job) {
|
||||
pool.subscriptionsMu.Lock()
|
||||
defer pool.subscriptionsMu.Unlock()
|
||||
pool.bindingsMu.Lock()
|
||||
defer pool.bindingsMu.Unlock()
|
||||
|
||||
if _, ok := pool.subscriptions[GPU]; ok {
|
||||
delete(pool.subscriptions[GPU], job.Name)
|
||||
}
|
||||
|
||||
if _, ok := pool.bindings[GPU]; ok {
|
||||
if _, ok2 := pool.utils[GPU]; ok2 {
|
||||
if len(pool.bindings[GPU]) == 1 && job.Status != Failed && job.Status != Stopped {
|
||||
|
||||
Reference in New Issue
Block a user