mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 14:21:55 +00:00
update fair
This commit is contained in:
parent
4a91d71c26
commit
911796da2c
@@ -911,6 +911,7 @@ func (pool *ResourcePool) releaseResource(job Job, agent NodeStatus) {
|
|||||||
/* in case node is offline */
|
/* in case node is offline */
|
||||||
if !ok {
|
if !ok {
|
||||||
/* TODO, update usingTotalGPU correctly */
|
/* TODO, update usingTotalGPU correctly */
|
||||||
|
log.Warn("node ", agent.ClientID, " not present")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, gpu := range agent.Status {
|
for _, gpu := range agent.Status {
|
||||||
|
@@ -250,19 +250,20 @@ func (scheduler *SchedulerCapacity) AcquireResource(job Job) []NodeStatus {
|
|||||||
|
|
||||||
func (scheduler *SchedulerCapacity) ReleaseResource(job Job, agent NodeStatus) {
|
func (scheduler *SchedulerCapacity) ReleaseResource(job Job, agent NodeStatus) {
|
||||||
InstanceOfResourcePool().releaseResource(job, agent)
|
InstanceOfResourcePool().releaseResource(job, agent)
|
||||||
|
|
||||||
|
scheduler.resourceAllocationsMu.Lock()
|
||||||
|
if _, ok := scheduler.resourceAllocations[job.Group]; !ok {
|
||||||
|
scheduler.resourceAllocations[job.Group] = &ResourceCount{}
|
||||||
|
}
|
||||||
|
cnt, _ := scheduler.resourceAllocations[job.Group]
|
||||||
|
cnt.CPU -= agent.MemTotal
|
||||||
|
cnt.Memory -= agent.NumCPU
|
||||||
|
for _, v := range agent.Status {
|
||||||
|
cnt.NumberGPU --
|
||||||
|
cnt.MemoryGPU -= v.MemoryTotal
|
||||||
|
}
|
||||||
|
scheduler.resourceAllocationsMu.Unlock()
|
||||||
go func(res NodeStatus) {
|
go func(res NodeStatus) {
|
||||||
scheduler.resourceAllocationsMu.Lock()
|
|
||||||
if _, ok := scheduler.resourceAllocations[job.Group]; !ok {
|
|
||||||
scheduler.resourceAllocations[job.Group] = &ResourceCount{}
|
|
||||||
}
|
|
||||||
cnt, _ := scheduler.resourceAllocations[job.Group]
|
|
||||||
cnt.CPU -= res.MemTotal
|
|
||||||
cnt.Memory -= res.NumCPU
|
|
||||||
for _, v := range res.Status {
|
|
||||||
cnt.NumberGPU --
|
|
||||||
cnt.MemoryGPU -= v.MemoryTotal
|
|
||||||
}
|
|
||||||
scheduler.resourceAllocationsMu.Unlock()
|
|
||||||
scheduler.UpdateNextQueue()
|
scheduler.UpdateNextQueue()
|
||||||
}(agent)
|
}(agent)
|
||||||
}
|
}
|
||||||
|
@@ -276,17 +276,17 @@ func (scheduler *SchedulerFair) AcquireResource(job Job) []NodeStatus {
|
|||||||
|
|
||||||
func (scheduler *SchedulerFair) ReleaseResource(job Job, agent NodeStatus) {
|
func (scheduler *SchedulerFair) ReleaseResource(job Job, agent NodeStatus) {
|
||||||
InstanceOfResourcePool().releaseResource(job, agent)
|
InstanceOfResourcePool().releaseResource(job, agent)
|
||||||
go func(res NodeStatus) {
|
|
||||||
scheduler.resourceAllocationsMu.Lock()
|
scheduler.resourceAllocationsMu.Lock()
|
||||||
if _, ok := scheduler.resourceAllocations[job.Group]; !ok {
|
if _, ok := scheduler.resourceAllocations[job.Group]; !ok {
|
||||||
scheduler.resourceAllocations[job.Group] = &ResourceCount{}
|
scheduler.resourceAllocations[job.Group] = &ResourceCount{}
|
||||||
}
|
}
|
||||||
cnt, _ := scheduler.resourceAllocations[job.Group]
|
cnt, _ := scheduler.resourceAllocations[job.Group]
|
||||||
cnt.CPU -= res.NumCPU
|
cnt.CPU -= agent.NumCPU
|
||||||
cnt.Memory -= res.MemTotal
|
cnt.Memory -= agent.MemTotal
|
||||||
cnt.NumberGPU -= len(res.Status)
|
cnt.NumberGPU -= len(agent.Status)
|
||||||
scheduler.resourceAllocationsMu.Unlock()
|
scheduler.resourceAllocationsMu.Unlock()
|
||||||
}(agent)
|
|
||||||
go func() {
|
go func() {
|
||||||
scheduler.UpdateQuota()
|
scheduler.UpdateQuota()
|
||||||
}()
|
}()
|
||||||
|
Loading…
Reference in New Issue
Block a user