mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 14:21:55 +00:00
robust, make sure allocatingGPU is decreased even if job is killed before requesting resource
This commit is contained in:
parent
ee34e09f7f
commit
71ab7bbb75
@@ -367,7 +367,11 @@ func (scheduler *SchedulerFair) UpdateProgress(job Job, state State) {
|
|||||||
defer scheduler.historyMu.Unlock()
|
defer scheduler.historyMu.Unlock()
|
||||||
|
|
||||||
scheduler.schedulingMu.Lock()
|
scheduler.schedulingMu.Lock()
|
||||||
delete(scheduler.schedulingJobs, job.Name)
|
if _, ok := scheduler.schedulingJobs[job.Name]; ok {
|
||||||
|
delete(scheduler.schedulingJobs, job.Name)
|
||||||
|
scheduler.allocatingGPU -= job.NumberGPU
|
||||||
|
log.Info("allocatingGPU is ", scheduler.allocatingGPU)
|
||||||
|
}
|
||||||
scheduler.schedulingMu.Unlock()
|
scheduler.schedulingMu.Unlock()
|
||||||
|
|
||||||
switch state {
|
switch state {
|
||||||
@@ -456,15 +460,6 @@ func (scheduler *SchedulerFair) Schedule(job Job) {
|
|||||||
|
|
||||||
func (scheduler *SchedulerFair) AcquireResource(job Job) []NodeStatus {
|
func (scheduler *SchedulerFair) AcquireResource(job Job) []NodeStatus {
|
||||||
res := InstanceOfResourcePool().acquireResource(job)
|
res := InstanceOfResourcePool().acquireResource(job)
|
||||||
if len(res) != 0 {
|
|
||||||
for _, task := range job.Tasks {
|
|
||||||
|
|
||||||
scheduler.allocatingGPUMu.Lock()
|
|
||||||
scheduler.allocatingGPU -= task.NumberGPU
|
|
||||||
scheduler.allocatingGPUMu.Unlock()
|
|
||||||
}
|
|
||||||
log.Info("allocatingGPU is ", scheduler.allocatingGPU)
|
|
||||||
}
|
|
||||||
go func() {
|
go func() {
|
||||||
scheduler.UpdateQuota()
|
scheduler.UpdateQuota()
|
||||||
}()
|
}()
|
||||||
|
Loading…
Reference in New Issue
Block a user