mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 22:31:55 +00:00
update fair
This commit is contained in:
parent
b99e66a4ca
commit
41242024b6
@ -189,7 +189,9 @@ func (jm *JobManager) checkStatus(status []TaskStatus) {
|
|||||||
}
|
}
|
||||||
//InstanceJobHistoryLogger().submitTaskStatus(jm.job.Name, status[i])
|
//InstanceJobHistoryLogger().submitTaskStatus(jm.job.Name, status[i])
|
||||||
} else {
|
} else {
|
||||||
|
jm.resourcesMu.Lock()
|
||||||
if jm.resources[i].ClientID == "_released_" {
|
if jm.resources[i].ClientID == "_released_" {
|
||||||
|
jm.resourcesMu.Unlock()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Info(jm.job.Name, "-", i, " ", status[i].Status)
|
log.Info(jm.job.Name, "-", i, " ", status[i].Status)
|
||||||
@ -200,7 +202,6 @@ func (jm *JobManager) checkStatus(status []TaskStatus) {
|
|||||||
jm.scheduler.UpdateProgress(jm.job, Failed)
|
jm.scheduler.UpdateProgress(jm.job, Failed)
|
||||||
}
|
}
|
||||||
|
|
||||||
jm.resourcesMu.Lock()
|
|
||||||
if jm.resources[i].ClientID != "_released_" {
|
if jm.resources[i].ClientID != "_released_" {
|
||||||
jm.scheduler.ReleaseResource(jm.job, jm.resources[i])
|
jm.scheduler.ReleaseResource(jm.job, jm.resources[i])
|
||||||
log.Info("return resource ", jm.resources[i].ClientID)
|
log.Info("return resource ", jm.resources[i].ClientID)
|
||||||
|
@ -128,6 +128,16 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
|
|
||||||
/* phase 2: borrow */
|
/* phase 2: borrow */
|
||||||
if bestQueue == "" && scheduler.enableBorrow {
|
if bestQueue == "" && scheduler.enableBorrow {
|
||||||
|
quotas := map[string]ResourceCount{}
|
||||||
|
for queue, quota := range scheduler.queuesQuota {
|
||||||
|
quotas[queue] = ResourceCount{NumberGPU: quota.NumberGPU}
|
||||||
|
}
|
||||||
|
for _, IOUs := range scheduler.IOUs {
|
||||||
|
for queue, IOU := range IOUs {
|
||||||
|
quota := quotas[queue]
|
||||||
|
quota.NumberGPU += IOU.NumberGPU
|
||||||
|
}
|
||||||
|
}
|
||||||
/* firstly, check if quota sum can run a job */
|
/* firstly, check if quota sum can run a job */
|
||||||
totalGPU := 0
|
totalGPU := 0
|
||||||
for _, quota := range scheduler.queuesQuota {
|
for _, quota := range scheduler.queuesQuota {
|
||||||
@ -151,7 +161,7 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
needGPU := numberGPUtmp*1000 - scheduler.queuesQuota[queue].NumberGPU
|
needGPU := numberGPUtmp*1000 - quotas[queue].NumberGPU
|
||||||
/* the less, the better */
|
/* the less, the better */
|
||||||
if bestQueue == "" || needGPU < minRequestGPU {
|
if bestQueue == "" || needGPU < minRequestGPU {
|
||||||
bestQueue = queue
|
bestQueue = queue
|
||||||
@ -167,7 +177,7 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
log.Info(bestQueue, ": ", "total=", totalGPU, " still need ", minRequestGPU)
|
log.Info(bestQueue, ": ", "total=", totalGPU, " still need ", minRequestGPU)
|
||||||
for {
|
for {
|
||||||
/* if all satisfied, break */
|
/* if all satisfied, break */
|
||||||
if minRequestGPU == 0 {
|
if minRequestGPU <= 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
least := math.MaxInt32
|
least := math.MaxInt32
|
||||||
|
Loading…
Reference in New Issue
Block a user