mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 22:31:55 +00:00
update fair
This commit is contained in:
parent
d2a67796dd
commit
bbb5fda362
@ -128,14 +128,17 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
|
|
||||||
/* phase 2: borrow */
|
/* phase 2: borrow */
|
||||||
if bestQueue == "" && scheduler.enableBorrow {
|
if bestQueue == "" && scheduler.enableBorrow {
|
||||||
|
/* calculate real quotas */
|
||||||
quotas := map[string]*ResourceCount{}
|
quotas := map[string]*ResourceCount{}
|
||||||
for queue, quota := range scheduler.queuesQuota {
|
for queue, quota := range scheduler.queuesQuota {
|
||||||
quotas[queue] = &ResourceCount{NumberGPU: quota.NumberGPU}
|
quotas[queue] = &ResourceCount{NumberGPU: quota.NumberGPU}
|
||||||
}
|
}
|
||||||
for _, IOUs := range scheduler.IOUs {
|
for q, IOUs := range scheduler.IOUs {
|
||||||
for queue, IOU := range IOUs {
|
for queue, IOU := range IOUs {
|
||||||
quota := quotas[queue]
|
quota := quotas[queue]
|
||||||
quota.NumberGPU += IOU.NumberGPU
|
quota.NumberGPU += IOU.NumberGPU
|
||||||
|
quota = quotas[q]
|
||||||
|
quota.NumberGPU -= IOU.NumberGPU
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* firstly, check if quota sum can run a job */
|
/* firstly, check if quota sum can run a job */
|
||||||
@ -145,6 +148,7 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
}
|
}
|
||||||
/* find job which is short of least resource */
|
/* find job which is short of least resource */
|
||||||
minRequestGPU := math.MaxInt32
|
minRequestGPU := math.MaxInt32
|
||||||
|
minNeedBorrow := math.MaxInt32
|
||||||
for queue, jobs := range scheduler.queues {
|
for queue, jobs := range scheduler.queues {
|
||||||
if len(jobs) == 0 {
|
if len(jobs) == 0 {
|
||||||
continue
|
continue
|
||||||
@ -169,18 +173,19 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
if bestQueue == "" || needGPU < minRequestGPU {
|
if bestQueue == "" || needGPU < minRequestGPU {
|
||||||
bestQueue = queue
|
bestQueue = queue
|
||||||
minRequestGPU = needGPU
|
minRequestGPU = needGPU
|
||||||
|
minNeedBorrow = numberGPUtmp*1000 - scheduler.queuesQuota[queue].NumberGPU
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if quota, ok := scheduler.queuesQuota[bestQueue]; ok {
|
if quota, ok := scheduler.queuesQuota[bestQueue]; ok {
|
||||||
totalGPU -= quota.NumberGPU
|
totalGPU -= quota.NumberGPU
|
||||||
}
|
}
|
||||||
/* if totalGPU can satisfy that job, start borrowing */
|
/* if totalGPU can satisfy that job, start borrowing */
|
||||||
if bestQueue != "" && totalGPU >= minRequestGPU {
|
if bestQueue != "" && totalGPU >= minNeedBorrow {
|
||||||
log.Info("start borrow phase")
|
log.Info("start borrow phase")
|
||||||
log.Info(bestQueue, ": ", "total=", totalGPU, " still need ", minRequestGPU)
|
log.Info(bestQueue, ": ", "total=", totalGPU, " still need ", minNeedBorrow)
|
||||||
for {
|
for {
|
||||||
/* if all satisfied, break */
|
/* if all satisfied, break */
|
||||||
if minRequestGPU <= 0 {
|
if minNeedBorrow <= 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
least := math.MaxInt32
|
least := math.MaxInt32
|
||||||
@ -192,8 +197,8 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
least = quota.NumberGPU
|
least = quota.NumberGPU
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if minRequestGPU < least*(len(scheduler.queuesQuota)-1) {
|
if minNeedBorrow < least*(len(scheduler.queuesQuota)-1) {
|
||||||
least = minRequestGPU / (len(scheduler.queuesQuota) - 1)
|
least = minNeedBorrow / (len(scheduler.queuesQuota) - 1)
|
||||||
}
|
}
|
||||||
/* start borrow */
|
/* start borrow */
|
||||||
for queue, quota := range scheduler.queuesQuota {
|
for queue, quota := range scheduler.queuesQuota {
|
||||||
@ -211,7 +216,7 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
IOU = scheduler.IOUs[bestQueue][queue]
|
IOU = scheduler.IOUs[bestQueue][queue]
|
||||||
}
|
}
|
||||||
IOU.NumberGPU += least
|
IOU.NumberGPU += least
|
||||||
minRequestGPU -= least
|
minNeedBorrow -= least
|
||||||
scheduler.queuesQuota[bestQueue].NumberGPU += least
|
scheduler.queuesQuota[bestQueue].NumberGPU += least
|
||||||
|
|
||||||
log.Info(bestQueue, " borrow ", least, " from ", queue)
|
log.Info(bestQueue, " borrow ", least, " from ", queue)
|
||||||
@ -222,10 +227,10 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
if queue == bestQueue || quota.NumberGPU == 0 {
|
if queue == bestQueue || quota.NumberGPU == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if quota.NumberGPU < minRequestGPU {
|
if quota.NumberGPU < minNeedBorrow {
|
||||||
least = quota.NumberGPU
|
least = quota.NumberGPU
|
||||||
} else {
|
} else {
|
||||||
least = minRequestGPU
|
least = minNeedBorrow
|
||||||
}
|
}
|
||||||
quota.NumberGPU -= least
|
quota.NumberGPU -= least
|
||||||
if _, ok := scheduler.IOUs[bestQueue]; !ok {
|
if _, ok := scheduler.IOUs[bestQueue]; !ok {
|
||||||
@ -238,8 +243,8 @@ func (scheduler *SchedulerFair) Start() {
|
|||||||
}
|
}
|
||||||
IOU.NumberGPU += least
|
IOU.NumberGPU += least
|
||||||
scheduler.queuesQuota[bestQueue].NumberGPU += least
|
scheduler.queuesQuota[bestQueue].NumberGPU += least
|
||||||
log.Info(bestQueue, " borrow ", minRequestGPU, " from ", queue, " now ", scheduler.queuesQuota[bestQueue].NumberGPU)
|
log.Info(bestQueue, " borrow ", minNeedBorrow, " from ", queue, " now ", scheduler.queuesQuota[bestQueue].NumberGPU)
|
||||||
minRequestGPU -= least
|
minNeedBorrow -= least
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user