mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 22:31:55 +00:00
update fair
This commit is contained in:
parent
c044d43490
commit
e0232784df
@ -81,3 +81,8 @@ GPU is occupied by which job(s)
|
|||||||
```
|
```
|
||||||
?action=allocator_update_strategy&strategy=bestfit
|
?action=allocator_update_strategy&strategy=bestfit
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**SchedulerDump**
|
||||||
|
```
|
||||||
|
?action=debug_scheduler_dump
|
||||||
|
```
|
@ -182,6 +182,13 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
|
|||||||
w.Write(js)
|
w.Write(js)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
case "debug_scheduler_dump":
|
||||||
|
log.Debug("debug_scheduler_dump")
|
||||||
|
js, _ := json.Marshal(scheduler.DebugDump())
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Write(js)
|
||||||
|
break
|
||||||
|
|
||||||
case "debug_update_parallelism":
|
case "debug_update_parallelism":
|
||||||
log.Debug("update_parallelism")
|
log.Debug("update_parallelism")
|
||||||
parallelism, _ := strconv.Atoi(r.URL.Query().Get("parallelism"))
|
parallelism, _ := strconv.Atoi(r.URL.Query().Get("parallelism"))
|
||||||
|
@ -28,4 +28,6 @@ type Scheduler interface {
|
|||||||
UpdateParallelism(parallelism int) bool
|
UpdateParallelism(parallelism int) bool
|
||||||
|
|
||||||
updateGroup(group Group) bool
|
updateGroup(group Group) bool
|
||||||
|
|
||||||
|
DebugDump() map[string]interface{}
|
||||||
}
|
}
|
||||||
|
@ -186,3 +186,8 @@ func (scheduler *SchedulerFCFS) UpdateParallelism(parallelism int) bool {
|
|||||||
func (scheduler *SchedulerFCFS) updateGroup(group Group) bool {
|
func (scheduler *SchedulerFCFS) updateGroup(group Group) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (scheduler *SchedulerFCFS) DebugDump() map[string]interface{} {
|
||||||
|
res := map[string]interface{}{}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
@ -413,3 +413,12 @@ func (scheduler *SchedulerCapacity) UpdateParallelism(parallelism int) bool {
|
|||||||
func (scheduler *SchedulerCapacity) updateGroup(group Group) bool {
|
func (scheduler *SchedulerCapacity) updateGroup(group Group) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (scheduler *SchedulerCapacity) DebugDump() map[string]interface{} {
|
||||||
|
res := map[string]interface{}{}
|
||||||
|
res["nextQueue"] = scheduler.nextQueue
|
||||||
|
res["schedulingJobs"] = scheduler.schedulingJobs
|
||||||
|
res["resourceAllocations"] = scheduler.resourceAllocations
|
||||||
|
res["allocatingGPU"] = scheduler.allocatingGPU
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
@ -30,10 +30,6 @@ type SchedulerFair struct {
|
|||||||
|
|
||||||
allocatingGPU int
|
allocatingGPU int
|
||||||
allocatingGPUMu sync.Mutex
|
allocatingGPUMu sync.Mutex
|
||||||
|
|
||||||
totalQuota ResourceCount
|
|
||||||
allocatedQuota ResourceCount
|
|
||||||
quotaMu sync.Mutex
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (scheduler *SchedulerFair) Start() {
|
func (scheduler *SchedulerFair) Start() {
|
||||||
@ -300,9 +296,7 @@ func (scheduler *SchedulerFair) UpdateQuota() {
|
|||||||
defer scheduler.queuesMu.Unlock()
|
defer scheduler.queuesMu.Unlock()
|
||||||
scheduler.queuesQuotaMu.Lock()
|
scheduler.queuesQuotaMu.Lock()
|
||||||
defer scheduler.queuesQuotaMu.Unlock()
|
defer scheduler.queuesQuotaMu.Unlock()
|
||||||
scheduler.quotaMu.Lock()
|
//log.Info("Updating queues quota~")
|
||||||
defer scheduler.quotaMu.Unlock()
|
|
||||||
log.Info("Updating queues quota~")
|
|
||||||
|
|
||||||
/* phase 1: DRF */
|
/* phase 1: DRF */
|
||||||
usingGPU := 0
|
usingGPU := 0
|
||||||
@ -320,6 +314,7 @@ func (scheduler *SchedulerFair) UpdateQuota() {
|
|||||||
pool := InstanceOfResourcePool()
|
pool := InstanceOfResourcePool()
|
||||||
|
|
||||||
available := pool.TotalGPU - usingGPU - allocatedGPU
|
available := pool.TotalGPU - usingGPU - allocatedGPU
|
||||||
|
/* <0 means some nodes exited */
|
||||||
if available <= 0 {
|
if available <= 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -470,3 +465,12 @@ func (scheduler *SchedulerFair) UpdateParallelism(parallelism int) bool {
|
|||||||
func (scheduler *SchedulerFair) updateGroup(group Group) bool {
|
func (scheduler *SchedulerFair) updateGroup(group Group) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (scheduler *SchedulerFair) DebugDump() map[string]interface{} {
|
||||||
|
res := map[string]interface{}{}
|
||||||
|
res["queuesQuota"] = scheduler.queuesQuota
|
||||||
|
res["schedulingJobs"] = scheduler.schedulingJobs
|
||||||
|
res["resourceAllocations"] = scheduler.resourceAllocations
|
||||||
|
res["allocatingGPU"] = scheduler.allocatingGPU
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
@ -209,3 +209,8 @@ func (scheduler *SchedulerPriority) UpdateParallelism(parallelism int) bool {
|
|||||||
func (scheduler *SchedulerPriority) updateGroup(group Group) bool {
|
func (scheduler *SchedulerPriority) updateGroup(group Group) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (scheduler *SchedulerPriority) DebugDump() map[string]interface{} {
|
||||||
|
res := map[string]interface{}{}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user