mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 22:31:55 +00:00
update fair
This commit is contained in:
parent
c044d43490
commit
e0232784df
@ -81,3 +81,8 @@ GPU is occupied by which job(s)
|
||||
```
|
||||
?action=allocator_update_strategy&strategy=bestfit
|
||||
```
|
||||
|
||||
**SchedulerDump**
|
||||
```
|
||||
?action=debug_scheduler_dump
|
||||
```
|
@ -182,6 +182,13 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
case "debug_scheduler_dump":
|
||||
log.Debug("debug_scheduler_dump")
|
||||
js, _ := json.Marshal(scheduler.DebugDump())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
case "debug_update_parallelism":
|
||||
log.Debug("update_parallelism")
|
||||
parallelism, _ := strconv.Atoi(r.URL.Query().Get("parallelism"))
|
||||
|
@ -28,4 +28,6 @@ type Scheduler interface {
|
||||
UpdateParallelism(parallelism int) bool
|
||||
|
||||
updateGroup(group Group) bool
|
||||
|
||||
DebugDump() map[string]interface{}
|
||||
}
|
||||
|
@ -186,3 +186,8 @@ func (scheduler *SchedulerFCFS) UpdateParallelism(parallelism int) bool {
|
||||
func (scheduler *SchedulerFCFS) updateGroup(group Group) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (scheduler *SchedulerFCFS) DebugDump() map[string]interface{} {
|
||||
res := map[string]interface{}{}
|
||||
return res
|
||||
}
|
||||
|
@ -413,3 +413,12 @@ func (scheduler *SchedulerCapacity) UpdateParallelism(parallelism int) bool {
|
||||
func (scheduler *SchedulerCapacity) updateGroup(group Group) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (scheduler *SchedulerCapacity) DebugDump() map[string]interface{} {
|
||||
res := map[string]interface{}{}
|
||||
res["nextQueue"] = scheduler.nextQueue
|
||||
res["schedulingJobs"] = scheduler.schedulingJobs
|
||||
res["resourceAllocations"] = scheduler.resourceAllocations
|
||||
res["allocatingGPU"] = scheduler.allocatingGPU
|
||||
return res
|
||||
}
|
||||
|
@ -30,10 +30,6 @@ type SchedulerFair struct {
|
||||
|
||||
allocatingGPU int
|
||||
allocatingGPUMu sync.Mutex
|
||||
|
||||
totalQuota ResourceCount
|
||||
allocatedQuota ResourceCount
|
||||
quotaMu sync.Mutex
|
||||
}
|
||||
|
||||
func (scheduler *SchedulerFair) Start() {
|
||||
@ -300,9 +296,7 @@ func (scheduler *SchedulerFair) UpdateQuota() {
|
||||
defer scheduler.queuesMu.Unlock()
|
||||
scheduler.queuesQuotaMu.Lock()
|
||||
defer scheduler.queuesQuotaMu.Unlock()
|
||||
scheduler.quotaMu.Lock()
|
||||
defer scheduler.quotaMu.Unlock()
|
||||
log.Info("Updating queues quota~")
|
||||
//log.Info("Updating queues quota~")
|
||||
|
||||
/* phase 1: DRF */
|
||||
usingGPU := 0
|
||||
@ -320,6 +314,7 @@ func (scheduler *SchedulerFair) UpdateQuota() {
|
||||
pool := InstanceOfResourcePool()
|
||||
|
||||
available := pool.TotalGPU - usingGPU - allocatedGPU
|
||||
/* <0 means some nodes exited */
|
||||
if available <= 0 {
|
||||
return
|
||||
}
|
||||
@ -470,3 +465,12 @@ func (scheduler *SchedulerFair) UpdateParallelism(parallelism int) bool {
|
||||
func (scheduler *SchedulerFair) updateGroup(group Group) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (scheduler *SchedulerFair) DebugDump() map[string]interface{} {
|
||||
res := map[string]interface{}{}
|
||||
res["queuesQuota"] = scheduler.queuesQuota
|
||||
res["schedulingJobs"] = scheduler.schedulingJobs
|
||||
res["resourceAllocations"] = scheduler.resourceAllocations
|
||||
res["allocatingGPU"] = scheduler.allocatingGPU
|
||||
return res
|
||||
}
|
||||
|
@ -209,3 +209,8 @@ func (scheduler *SchedulerPriority) UpdateParallelism(parallelism int) bool {
|
||||
func (scheduler *SchedulerPriority) updateGroup(group Group) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (scheduler *SchedulerPriority) DebugDump() map[string]interface{} {
|
||||
res := map[string]interface{}{}
|
||||
return res
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user