update fair
commit f6e0081d53
parent d1a9bad295
@@ -309,12 +309,15 @@ func main() {
     case "FCFS":
         scheduler = &SchedulerFCFS{}
         break
-    case "fair":
-        scheduler = &SchedulerCapacity{}
-        break
     case "priority":
         scheduler = &SchedulerPriority{}
         break
+    case "capacity":
+        scheduler = &SchedulerCapacity{}
+        break
+    case "fair":
+        scheduler = &SchedulerFair{}
+        break
     default:
         scheduler = &SchedulerFCFS{}
     }
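The hunk above swaps which implementation backs the "fair" policy (previously an alias for SchedulerCapacity, now the new SchedulerFair) and exposes the capacity scheduler under its own "capacity" name. All of these assignments target the same scheduler variable, so each type must satisfy a common interface. A minimal sketch of that interface, inferred only from the SchedulerCapacity methods visible in the hunks below; the repo's actual definition may differ:

// Sketch, not the repo's definition: the method set is inferred from the
// SchedulerCapacity methods appearing in this diff. Job, State, NodeStatus,
// and the Msg* types are defined elsewhere in YAO-scheduler.
type Scheduler interface {
    Start()
    UpdateProgress(job Job, state State)
    Schedule(job Job)
    ReleaseResource(job Job, agent NodeStatus)
    QueryState(jobName string) MsgJobStatus
    Stop(jobName string) MsgStop
    QueryLogs(jobName string, taskName string) MsgLog
    Summary() MsgSummary
    UpdateParallelism(parallelism int) bool
}

The remaining hunks are all against the SchedulerCapacity implementation.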
@@ -15,7 +15,7 @@ type SchedulerCapacity struct {
     nextQueue string
     jobs      map[string]*JobManager
     queues    map[string][]Job
-    queueMu   sync.Mutex
+    queuesMu  sync.Mutex

     schedulingJobs map[string]bool
     schedulingMu   sync.Mutex
@@ -28,9 +28,6 @@ type SchedulerCapacity struct {

     allocatingGPU   int
     allocatingGPUMu sync.Mutex
-
-    queuesSchedulingCnt map[string]int
-    queuesUsingGPUMu    sync.Mutex
 }

 type FairJobSorter []Job
@@ -46,7 +43,7 @@ func (s FairJobSorter) Less(i, j int) bool {
 }

 func (scheduler *SchedulerCapacity) Start() {
-    log.Info("JS started")
+    log.Info("JS (capacity) started")

     scheduler.jobs = map[string]*JobManager{}
     scheduler.history = []*Job{}
@@ -57,12 +54,10 @@ func (scheduler *SchedulerCapacity) Start() {
     scheduler.enabled = true
     scheduler.schedulingJobs = map[string]bool{}
     scheduler.allocatingGPU = 0
-    scheduler.queuesSchedulingCnt = map[string]int{}

     scheduler.parallelism = 1

     go func() {
-        /* capacity scheduler */
         flag := true
         for {
             log.Debug("Scheduling")
@@ -82,7 +77,7 @@ func (scheduler *SchedulerCapacity) Start() {
             }
             scheduler.schedulingMu.Unlock()

-            scheduler.queueMu.Lock()
+            scheduler.queuesMu.Lock()
             queue := scheduler.nextQueue
             go func() {
                 scheduler.UpdateNextQueue()
@@ -99,7 +94,7 @@ func (scheduler *SchedulerCapacity) Start() {
             pool := InstanceOfResourcePool()
             log.Info(cnt, pool.TotalGPU, pool.UsingGPU, scheduler.allocatingGPU)
             if len(scheduler.schedulingJobs) > 1 && (cnt*10+(scheduler.allocatingGPU)*13 > (pool.TotalGPU-pool.UsingGPU)*10) {
-                scheduler.queueMu.Unlock()
+                scheduler.queuesMu.Unlock()
                 continue
             }

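A note on the guard in the hunk above: scaling both sides by 10 lets the comparison cnt + 1.3*allocatingGPU > TotalGPU - UsingGPU run in integer arithmetic, with GPUs still mid-allocation weighted 1.3x, apparently as a safety margin against over-commitment. For example, with cnt = 4 GPUs requested, allocatingGPU = 2, and 8 GPUs free: 4*10 + 2*13 = 66 ≤ 80, so scheduling proceeds. The guard only applies while more than one job is being scheduled.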
@@ -119,10 +114,6 @@ func (scheduler *SchedulerCapacity) Start() {
                 scheduler.history = append(scheduler.history, &jm.job)
                 scheduler.historyMu.Unlock()

-                scheduler.queuesUsingGPUMu.Lock()
-                scheduler.queuesSchedulingCnt[jm.job.Group]++
-                scheduler.queuesUsingGPUMu.Unlock()
-
                 scheduler.schedulingMu.Lock()
                 scheduler.schedulingJobs[jm.job.Name] = true
                 scheduler.schedulingMu.Unlock()
@@ -132,7 +123,7 @@ func (scheduler *SchedulerCapacity) Start() {
             } else {
                 log.Debug("No more jobs to scheduling ", time.Now())
             }
-            scheduler.queueMu.Unlock()
+            scheduler.queuesMu.Unlock()
         }
     }()
 }
@@ -147,16 +138,6 @@ func (scheduler *SchedulerCapacity) UpdateProgress(job Job, state State) {

     switch state {
     case Running:
-        scheduler.queuesUsingGPUMu.Lock()
-        if _, ok := scheduler.queuesSchedulingCnt[job.Group]; ok {
-            scheduler.queuesSchedulingCnt[job.Group]--
-            if scheduler.queuesSchedulingCnt[job.Group] < 0 {
-                scheduler.queuesSchedulingCnt[job.Group] = 0
-                log.Warn("scheduler.queuesSchedulingCnt less than 0", job.Group)
-            }
-        }
-        scheduler.queuesUsingGPUMu.Unlock()
-
         for i := range scheduler.history {
             if scheduler.history[i].Name == job.Name {
                 scheduler.history[i].Status = Running
@@ -192,8 +173,8 @@ func (scheduler *SchedulerCapacity) UpdateProgress(job Job, state State) {
 }

 func (scheduler *SchedulerCapacity) Schedule(job Job) {
-    scheduler.queueMu.Lock()
-    defer scheduler.queueMu.Unlock()
+    scheduler.queuesMu.Lock()
+    defer scheduler.queuesMu.Unlock()

     queue := job.Group
     _, ok := scheduler.queues[queue]
@@ -287,9 +268,9 @@ func (scheduler *SchedulerCapacity) ReleaseResource(job Job, agent NodeStatus) {
 }

 func (scheduler *SchedulerCapacity) QueryState(jobName string) MsgJobStatus {
-    scheduler.queueMu.Lock()
+    scheduler.queuesMu.Lock()
     jm, ok := scheduler.jobs[jobName]
-    scheduler.queueMu.Unlock()
+    scheduler.queuesMu.Unlock()
     if !ok {
         return MsgJobStatus{Code: 1, Error: "Job not exist!"}
     }
@@ -297,9 +278,9 @@ func (scheduler *SchedulerCapacity) QueryState(jobName string) MsgJobStatus {
 }

 func (scheduler *SchedulerCapacity) Stop(jobName string) MsgStop {
-    scheduler.queueMu.Lock()
+    scheduler.queuesMu.Lock()
     jm, ok := scheduler.jobs[jobName]
-    scheduler.queueMu.Unlock()
+    scheduler.queuesMu.Unlock()
     if !ok {
         return MsgStop{Code: 1, Error: "Job not exist!"}
     }
@@ -307,9 +288,9 @@ func (scheduler *SchedulerCapacity) Stop(jobName string) MsgStop {
 }

 func (scheduler *SchedulerCapacity) QueryLogs(jobName string, taskName string) MsgLog {
-    scheduler.queueMu.Lock()
+    scheduler.queuesMu.Lock()
     jm, ok := scheduler.jobs[jobName]
-    scheduler.queueMu.Unlock()
+    scheduler.queuesMu.Unlock()
     if !ok {
         return MsgLog{Code: 1, Error: "Job not exist!"}
     }
@@ -347,11 +328,11 @@ func (scheduler *SchedulerCapacity) Summary() MsgSummary {
     }
     scheduler.historyMu.Unlock()

-    scheduler.queueMu.Lock()
+    scheduler.queuesMu.Lock()
     for _, v := range scheduler.queues {
         tmp = append(tmp, v...)
     }
-    scheduler.queueMu.Unlock()
+    scheduler.queuesMu.Unlock()

     for _, job := range tmp {
         switch job.Status {
@@ -383,7 +364,7 @@ func (scheduler *SchedulerCapacity) UpdateNextQueue() {

     NumberGPU := float64(InstanceOfResourcePool().TotalGPU) + 0.00001

-    scheduler.queueMu.Lock()
+    scheduler.queuesMu.Lock()
     for k, t := range scheduler.queues {
         if len(t) == 0 {
             continue
@@ -407,7 +388,7 @@ func (scheduler *SchedulerCapacity) UpdateNextQueue() {
         }
     }
     scheduler.nextQueue = next
-    scheduler.queueMu.Unlock()
+    scheduler.queuesMu.Unlock()
     log.Debug("updateNextQueue ->", next)
 }

@@ -430,11 +411,5 @@ func (scheduler *SchedulerCapacity) UpdateParallelism(parallelism int) bool {
 }

 func (scheduler *SchedulerCapacity) updateGroup(group Group) bool {
-    num := 0
-    for _, g := range InstanceOfGroupManager().List().Groups {
-        if g.Reserved {
-            num += g.NumGPU
-        }
-    }
     return true
 }
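Note that the loop removed in the last hunk computed num (the total of reserved GPUs across groups) but never used the result; updateGroup returned true unconditionally both before and after, so this deletion only strips dead code.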