mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 14:21:55 +00:00
update fair
This commit is contained in:
parent
fb56a936df
commit
b168389996
@ -30,18 +30,6 @@ type SchedulerCapacity struct {
|
|||||||
allocatingGPUMu sync.Mutex
|
allocatingGPUMu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
type FairJobSorter []Job
|
|
||||||
|
|
||||||
func (s FairJobSorter) Len() int {
|
|
||||||
return len(s)
|
|
||||||
}
|
|
||||||
func (s FairJobSorter) Swap(i, j int) {
|
|
||||||
s[i], s[j] = s[j], s[i]
|
|
||||||
}
|
|
||||||
func (s FairJobSorter) Less(i, j int) bool {
|
|
||||||
return s[i].CreatedAt < s[j].CreatedAt
|
|
||||||
}
|
|
||||||
|
|
||||||
func (scheduler *SchedulerCapacity) Start() {
|
func (scheduler *SchedulerCapacity) Start() {
|
||||||
log.Info("JS (capacity) started")
|
log.Info("JS (capacity) started")
|
||||||
|
|
||||||
@ -309,7 +297,7 @@ func (scheduler *SchedulerCapacity) ListJobs() MsgJobList {
|
|||||||
for _, v := range scheduler.queues {
|
for _, v := range scheduler.queues {
|
||||||
tmp = append(tmp, v...)
|
tmp = append(tmp, v...)
|
||||||
}
|
}
|
||||||
sort.Sort(FairJobSorter(tmp))
|
sort.Sort(JobSorter(tmp))
|
||||||
jobs = append(jobs, tmp...)
|
jobs = append(jobs, tmp...)
|
||||||
return MsgJobList{Code: 0, Jobs: jobs}
|
return MsgJobList{Code: 0, Jobs: jobs}
|
||||||
}
|
}
|
||||||
|
@ -674,7 +674,7 @@ func (scheduler *SchedulerFair) ListJobs() MsgJobList {
|
|||||||
for _, v := range scheduler.queues {
|
for _, v := range scheduler.queues {
|
||||||
tmp = append(tmp, v...)
|
tmp = append(tmp, v...)
|
||||||
}
|
}
|
||||||
sort.Sort(FairJobSorter(tmp))
|
sort.Sort(JobSorter(tmp))
|
||||||
jobs = append(jobs, tmp...)
|
jobs = append(jobs, tmp...)
|
||||||
return MsgJobList{Code: 0, Jobs: jobs}
|
return MsgJobList{Code: 0, Jobs: jobs}
|
||||||
}
|
}
|
||||||
|
@ -4,13 +4,18 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SchedulerPriority struct {
|
type SchedulerPriority struct {
|
||||||
history []*Job
|
history []*Job
|
||||||
queue []Job
|
historyMu sync.Mutex
|
||||||
mu sync.Mutex
|
|
||||||
scheduling sync.Mutex
|
queue []Job
|
||||||
|
queueMu sync.Mutex
|
||||||
|
|
||||||
|
schedulingJobs map[string]bool
|
||||||
|
schedulingMu sync.Mutex
|
||||||
|
|
||||||
jobs map[string]*JobManager
|
jobs map[string]*JobManager
|
||||||
enabled bool
|
enabled bool
|
||||||
@ -21,40 +26,87 @@ func (scheduler *SchedulerPriority) Start() {
|
|||||||
scheduler.jobs = map[string]*JobManager{}
|
scheduler.jobs = map[string]*JobManager{}
|
||||||
scheduler.history = []*Job{}
|
scheduler.history = []*Job{}
|
||||||
scheduler.enabled = true
|
scheduler.enabled = true
|
||||||
|
scheduler.parallelism = 1
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
flag := true
|
||||||
for {
|
for {
|
||||||
log.Info("Scheduling")
|
log.Debug("Scheduling")
|
||||||
time.Sleep(time.Second * 5)
|
if !flag { /* no more job */
|
||||||
scheduler.scheduling.Lock()
|
time.Sleep(time.Millisecond * 100)
|
||||||
scheduler.mu.Lock()
|
}
|
||||||
|
flag = false
|
||||||
|
scheduler.schedulingMu.Lock()
|
||||||
|
if len(scheduler.schedulingJobs) >= scheduler.parallelism {
|
||||||
|
scheduler.schedulingMu.Unlock()
|
||||||
|
time.Sleep(time.Millisecond * 100)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
scheduler.schedulingMu.Unlock()
|
||||||
|
|
||||||
|
scheduler.queueMu.Lock()
|
||||||
if len(scheduler.queue) > 0 {
|
if len(scheduler.queue) > 0 {
|
||||||
|
|
||||||
jm := JobManager{}
|
numberGPU := 0
|
||||||
jm.job = scheduler.queue[0]
|
for _, task := range scheduler.queue[0].Tasks {
|
||||||
scheduler.queue = scheduler.queue[1:]
|
numberGPU += task.NumberGPU
|
||||||
jm.scheduler = scheduler
|
}
|
||||||
scheduler.jobs[jm.job.Name] = &jm
|
if numberGPU <= (InstanceOfResourcePool().TotalGPU - InstanceOfResourcePool().UsingGPU) {
|
||||||
|
|
||||||
jm.job.Status = Starting
|
jm := JobManager{}
|
||||||
scheduler.history = append(scheduler.history, &jm.job)
|
jm.job = scheduler.queue[0]
|
||||||
|
scheduler.queue = scheduler.queue[1:]
|
||||||
|
jm.scheduler = scheduler
|
||||||
|
scheduler.jobs[jm.job.Name] = &jm
|
||||||
|
|
||||||
go func() {
|
jm.job.Status = Starting
|
||||||
jm.start()
|
scheduler.historyMu.Lock()
|
||||||
}()
|
scheduler.history = append(scheduler.history, &jm.job)
|
||||||
} else {
|
scheduler.historyMu.Unlock()
|
||||||
scheduler.scheduling.Unlock()
|
|
||||||
|
go func() {
|
||||||
|
jm.start()
|
||||||
|
}()
|
||||||
|
} else {
|
||||||
|
/* start preempt */
|
||||||
|
var jobs []Job
|
||||||
|
lowest := scheduler.queue[0].Priority
|
||||||
|
scheduler.historyMu.Lock()
|
||||||
|
for _, job := range scheduler.history {
|
||||||
|
if job.Priority < lowest {
|
||||||
|
jobs = []Job{*job}
|
||||||
|
lowest = job.Priority
|
||||||
|
} else if job.Priority == lowest {
|
||||||
|
jobs = append(jobs, *job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scheduler.historyMu.Unlock()
|
||||||
|
sort.Sort(JobSorter(jobs))
|
||||||
|
if len(jobs) > 0 {
|
||||||
|
job := jobs[0]
|
||||||
|
log.Info("Start preempt ", job.Name)
|
||||||
|
scheduler.Stop(job.Name)
|
||||||
|
scheduler.Schedule(job)
|
||||||
|
|
||||||
|
/* Remove from history */
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
scheduler.mu.Unlock()
|
scheduler.queueMu.Unlock()
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (scheduler *SchedulerPriority) UpdateProgress(job Job, state State) {
|
func (scheduler *SchedulerPriority) UpdateProgress(job Job, state State) {
|
||||||
|
scheduler.historyMu.Lock()
|
||||||
|
defer scheduler.historyMu.Unlock()
|
||||||
|
|
||||||
|
scheduler.schedulingMu.Lock()
|
||||||
|
delete(scheduler.schedulingJobs, job.Name)
|
||||||
|
scheduler.schedulingMu.Unlock()
|
||||||
|
|
||||||
switch state {
|
switch state {
|
||||||
case Running:
|
case Running:
|
||||||
scheduler.scheduling.Unlock()
|
|
||||||
|
|
||||||
for i := range scheduler.history {
|
for i := range scheduler.history {
|
||||||
if scheduler.history[i].Name == job.Name {
|
if scheduler.history[i].Name == job.Name {
|
||||||
scheduler.history[i].Status = Running
|
scheduler.history[i].Status = Running
|
||||||
@ -75,12 +127,20 @@ func (scheduler *SchedulerPriority) UpdateProgress(job Job, state State) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
|
case Failed:
|
||||||
|
for i := range scheduler.history {
|
||||||
|
if scheduler.history[i].Name == job.Name {
|
||||||
|
scheduler.history[i].Status = Failed
|
||||||
|
scheduler.history[i].UpdatedAt = int(time.Now().Unix())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (scheduler *SchedulerPriority) Schedule(job Job) {
|
func (scheduler *SchedulerPriority) Schedule(job Job) {
|
||||||
scheduler.mu.Lock()
|
scheduler.queueMu.Lock()
|
||||||
defer scheduler.mu.Unlock()
|
defer scheduler.queueMu.Unlock()
|
||||||
|
|
||||||
index := 0
|
index := 0
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user