1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-06-08 06:41:56 +00:00
YAO-scheduler/src/AllocatorFIFO.go

197 lines
4.1 KiB
Go
Raw Normal View History

2019-03-20 03:14:07 +00:00
package main
import (
"sync"
"time"
2019-05-13 08:31:26 +00:00
)
2019-03-20 03:14:07 +00:00
type AllocatorFIFO struct {
2019-03-25 07:36:30 +00:00
history []*Job
queue []Job
mu sync.Mutex
2019-03-20 03:14:07 +00:00
scheduling sync.Mutex
2019-03-25 07:36:30 +00:00
jobs map[string]*JobManager
2019-03-20 03:14:07 +00:00
}
func (allocator *AllocatorFIFO) start() {
2019-03-25 07:36:30 +00:00
allocator.jobs = map[string]*JobManager{}
allocator.history = []*Job{}
2019-03-20 03:14:07 +00:00
go func() {
for {
//fmt.Print("Scheduling ")
time.Sleep(time.Second * 5)
allocator.scheduling.Lock()
allocator.mu.Lock()
if len(allocator.queue) > 0 {
jm := JobManager{}
jm.job = allocator.queue[0]
allocator.queue = allocator.queue[1:]
jm.allocator = allocator
2019-03-25 07:36:30 +00:00
allocator.jobs[jm.job.Name] = &jm
for i := range allocator.history {
if allocator.history[i].Name == jm.job.Name {
allocator.history[i].Status = Starting
}
}
2019-03-20 03:14:07 +00:00
go func() {
jm.start()
}()
} else {
allocator.scheduling.Unlock()
}
allocator.mu.Unlock()
}
}()
}
2019-03-25 07:36:30 +00:00
func (allocator *AllocatorFIFO) ack(job *Job) {
2019-03-20 03:14:07 +00:00
allocator.scheduling.Unlock()
2019-04-12 09:21:09 +00:00
}
func (allocator *AllocatorFIFO) running(job *Job) {
2019-03-25 07:36:30 +00:00
for i := range allocator.history {
if allocator.history[i].Name == job.Name {
allocator.history[i].Status = Running
}
}
}
func (allocator *AllocatorFIFO) finish(job *Job) {
for i := range allocator.history {
if allocator.history[i].Name == job.Name {
allocator.history[i].Status = Finished
}
}
2019-03-20 03:14:07 +00:00
}
func (allocator *AllocatorFIFO) schedule(job Job) {
allocator.mu.Lock()
defer allocator.mu.Unlock()
allocator.queue = append(allocator.queue, job)
2019-03-25 07:36:30 +00:00
allocator.history = append(allocator.history, &job)
2019-03-20 03:14:07 +00:00
}
2019-04-16 08:59:19 +00:00
func (allocator *AllocatorFIFO) requestResource(task Task) NodeStatus {
2019-03-20 03:14:07 +00:00
pool.mu.Lock()
defer pool.mu.Unlock()
2019-04-16 08:59:19 +00:00
res := NodeStatus{}
2019-03-20 03:14:07 +00:00
for id, node := range pool.nodes {
2019-04-16 08:59:19 +00:00
var available []GPUStatus
for _, status := range node.Status {
2019-05-13 08:31:26 +00:00
if status.MemoryTotal-status.MemoryAllocated >= task.MemoryGPU {
2019-03-20 03:14:07 +00:00
available = append(available, status)
}
}
if len(available) >= task.NumberGPU {
res.ClientID = id
2019-04-16 08:59:19 +00:00
res.ClientHost = node.ClientHost
2019-03-20 03:14:07 +00:00
res.Status = available[0:task.NumberGPU]
for i := range res.Status {
2019-04-16 08:59:19 +00:00
for j := range node.Status {
if res.Status[i].UUID == node.Status[j].UUID {
2019-05-13 08:31:26 +00:00
node.Status[j].MemoryAllocated += task.MemoryGPU
res.Status[i].MemoryTotal = task.MemoryGPU
2019-03-20 03:14:07 +00:00
}
}
}
2019-04-29 12:35:05 +00:00
break
2019-03-20 03:14:07 +00:00
}
}
return res
}
2019-04-16 08:59:19 +00:00
func (allocator *AllocatorFIFO) returnResource(agent NodeStatus) {
2019-03-20 03:14:07 +00:00
pool.mu.Lock()
defer pool.mu.Unlock()
nodes := pool.nodes[agent.ClientID]
for _, gpu := range agent.Status {
2019-04-16 08:59:19 +00:00
for j := range nodes.Status {
if gpu.UUID == nodes.Status[j].UUID {
2019-05-13 08:31:26 +00:00
nodes.Status[j].MemoryAllocated -= gpu.MemoryTotal
2019-03-20 03:14:07 +00:00
}
}
}
}
func (allocator *AllocatorFIFO) status(jobName string) MsgJobStatus {
2019-03-25 07:36:30 +00:00
jm, ok := allocator.jobs[jobName]
if !ok {
return MsgJobStatus{Code: 1, Error: "Job not exist!"}
2019-03-20 03:14:07 +00:00
}
2019-03-25 07:36:30 +00:00
return jm.status()
2019-03-20 03:14:07 +00:00
}
2019-04-18 09:25:37 +00:00
// stop asks the JobManager registered under jobName to stop and returns its
// reply, or a MsgStop with Code 1 and an error message when no such job has
// been dispatched.
//
// The jobs map is written by the scheduling loop in start while it holds
// allocator.mu, so the lookup is done under the same lock. The lock is
// released before calling into the JobManager so that call does not run
// while mu is held.
func (allocator *AllocatorFIFO) stop(jobName string) MsgStop {
	allocator.mu.Lock()
	jm, ok := allocator.jobs[jobName]
	allocator.mu.Unlock()

	if !ok {
		return MsgStop{Code: 1, Error: "Job not exist!"}
	}
	return jm.stop()
}
2019-03-25 07:36:30 +00:00
func (allocator *AllocatorFIFO) logs(jobName string, taskName string) MsgLog {
jm, ok := allocator.jobs[jobName]
if !ok {
return MsgLog{Code: 1, Error: "Job not exist!"}
2019-03-20 03:14:07 +00:00
}
2019-03-25 07:36:30 +00:00
return jm.logs(taskName)
}
2019-03-20 03:14:07 +00:00
2019-03-25 07:36:30 +00:00
func (allocator *AllocatorFIFO) listJobs() MsgJobList {
return MsgJobList{Code: 0, Jobs: allocator.history}
2019-03-20 03:14:07 +00:00
}
2019-04-12 09:21:09 +00:00
func (allocator *AllocatorFIFO) summary() MsgSummary {
summary := MsgSummary{}
summary.Code = 0
finishedJobsCounter := 0
runningJobsCounter := 0
pendingJobsCounter := 0
for _, job := range allocator.history {
switch job.Status {
case Created:
pendingJobsCounter++
case Starting:
pendingJobsCounter++
break
case Running:
runningJobsCounter++
break;
case Finished:
finishedJobsCounter++
case Stopped:
finishedJobsCounter++
}
}
summary.JobsFinished = finishedJobsCounter
summary.JobsPending = pendingJobsCounter
summary.JobsRunning = runningJobsCounter
FreeGPU := 0
UsingGPU := 0
for _, node := range pool.nodes {
2019-04-16 08:59:19 +00:00
for j := range node.Status {
if node.Status[j].MemoryAllocated == 0 {
2019-04-12 09:21:09 +00:00
FreeGPU++
} else {
UsingGPU++
}
}
}
summary.FreeGPU = FreeGPU
summary.UsingGPU = UsingGPU
return summary
}