1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-16 00:26:43 +00:00
This commit is contained in:
2020-05-24 21:07:02 +08:00
parent 73600d3595
commit 0705c0630d
13 changed files with 486 additions and 843 deletions

View File

@@ -4,7 +4,6 @@ import (
"sync"
"time"
log "github.com/sirupsen/logrus"
"math/rand"
)
type SchedulerFCFS struct {
@@ -87,64 +86,13 @@ func (scheduler *SchedulerFCFS) Schedule(job Job) {
job.Status = Created
}
func (scheduler *SchedulerFCFS) AcquireResource(job Job, task Task, nodes []NodeStatus) NodeStatus {
segID := rand.Intn(pool.poolsCount)
seg := &pool.pools[segID]
if seg.Nodes == nil {
seg = seg.Next
}
res := NodeStatus{}
for id, node := range seg.Nodes {
var available []GPUStatus
for _, status := range node.Status {
if status.MemoryTotal-status.MemoryAllocated >= task.MemoryGPU {
available = append(available, status)
}
}
if len(available) >= task.NumberGPU {
res.ClientID = id
res.ClientHost = node.ClientHost
res.Status = available[0:task.NumberGPU]
res.NumCPU = task.NumberCPU
res.MemTotal = task.Memory
for i := range res.Status {
for j := range node.Status {
if res.Status[i].UUID == node.Status[j].UUID {
node.Status[j].MemoryAllocated += task.MemoryGPU
res.Status[i].MemoryTotal = task.MemoryGPU
}
}
}
break
}
}
// AcquireResource asks the shared resource pool instance for the nodes to run
// job on and returns the pool's answer unchanged.
func (scheduler *SchedulerFCFS) AcquireResource(job Job) []NodeStatus {
	return InstanceOfResourcePool().acquireResource(job)
}
// ReleaseResource gives back the GPU memory listed in agent.Status for the
// node identified by agent.ClientID, then forwards the release to the resource
// pool instance.
//
// NOTE(review): this span comes from a diff hunk whose +/- markers are not
// visible. The manual bookkeeping and the trailing releaseResource call are
// presumably the pre- and post-change bodies; confirm against the repository
// before assuming both are meant to run.
func (scheduler *SchedulerFCFS) ReleaseResource(job Job, agent NodeStatus) {
// Locate the pool segment that owns this node.
segID := pool.getNodePool(agent.ClientID)
seg := &pool.pools[segID]
// A segment without its own node map defers to the next segment.
if seg.Nodes == nil {
seg = seg.Next
}
// The segment lock is held until the function returns, which includes the
// releaseResource call at the end of the body.
seg.Lock.Lock()
defer seg.Lock.Unlock()
// NOTE(review): the lookup result is used without checking that the node
// exists in the map — confirm agent.ClientID is always present.
node := seg.Nodes[agent.ClientID]
// Match every released GPU to the node's GPU by UUID and subtract the amount
// carried in the released entry's MemoryTotal field.
for _, gpu := range agent.Status {
for j := range node.Status {
if gpu.UUID == node.Status[j].UUID {
node.Status[j].MemoryAllocated -= gpu.MemoryTotal
if node.Status[j].MemoryAllocated < 0 {
// A negative balance means more was released than recorded as allocated;
// log it and clamp the counter back to zero.
// NOTE(review): the two log arguments are passed without a separator.
log.Warn(node.ClientID, "More Memory Allocated")
node.Status[j].MemoryAllocated = 0
}
}
}
}
InstanceOfResourcePool().releaseResource(job, agent)
}
func (scheduler *SchedulerFCFS) QueryState(jobName string) MsgJobStatus {
@@ -203,7 +151,7 @@ func (scheduler *SchedulerFCFS) Summary() MsgSummary {
break
case Running:
runningJobsCounter++
break;
break
case Finished:
finishedJobsCounter++
case Stopped:
@@ -214,49 +162,11 @@ func (scheduler *SchedulerFCFS) Summary() MsgSummary {
summary.JobsPending = pendingJobsCounter
summary.JobsRunning = runningJobsCounter
FreeGPU := 0
UsingGPU := 0
start := pool.pools[0].Next
for cur := start; ; {
cur.Lock.Lock()
for _, node := range cur.Nodes {
for j := range node.Status {
if node.Status[j].MemoryAllocated == 0 {
FreeGPU++
} else {
UsingGPU++
}
}
}
cur.Lock.Unlock()
cur = cur.Next
if cur == start {
break
}
}
summary.FreeGPU = FreeGPU
summary.UsingGPU = UsingGPU
summary.FreeGPU, summary.UsingGPU = InstanceOfResourcePool().countGPU()
return summary
}
// AcquireNetwork obtains a network name from the shared pool and hands it back
// to the caller as-is.
func (scheduler *SchedulerFCFS) AcquireNetwork() string {
	network := pool.acquireNetwork()
	return network
}
// ReleaseNetwork passes network to the shared pool's releaseNetwork. The
// scheduler itself keeps no record of the network.
func (scheduler *SchedulerFCFS) ReleaseNetwork(network string) {
pool.releaseNetwork(network)
}
// Attach forwards the GPU identifier and job name to the shared pool's attach.
// Note that job is a plain string here, whereas Detach takes a Job value.
func (scheduler *SchedulerFCFS) Attach(GPU string, job string) {
pool.attach(GPU, job)
}
// Detach forwards the GPU identifier and the job to the shared pool's detach.
func (scheduler *SchedulerFCFS) Detach(GPU string, job Job) {
pool.detach(GPU, job)
}
func (scheduler *SchedulerFCFS) Enable() bool {
scheduler.enabled = true
return true
@@ -273,18 +183,6 @@ func (scheduler *SchedulerFCFS) UpdateParallelism(parallelism int) bool {
return true
}
// SetShareRatio accepts a share ratio for interface compatibility. The
// assignment to the scheduler is commented out, so the value is only logged
// and never stored.
//
// It always returns true.
func (scheduler *SchedulerFCFS) SetShareRatio(ratio float64) bool {
	//scheduler.enableShareRatio = ratio
	// The message ends with a space because the logger joins its arguments
	// Sprint-style, which inserts no separator next to a string operand.
	log.Info("enableShareRatio is updated to ", ratio)
	return true
}
// SetPreScheduleRatio accepts a pre-schedule ratio for interface
// compatibility. The assignment to the scheduler is commented out, so the
// value is only logged and never stored.
//
// It always returns true.
func (scheduler *SchedulerFCFS) SetPreScheduleRatio(ratio float64) bool {
	//scheduler.enablePreScheduleRatio = ratio
	// The message ends with a space because the logger joins its arguments
	// Sprint-style, which inserts no separator next to a string operand.
	log.Info("enablePreScheduleRatio is updated to ", ratio)
	return true
}
// updateGroup ignores group and reports success unconditionally. This
// scheduler does nothing with the argument.
func (scheduler *SchedulerFCFS) updateGroup(group Group) bool {
return true
}