1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-13 07:46:43 +00:00
This commit is contained in:
2020-05-01 14:54:29 +08:00
parent 61415a612e
commit d726c0b850
2 changed files with 36 additions and 10 deletions

View File

@@ -27,28 +27,54 @@ func (jm *JobManager) start() {
InstanceJobHistoryLogger().submitJob(jm.job)
/* request for resources */
for range jm.job.Tasks {
jm.resources = append(jm.resources, NodeStatus{})
for range jm.job.Tasks { //append would cause uncertain order
jm.resources = append(jm.resources, NodeStatus{ClientID: "null"})
}
for i := range jm.job.Tasks {
start := time.Now().Unix()
for i := 0; i < len(jm.job.Tasks); i++ {
var resource NodeStatus
for {
if jm.killedFlag {
break
}
resource = jm.scheduler.AcquireResource(jm.job, jm.job.Tasks[i], jm.resources)
var tmp []NodeStatus
for _, t := range jm.resources {
if t.ClientID != "null" {
tmp = append(tmp, t)
}
}
resource = jm.scheduler.AcquireResource(jm.job, jm.job.Tasks[i], tmp)
if len(resource.Status) > 0 {
break
}
if time.Now().Unix()-start > 30 {
log.Info("Wait too long, return all resource and retry")
for _, tt := range jm.resources {
if tt.ClientID != "null" {
jm.scheduler.ReleaseResource(jm.job, tt)
log.Info("return resource ", tt.ClientID)
jm.resources[i].ClientID = "null"
for _, t := range tt.Status {
jm.scheduler.Detach(t.UUID, jm.job.Name)
}
}
}
i = -1
start = time.Now().Unix()
}
time.Sleep(time.Second * 1)
}
if len(resource.Status) > 0 {
log.Info("Receive resource", resource)
jm.resources[i] = resource
for _, t := range resource.Status {
jm.scheduler.Attach(t.UUID, jm.job.Name)
}
}
}
jm.scheduler.UpdateProgress(jm.job.Name, Running)

View File

@@ -14,7 +14,7 @@ type Optimizer struct {
jobUtilsGPU map[string]*OptimizerUtilGPU
heartbeatInterval int
cache map[string]*OptimizerJobExecutionTime
}
var optimizerInstance *Optimizer
@@ -28,7 +28,7 @@ func InstanceOfOptimizer() *Optimizer {
optimizerInstance = &Optimizer{}
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
optimizerInstance.heartbeatInterval = 3
optimizerInstance.cache = map[string]*OptimizerJobExecutionTime{}
}
return optimizerInstance
}