mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-12-13 07:46:43 +00:00
update
This commit is contained in:
@@ -27,27 +27,53 @@ func (jm *JobManager) start() {
|
|||||||
InstanceJobHistoryLogger().submitJob(jm.job)
|
InstanceJobHistoryLogger().submitJob(jm.job)
|
||||||
|
|
||||||
/* request for resources */
|
/* request for resources */
|
||||||
for range jm.job.Tasks {
|
for range jm.job.Tasks { //append would cause uncertain order
|
||||||
jm.resources = append(jm.resources, NodeStatus{})
|
jm.resources = append(jm.resources, NodeStatus{ClientID: "null"})
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range jm.job.Tasks {
|
start := time.Now().Unix()
|
||||||
|
for i := 0; i < len(jm.job.Tasks); i++ {
|
||||||
var resource NodeStatus
|
var resource NodeStatus
|
||||||
for {
|
for {
|
||||||
if jm.killedFlag {
|
if jm.killedFlag {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
resource = jm.scheduler.AcquireResource(jm.job, jm.job.Tasks[i], jm.resources)
|
|
||||||
|
var tmp []NodeStatus
|
||||||
|
for _, t := range jm.resources {
|
||||||
|
if t.ClientID != "null" {
|
||||||
|
tmp = append(tmp, t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resource = jm.scheduler.AcquireResource(jm.job, jm.job.Tasks[i], tmp)
|
||||||
if len(resource.Status) > 0 {
|
if len(resource.Status) > 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if time.Now().Unix()-start > 30 {
|
||||||
|
log.Info("Wait too long, return all resource and retry")
|
||||||
|
for _, tt := range jm.resources {
|
||||||
|
if tt.ClientID != "null" {
|
||||||
|
jm.scheduler.ReleaseResource(jm.job, tt)
|
||||||
|
log.Info("return resource ", tt.ClientID)
|
||||||
|
jm.resources[i].ClientID = "null"
|
||||||
|
for _, t := range tt.Status {
|
||||||
|
jm.scheduler.Detach(t.UUID, jm.job.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i = -1
|
||||||
|
start = time.Now().Unix()
|
||||||
|
}
|
||||||
time.Sleep(time.Second * 1)
|
time.Sleep(time.Second * 1)
|
||||||
}
|
}
|
||||||
log.Info("Receive resource", resource)
|
if len(resource.Status) > 0 {
|
||||||
jm.resources[i] = resource
|
log.Info("Receive resource", resource)
|
||||||
|
jm.resources[i] = resource
|
||||||
|
|
||||||
for _, t := range resource.Status {
|
for _, t := range resource.Status {
|
||||||
jm.scheduler.Attach(t.UUID, jm.job.Name)
|
jm.scheduler.Attach(t.UUID, jm.job.Name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ type Optimizer struct {
|
|||||||
|
|
||||||
jobUtilsGPU map[string]*OptimizerUtilGPU
|
jobUtilsGPU map[string]*OptimizerUtilGPU
|
||||||
|
|
||||||
heartbeatInterval int
|
cache map[string]*OptimizerJobExecutionTime
|
||||||
}
|
}
|
||||||
|
|
||||||
var optimizerInstance *Optimizer
|
var optimizerInstance *Optimizer
|
||||||
@@ -28,7 +28,7 @@ func InstanceOfOptimizer() *Optimizer {
|
|||||||
optimizerInstance = &Optimizer{}
|
optimizerInstance = &Optimizer{}
|
||||||
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
|
optimizerInstance.predicts = map[string]*OptimizerJobExecutionTime{}
|
||||||
optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
|
optimizerInstance.jobUtilsGPU = map[string]*OptimizerUtilGPU{}
|
||||||
optimizerInstance.heartbeatInterval = 3
|
optimizerInstance.cache = map[string]*OptimizerJobExecutionTime{}
|
||||||
}
|
}
|
||||||
return optimizerInstance
|
return optimizerInstance
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user