diff --git a/src/job_manager.go b/src/job_manager.go index 743a2f1..476ee01 100644 --- a/src/job_manager.go +++ b/src/job_manager.go @@ -98,6 +98,7 @@ func (jm *JobManager) start() { for { res := jm.status() flag := false + onlyPS := true for i := range res.Status { if res.Status[i].Status == "ready" { log.Debug(jm.job.Name, "-", i, " is ready to run") @@ -105,10 +106,12 @@ func (jm *JobManager) start() { } else if res.Status[i].Status == "running" { log.Debug(jm.job.Name, "-", i, " is running") flag = true + if !jm.job.Tasks[i].IsPS { + onlyPS = false + } InstanceJobHistoryLogger().submitTaskStatus(jm.job.Name, res.Status[i]) } else { log.Info(jm.job.Name, "-", i, " ", res.Status[i].Status) - /* save logs etc. */ /* remove exited containers */ @@ -132,6 +135,10 @@ func (jm *JobManager) start() { InstanceJobHistoryLogger().submitTaskStatus(jm.job.Name, res.Status[i]) } } + if onlyPS { + jm.stop() + break + } if !flag { break } diff --git a/src/util.go b/src/util.go index 871d850..2b1e32e 100644 --- a/src/util.go +++ b/src/util.go @@ -141,6 +141,8 @@ type Task struct { Memory int `json:"memory"` NumberGPU int `json:"gpu_number"` MemoryGPU int `json:"gpu_memory"` + IsPS bool `json:"is_ps"` + ModelGPU bool `json:"gpu_model"` } type Group struct {