diff --git a/src/ga.go b/src/ga.go
index a83c179..7ba1a6e 100644
--- a/src/ga.go
+++ b/src/ga.go
@@ -583,7 +583,7 @@ func VectorFactory(rng *rand.Rand) eaopt.Genome {
 }
 
 func main3() {
-	numTask := 5
+	numTask := 20
 
 	nodesMap = map[string]Node{}
 	tasksMap = map[string]Task{}
@@ -646,7 +646,7 @@ func main3() {
 	ga.EarlyStop = func(ga *eaopt.GA) bool {
 		gap := math.Abs(ga.HallOfFame[0].Fitness - bestFitness)
 		if gap <= 0.000001 || ga.HallOfFame[0].Fitness >= bestFitness {
-			if count >= 50 || time.Since(ts) > time.Second*30 {
+			if count >= 30 || time.Since(ts) > time.Second*30 {
 				fmt.Println("Early Stop")
 				return true
 			} else {
diff --git a/src/job_manager.go b/src/job_manager.go
index 77c03ea..b79ab86 100644
--- a/src/job_manager.go
+++ b/src/job_manager.go
@@ -161,7 +161,7 @@ func (jm *JobManager) start() {
 		if !jm.isRunning {
 			break
 		}
-		time.Sleep(time.Second * 10)
+		time.Sleep(time.Second * 25)
 	}
 }
 
diff --git a/src/resource_pool.go b/src/resource_pool.go
index b7935e6..b1631f1 100644
--- a/src/resource_pool.go
+++ b/src/resource_pool.go
@@ -62,11 +62,11 @@ func (pool *ResourcePool) start() {
 	for i := 0; i < pool.poolsCount; i++ {
 		pool.pools = append(pool.pools, PoolSeg{Lock: sync.Mutex{}, IsVirtual: true, ID: i})
 	}
-	/* make non-virtual seg */
+	/* make non-virtual segs */
 	for i := 0; i < pool.poolsCount/3; i++ {
 		pool.pools[rand.Intn(pool.poolsCount)].IsVirtual = false
 	}
-	/* make working srg */
+	/* generate working segs */
 	for i := 0; i < 10; i++ {
 		pool.pools[rand.Intn(pool.poolsCount)].Nodes = map[string]*NodeStatus{}
 	}
diff --git a/src/scheduler_fair.go b/src/scheduler_fair.go
index 358992d..36a4bbd 100644
--- a/src/scheduler_fair.go
+++ b/src/scheduler_fair.go
@@ -344,9 +344,10 @@ func (scheduler *SchedulerFair) Schedule(job Job) {
 }
 
 func (scheduler *SchedulerFair) AcquireResource(job Job, task Task, nodes []NodeStatus) NodeStatus {
-	scheduler.mu.Lock()
-	defer scheduler.mu.Unlock()
 	segID := rand.Intn(pool.poolsCount)
+	if pool.TotalGPU < 100 {
+		segID = 0
+	}
 	res := NodeStatus{}
 	start := &pool.pools[segID]
 	if start.Nodes == nil {
@@ -366,7 +367,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task, nodes []Node
 	allocationType = 1
 	if util, valid := InstanceOfOptimizer().predictUtilGPU(job.Name); valid {
 
-		for cur := start; ; {
+		for cur := start; cur.ID < cur.Next.ID; {
 			if _, ok := locks[cur.ID]; !ok {
 				cur.Lock.Lock()
 				locks[cur.ID] = &cur.Lock
@@ -430,7 +431,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task, nodes []NodeStatus) NodeStatus {
 	}
 	if len(candidates) == 0 && flag {
 		allocationType = 2
-		for cur := start; ; {
+		for cur := start; cur.ID < cur.Next.ID; {
 			if _, ok := locks[cur.ID]; !ok {
 				cur.Lock.Lock()
 				locks[cur.ID] = &cur.Lock
@@ -471,7 +472,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task, nodes []Node
 	if pool.TotalGPU != 0 && float64(scheduler.UsingGPU)/float64(pool.TotalGPU) >= scheduler.enablePreScheduleRatio && valid {
 		allocationType = 3
 
-		for cur := start; ; {
+		for cur := start; cur.ID < cur.Next.ID; {
 			if _, ok := locks[cur.ID]; !ok {
 				cur.Lock.Lock()
 				locks[cur.ID] = &cur.Lock
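Note on the AcquireResource change: the three segment-traversal loops now terminate on cur.ID < cur.Next.ID instead of looping with an empty condition, and the coarse scheduler.mu lock is replaced by per-segment locks tracked in the locks map. Below is a minimal, self-contained sketch of that traversal pattern, assuming a simplified PoolSeg with only the ID, Next, and Lock fields used by the loop; the per-segment work is a placeholder, not the scheduler's actual allocation logic.

package main

import (
	"fmt"
	"sync"
)

// PoolSeg is a simplified stand-in for the scheduler's segment type:
// only the fields the traversal condition relies on are kept.
type PoolSeg struct {
	ID   int
	Next *PoolSeg
	Lock sync.Mutex
}

func main() {
	// Build a small ring of segments: 0 -> 1 -> 2 -> 3 -> 0.
	segs := make([]PoolSeg, 4)
	for i := range segs {
		segs[i].ID = i
		segs[i].Next = &segs[(i+1)%len(segs)]
	}

	// Per-segment locks acquired during the walk, keyed by segment ID.
	locks := map[int]*sync.Mutex{}

	// The loop condition mirrors the patched code: keep walking while the
	// next segment's ID is larger, and stop once Next wraps back toward the
	// start of the ring, so the walk visits each segment at most once.
	for cur := &segs[0]; cur.ID < cur.Next.ID; cur = cur.Next {
		if _, ok := locks[cur.ID]; !ok {
			cur.Lock.Lock()
			locks[cur.ID] = &cur.Lock
		}
		fmt.Println("visiting segment", cur.ID) // placeholder for allocation work
	}

	// Release everything locked during the walk.
	for _, l := range locks {
		l.Unlock()
	}
}

With this condition, a walk that starts mid-ring ends as soon as the Next pointer wraps to a lower ID, so segments past the wrap point are not revisited in that pass.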