diff --git a/src/job_manager.go b/src/job_manager.go
index f006ff1..a1d1226 100644
--- a/src/job_manager.go
+++ b/src/job_manager.go
@@ -19,7 +19,7 @@ type JobManager struct {
 }
 
 func (jm *JobManager) start() {
-	log.Info("start job ", jm.job.Name, time.Now())
+	log.Debug("start job ", jm.job.Name, time.Now())
 	jm.jobStatus = JobStatus{Name: jm.job.Name, tasks: map[string]TaskStatus{}}
 
 	network := jm.scheduler.AcquireNetwork()
@@ -39,7 +39,7 @@ func (jm *JobManager) start() {
 			}
 			time.Sleep(time.Second * 1)
 		}
-		log.Info("Receive resource", resource)
+		log.Debug("Receive resource", resource)
 		jm.resources = append(jm.resources, resource)
 
 		for _, t := range resource.Status {
@@ -49,7 +49,7 @@ func (jm *JobManager) start() {
 	}
 
 	jm.scheduler.UpdateProgress(jm.job.Name, Running)
-	log.Info("ready to run job ", jm.job.Name, time.Now())
+	log.Debug("ready to run job ", jm.job.Name, time.Now())
 
 	/* bring up containers */
 	for i := range jm.job.Tasks {
@@ -123,7 +123,7 @@ func (jm *JobManager) start() {
 
 		/* return resource */
 		jm.scheduler.ReleaseResource(jm.job, jm.resources[i])
-		log.Info("return resource ", jm.resources[i].ClientID)
+		log.Debug("return resource ", jm.resources[i].ClientID)
 
 		for _, t := range jm.resources[i].Status {
 			jm.scheduler.Detach(t.UUID, jm.job.Name)
diff --git a/src/resource_pool.go b/src/resource_pool.go
index f2b769e..6ead8fa 100644
--- a/src/resource_pool.go
+++ b/src/resource_pool.go
@@ -174,7 +174,7 @@ func (pool *ResourcePool) acquireNetwork() string {
 	pool.networkMu.Lock()
 	defer pool.networkMu.Unlock()
 	var network string
-	log.Info(pool.networksFree)
+	log.Debug(pool.networksFree)
 	if len(pool.networksFree) == 0 {
 		for {
 			for {
diff --git a/src/scheduler_fair.go b/src/scheduler_fair.go
index c54d3ad..903d421 100644
--- a/src/scheduler_fair.go
+++ b/src/scheduler_fair.go
@@ -71,6 +71,7 @@ func (scheduler *SchedulerFair) Start() {
 				jm.start()
 			}()
 		} else {
+			log.Info("No more jobs to schedule", time.Now())
 			scheduler.scheduling.Unlock()
 			go func() {
 				scheduler.UpdateNextQueue()
@@ -206,14 +207,15 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
 func (scheduler *SchedulerFair) ReleaseResource(job Job, agent NodeStatus) {
 	pool.mu.Lock()
 	defer pool.mu.Unlock()
-	nodes := pool.nodes[agent.ClientID]
+	node := pool.nodes[agent.ClientID]
 	for _, gpu := range agent.Status {
-		for j := range nodes.Status {
-			if gpu.UUID == nodes.Status[j].UUID {
-				nodes.Status[j].MemoryAllocated -= gpu.MemoryTotal
-				if nodes.Status[j].MemoryAllocated < 0 {
+		for j := range node.Status {
+			if gpu.UUID == node.Status[j].UUID {
+				node.Status[j].MemoryAllocated -= gpu.MemoryTotal
+				if node.Status[j].MemoryAllocated < 0 {
 					// in case of error
-					nodes.Status[j].MemoryAllocated = 0
+					log.Warn(node.ClientID, "More Memory Allocated")
+					node.Status[j].MemoryAllocated = 0
 				}
 			}
 		}
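
Note on the ReleaseResource hunk: releasing a node's GPUs subtracts each released GPU's MemoryTotal from the matching per-GPU MemoryAllocated, and the patch now warns when that counter would go negative instead of clamping it silently. A minimal, self-contained sketch of that clamp-and-warn bookkeeping (the struct shapes and names below are assumed from the hunk, and the standard library log stands in for the project's logger):

package main

import "log"

// GPUStatus mirrors the per-GPU fields used in the hunk (assumed shape).
type GPUStatus struct {
	UUID            string
	MemoryTotal     int
	MemoryAllocated int
}

// NodeStatus mirrors the node fields used in the hunk (assumed shape).
type NodeStatus struct {
	ClientID string
	Status   []GPUStatus
}

// releaseGPUs subtracts each released GPU's total memory from the node's
// per-GPU allocation, clamping at zero and warning on over-release, as in
// the ReleaseResource hunk above.
func releaseGPUs(node *NodeStatus, released []GPUStatus) {
	for _, gpu := range released {
		for j := range node.Status {
			if gpu.UUID != node.Status[j].UUID {
				continue
			}
			node.Status[j].MemoryAllocated -= gpu.MemoryTotal
			if node.Status[j].MemoryAllocated < 0 {
				// Accounting went negative: more memory was released than was
				// allocated. Surface the bookkeeping error instead of hiding it.
				log.Printf("%s: memory allocated went negative, clamping to 0", node.ClientID)
				node.Status[j].MemoryAllocated = 0
			}
		}
	}
}

func main() {
	node := NodeStatus{
		ClientID: "node-1",
		Status:   []GPUStatus{{UUID: "gpu-0", MemoryTotal: 16, MemoryAllocated: 16}},
	}
	// Release the same GPU twice to trigger the clamp-and-warn path.
	releaseGPUs(&node, []GPUStatus{{UUID: "gpu-0", MemoryTotal: 16}})
	releaseGPUs(&node, []GPUStatus{{UUID: "gpu-0", MemoryTotal: 16}})
	log.Println("final allocated:", node.Status[0].MemoryAllocated) // 0
}

Because the release path subtracts MemoryTotal rather than the amount actually still allocated, a double release can drive the counter below zero; the added log.Warn makes that visible while the clamp keeps later scheduling decisions from acting on a negative value.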