mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-12-13 07:46:43 +00:00
minify logs: demote routine log.Info calls to log.Debug
This commit is contained in:
@@ -19,7 +19,7 @@ type JobManager struct {
|
||||
}
|
||||
|
||||
func (jm *JobManager) start() {
|
||||
log.Info("start job ", jm.job.Name, time.Now())
|
||||
log.Debug("start job ", jm.job.Name, time.Now())
|
||||
jm.jobStatus = JobStatus{Name: jm.job.Name, tasks: map[string]TaskStatus{}}
|
||||
|
||||
network := jm.scheduler.AcquireNetwork()
|
||||
@@ -39,7 +39,7 @@ func (jm *JobManager) start() {
|
||||
}
|
||||
time.Sleep(time.Second * 1)
|
||||
}
|
||||
log.Info("Receive resource", resource)
|
||||
log.Debug("Receive resource", resource)
|
||||
jm.resources = append(jm.resources, resource)
|
||||
|
||||
for _, t := range resource.Status {
|
||||
@@ -49,7 +49,7 @@ func (jm *JobManager) start() {
|
||||
}
|
||||
jm.scheduler.UpdateProgress(jm.job.Name, Running)
|
||||
|
||||
log.Info("ready to run job ", jm.job.Name, time.Now())
|
||||
log.Debug("ready to run job ", jm.job.Name, time.Now())
|
||||
|
||||
/* bring up containers */
|
||||
for i := range jm.job.Tasks {
|
||||
@@ -123,7 +123,7 @@ func (jm *JobManager) start() {
|
||||
|
||||
/* return resource */
|
||||
jm.scheduler.ReleaseResource(jm.job, jm.resources[i])
|
||||
log.Info("return resource ", jm.resources[i].ClientID)
|
||||
log.Debug("return resource ", jm.resources[i].ClientID)
|
||||
|
||||
for _, t := range jm.resources[i].Status {
|
||||
jm.scheduler.Detach(t.UUID, jm.job.Name)
|
||||
|
||||
@@ -174,7 +174,7 @@ func (pool *ResourcePool) acquireNetwork() string {
|
||||
pool.networkMu.Lock()
|
||||
defer pool.networkMu.Unlock()
|
||||
var network string
|
||||
log.Info(pool.networksFree)
|
||||
log.Debug(pool.networksFree)
|
||||
if len(pool.networksFree) == 0 {
|
||||
for {
|
||||
for {
|
||||
|
||||
@@ -71,6 +71,7 @@ func (scheduler *SchedulerFair) Start() {
|
||||
jm.start()
|
||||
}()
|
||||
} else {
|
||||
log.Info("No more jobs to schedule", time.Now())
|
||||
scheduler.scheduling.Unlock()
|
||||
go func() {
|
||||
scheduler.UpdateNextQueue()
|
||||
@@ -206,14 +207,15 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
|
||||
func (scheduler *SchedulerFair) ReleaseResource(job Job, agent NodeStatus) {
|
||||
pool.mu.Lock()
|
||||
defer pool.mu.Unlock()
|
||||
nodes := pool.nodes[agent.ClientID]
|
||||
node := pool.nodes[agent.ClientID]
|
||||
for _, gpu := range agent.Status {
|
||||
for j := range nodes.Status {
|
||||
if gpu.UUID == nodes.Status[j].UUID {
|
||||
nodes.Status[j].MemoryAllocated -= gpu.MemoryTotal
|
||||
if nodes.Status[j].MemoryAllocated < 0 {
|
||||
for j := range node.Status {
|
||||
if gpu.UUID == node.Status[j].UUID {
|
||||
node.Status[j].MemoryAllocated -= gpu.MemoryTotal
|
||||
if node.Status[j].MemoryAllocated < 0 {
|
||||
// in case of error
|
||||
nodes.Status[j].MemoryAllocated = 0
|
||||
log.Warn(node.ClientID, "More Memory Allocated")
|
||||
node.Status[j].MemoryAllocated = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user