mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 14:21:55 +00:00
update
This commit is contained in:
parent
0503752a51
commit
6eca76eed7
10
README.md
10
README.md
@ -90,4 +90,14 @@ GPU is occupied by which job(s)
|
||||
**DescribeJob**
|
||||
```
|
||||
?action=debug_optimizer_describe_job&job=
|
||||
```
|
||||
|
||||
**EnableBatchAllocation**
|
||||
```
|
||||
?action=pool_enable_batch
|
||||
```
|
||||
|
||||
**DisableBatchAllocation**
|
||||
```
|
||||
?action=pool_disable_batch
|
||||
```
|
14
src/main.go
14
src/main.go
@ -295,6 +295,20 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
case "pool_enable_batch":
|
||||
log.Debug("pool_enable_batch")
|
||||
js, _ := json.Marshal(InstanceOfResourcePool().EnableBatch())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
case "pool_disable_batch":
|
||||
log.Debug("pool_disable_batch")
|
||||
js, _ := json.Marshal(InstanceOfResourcePool().DisableBatch())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
default:
|
||||
http.Error(w, "Not Found", http.StatusNotFound)
|
||||
break
|
||||
|
@ -124,28 +124,36 @@ func (pool *ResourcePool) init(conf Configuration) {
|
||||
}()
|
||||
|
||||
go func() {
|
||||
time.Sleep(time.Second * 10)
|
||||
pool.batchMu.Lock()
|
||||
var tasks []Task
|
||||
for _, job := range pool.batchJobs {
|
||||
for _, task := range job.Tasks {
|
||||
task.Job = job.Name
|
||||
tasks = append(tasks, task)
|
||||
/* batch allocation */
|
||||
for {
|
||||
time.Sleep(time.Second * 15)
|
||||
if !pool.enableBatch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
job := Job{Tasks: tasks}
|
||||
pool.batchMu.Lock()
|
||||
var tasks []Task
|
||||
for _, job := range pool.batchJobs {
|
||||
for _, task := range job.Tasks {
|
||||
task.Job = job.Name
|
||||
tasks = append(tasks, task)
|
||||
}
|
||||
}
|
||||
if len(tasks) != 0 {
|
||||
job := Job{Tasks: tasks}
|
||||
|
||||
nodes := pool.doAcquireResource(job)
|
||||
for i, task := range job.Tasks {
|
||||
if _, ok := pool.batchAllocations[task.Job]; !ok {
|
||||
pool.batchAllocations[task.Job] = []NodeStatus{}
|
||||
nodes := pool.doAcquireResource(job)
|
||||
for i, task := range job.Tasks {
|
||||
if _, ok := pool.batchAllocations[task.Job]; !ok {
|
||||
pool.batchAllocations[task.Job] = []NodeStatus{}
|
||||
}
|
||||
pool.batchAllocations[task.Job] = append(pool.batchAllocations[task.Job], nodes[i])
|
||||
}
|
||||
if len(nodes) > 0 {
|
||||
pool.batchJobs = []Job{}
|
||||
}
|
||||
}
|
||||
pool.batchAllocations[task.Job] = append(pool.batchAllocations[task.Job], nodes[i])
|
||||
pool.batchMu.Unlock()
|
||||
}
|
||||
if len(nodes) > 0 {
|
||||
pool.batchJobs = []Job{}
|
||||
}
|
||||
pool.batchMu.Unlock()
|
||||
}()
|
||||
}
|
||||
|
||||
@ -885,9 +893,9 @@ func (pool *ResourcePool) doAcquireResource(job Job) []NodeStatus {
|
||||
}
|
||||
}
|
||||
|
||||
log.Info(tasks, factor)
|
||||
//log.Info(tasks, factor)
|
||||
allocation := InstanceOfAllocator().allocate(nodesT, tasks)
|
||||
log.Info(allocation)
|
||||
//log.Info(allocation)
|
||||
if allocation.Flags["valid"] {
|
||||
|
||||
for range job.Tasks { //append would cause uncertain order
|
||||
@ -991,6 +999,18 @@ func (pool *ResourcePool) releaseResource(job Job, agent NodeStatus) {
|
||||
}
|
||||
}
|
||||
|
||||
// EnableBatch sets pool.enableBatch to true and logs the change.
//
// The batch-allocation loop started from init checks this flag after each
// sleep and skips its pass (no job gathering, no doAcquireResource call)
// while the flag is false, so enabling it lets the next pass run.
//
// It always returns true; the "pool_enable_batch" API action marshals that
// value as its JSON response.
//
// NOTE(review): the flag is assigned here without taking any lock, while the
// batch loop reads it from its own goroutine before acquiring batchMu —
// confirm whether this access needs synchronization (e.g. an atomic flag).
func (pool *ResourcePool) EnableBatch() bool {
|
||||
pool.enableBatch = true
|
||||
log.Info("enableBatch is set to true")
|
||||
return true
|
||||
}
|
||||
|
||||
// DisableBatch sets pool.enableBatch to false and logs the change.
//
// The batch-allocation loop started from init checks this flag after each
// sleep and skips its pass while the flag is false, so disabling it stops
// further batch passes from running; the flag is only consulted at the top
// of each iteration.
//
// It always returns true; the "pool_disable_batch" API action marshals that
// value as its JSON response.
//
// NOTE(review): the flag is assigned here without taking any lock, while the
// batch loop reads it from its own goroutine before acquiring batchMu —
// confirm whether this access needs synchronization (e.g. an atomic flag).
func (pool *ResourcePool) DisableBatch() bool {
|
||||
pool.enableBatch = false
|
||||
log.Info("enableBatch is set to false")
|
||||
return true
|
||||
}
|
||||
|
||||
func (pool *ResourcePool) SetShareRatio(ratio float64) bool {
|
||||
pool.enableShareRatio = ratio
|
||||
log.Info("enableShareRatio is updated to ", ratio)
|
||||
|
Loading…
Reference in New Issue
Block a user