mirror of https://github.com/newnius/YAO-scheduler.git (synced 2025-12-15 08:16:43 +00:00)
update
@@ -188,7 +188,6 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
     var candidates []NodeStatus
     /* first round, find vacant gpu */
     for i := 0; i < pool.poolsCount; i++ {
-        log.Info("lock,", (i+poolID)%pool.poolsCount)
         pool.poolsMu[(i+poolID)%pool.poolsCount].Lock()
         locks[(i+poolID)%pool.poolsCount] = pool.poolsMu[(i+poolID)%pool.poolsCount]
         for _, node := range pool.pools[(i+poolID)%pool.poolsCount] {
@@ -199,13 +198,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
                 }
             }
             if len(available) >= task.NumberGPU {
-                tmp := NodeStatus{}
-                tmp.ClientID = node.ClientID
-                tmp.ClientHost = node.ClientHost
-                tmp.Status = available
-                tmp.NumCPU = node.NumCPU
-                tmp.MemTotal = node.MemAvailable
-                candidates = append(candidates, tmp)
+                candidates = append(candidates, node)
                 if len(candidates) >= 8 {
                     break
                 }
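This hunk (and the matching one in the utilization round below) replaces the field-by-field copy into a tmp NodeStatus with appending the matching node directly, still capping the candidate list at 8. A minimal sketch of that pattern, assuming simplified GPUStatus/NodeStatus types and a hypothetical pickCandidates helper; the "fully free memory means vacant" test is also an assumption, not the scheduler's exact criterion:

package main

import "fmt"

// GPUStatus and NodeStatus are simplified stand-ins for the scheduler's
// structs; the real types in YAO-scheduler carry more fields.
type GPUStatus struct {
	UUID        string
	MemoryFree  int
	MemoryTotal int
}

type NodeStatus struct {
	ClientID string
	Status   []GPUStatus
}

// pickCandidates mirrors the post-change pattern: once a node exposes
// enough vacant GPUs, the node itself is appended (no field-by-field
// copy into a tmp NodeStatus), and the scan stops at 8 candidates.
func pickCandidates(nodes []NodeStatus, wantGPU int) []NodeStatus {
	var candidates []NodeStatus
	for _, node := range nodes {
		var available []GPUStatus
		for _, gpu := range node.Status {
			// assumption: a GPU with all of its memory free counts as vacant
			if gpu.MemoryFree == gpu.MemoryTotal {
				available = append(available, gpu)
			}
		}
		if len(available) >= wantGPU {
			candidates = append(candidates, node)
			if len(candidates) >= 8 {
				break
			}
		}
	}
	return candidates
}

func main() {
	nodes := []NodeStatus{
		{ClientID: "n1", Status: []GPUStatus{{UUID: "g0", MemoryFree: 16, MemoryTotal: 16}}},
		{ClientID: "n2", Status: []GPUStatus{{UUID: "g1", MemoryFree: 4, MemoryTotal: 16}}},
	}
	fmt.Println(len(pickCandidates(nodes, 1))) // 1: only n1 has a vacant GPU
}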
@@ -223,10 +216,10 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
     log.Info("dasdsa")
     if util, valid := InstanceOfOptimizer().predictUtilGPU(job.Name); valid {
 
-        for i := poolID; i < pool.poolsCount; i++ {
-            if _, err := locks[i]; err {
-                pool.poolsMu[i].Lock()
-                locks[i] = pool.poolsMu[i]
+        for i := 0; i < pool.poolsCount; i++ {
+            if _, err := locks[(i+poolID)%pool.poolsCount]; err {
+                pool.poolsMu[(i+poolID)%pool.poolsCount].Lock()
+                locks[(i+poolID)%pool.poolsCount] = pool.poolsMu[(i+poolID)%pool.poolsCount]
             }
 
             for _, node := range pool.pools[i] {
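The second round previously started the loop at poolID but indexed locks and poolsMu with the raw loop variable i, so the lock bookkeeping and the locked segments could disagree; the new code rotates the index everywhere as (i+poolID)%pool.poolsCount. A minimal sketch of that rotated acquisition with per-index bookkeeping, assuming a simplified pool type and a hypothetical lockFrom helper, and assuming the map is meant to prevent locking the same segment twice:

package main

import (
	"fmt"
	"sync"
)

// pool is a simplified stand-in for the scheduler's pool: a fixed number
// of segments, each guarded by its own mutex.
type pool struct {
	poolsCount int
	poolsMu    []*sync.Mutex
}

// lockFrom walks all segments starting at poolID and wrapping around,
// locks each one, and records the mutex under its real index so the
// caller can later unlock exactly what was acquired.
func lockFrom(p *pool, poolID int) map[int]*sync.Mutex {
	locks := map[int]*sync.Mutex{}
	for i := 0; i < p.poolsCount; i++ {
		idx := (i + poolID) % p.poolsCount
		if _, held := locks[idx]; !held { // skip segments already locked in an earlier pass
			p.poolsMu[idx].Lock()
			locks[idx] = p.poolsMu[idx]
		}
	}
	return locks
}

func main() {
	p := &pool{poolsCount: 4, poolsMu: []*sync.Mutex{{}, {}, {}, {}}}
	locks := lockFrom(p, 2)
	fmt.Println(len(locks)) // 4: every segment locked exactly once, starting from index 2
	for _, mu := range locks {
		mu.Unlock()
	}
}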
@@ -248,13 +241,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
                 }
             }
             if len(available) >= task.NumberGPU {
-                tmp := NodeStatus{}
-                tmp.ClientID = node.ClientID
-                tmp.ClientHost = node.ClientHost
-                tmp.Status = available
-                tmp.NumCPU = node.NumCPU
-                tmp.MemTotal = node.MemAvailable
-                candidates = append(candidates, tmp)
+                candidates = append(candidates, node)
                 if len(candidates) >= 8 {
                     break
                 }
@@ -287,9 +274,7 @@ func (scheduler *SchedulerFair) AcquireResource(job Job, task Task) NodeStatus {
         }
     }
 
-    for i, _ := range locks {
-        log.Info("unlock ", i)
-        //lock.Unlock()
+    for i := range locks {
         pool.poolsMu[i].Unlock()
     }
     go func(res NodeStatus) {
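The release path drops the debug logging and commented-out call, ranging over the recorded locks and unlocking each pool mutex by index. A minimal sketch of how that release pairs with the acquisition bookkeeping, assuming the same simplified shapes as above and a hypothetical releaseAll helper:

package main

import (
	"fmt"
	"sync"
)

// releaseAll unlocks exactly the mutexes recorded during acquisition,
// addressing them by pool index just as the scheduler does.
func releaseAll(poolsMu []*sync.Mutex, locks map[int]*sync.Mutex) {
	for i := range locks {
		poolsMu[i].Unlock()
	}
}

func main() {
	poolsMu := []*sync.Mutex{{}, {}, {}}
	locks := map[int]*sync.Mutex{}
	for i, mu := range poolsMu {
		mu.Lock()
		locks[i] = mu
	}
	releaseAll(poolsMu, locks)
	fmt.Println("released", len(locks), "segments") // released 3 segments
}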