mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-06 22:01:55 +00:00
update
This commit is contained in:
parent
b979373cbd
commit
4a2bf436c7
10
README.md
10
README.md
@ -110,4 +110,14 @@ GPU is occupied by which job(s)
|
||||
**PoolDump**
|
||||
```
|
||||
?action=debug_pool_dump
|
||||
```
|
||||
|
||||
**EnableMock**
|
||||
```
|
||||
?action=debug_enable_mock
|
||||
```
|
||||
|
||||
**DisableMock**
|
||||
```
|
||||
?action=debug_disable_mock
|
||||
```
|
43
src/configuration.go
Normal file
43
src/configuration.go
Normal file
@ -0,0 +1,43 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Configuration holds the scheduler settings together with the runtime
// switch for mock mode.
type Configuration struct {
	// Exported settings, (de)serialised under the JSON keys in the tags.
	KafkaBrokers    []string `json:"kafkaBrokers"`
	KafkaTopic      string   `json:"kafkaTopic"`
	SchedulerPolicy string   `json:"schedulerPolicy"`

	// mock is toggled by EnableMock / DisableMock while holding mu.
	mock bool
	mu   sync.Mutex
}

// Process-wide Configuration singleton and the lock that serialises its
// lazy creation.
var (
	ConfigurationInstance     *Configuration
	ConfigurationInstanceLock sync.Mutex
)

// InstanceOfConfiguration returns the shared Configuration, allocating it on
// the first call. A freshly created instance has mock mode switched off.
func InstanceOfConfiguration() *Configuration {
	ConfigurationInstanceLock.Lock()
	defer ConfigurationInstanceLock.Unlock()

	if ConfigurationInstance != nil {
		return ConfigurationInstance
	}
	ConfigurationInstance = &Configuration{mock: false}
	return ConfigurationInstance
}
|
||||
|
||||
func (config *Configuration) EnableMock() bool {
|
||||
config.mu.Lock()
|
||||
defer config.mu.Unlock()
|
||||
config.mock = true
|
||||
log.Info("configuration.mock = true")
|
||||
return true
|
||||
}
|
||||
|
||||
func (config *Configuration) DisableMock() bool {
|
||||
config.mu.Lock()
|
||||
defer config.mu.Unlock()
|
||||
config.mock = false
|
||||
log.Info("configuration.mock = false")
|
||||
return true
|
||||
}
|
@ -52,6 +52,16 @@ func (jm *JobManager) start() {
|
||||
time.Sleep(time.Millisecond * time.Duration(500+rand.Intn(500)))
|
||||
}
|
||||
|
||||
if InstanceOfConfiguration().mock {
|
||||
jm.isRunning = false
|
||||
duration := InstanceOfMocker().GetDuration(jm.job, jm.resources)
|
||||
log.Info("mock ", jm.job.Name, ", wait ", duration)
|
||||
time.Sleep(time.Second * time.Duration(duration))
|
||||
jm.returnResource([]TaskStatus{})
|
||||
log.Info("JobMaster exited ", jm.job.Name)
|
||||
return
|
||||
}
|
||||
|
||||
if !jm.killFlag {
|
||||
/* switch to Running state */
|
||||
jm.scheduler.UpdateProgress(jm.job, Running)
|
||||
@ -147,7 +157,9 @@ func (jm *JobManager) returnResource(status []TaskStatus) {
|
||||
InstanceOfResourcePool().detach(t.UUID, jm.job)
|
||||
}
|
||||
|
||||
InstanceJobHistoryLogger().submitTaskStatus(jm.job.Name, status[i])
|
||||
if !InstanceOfConfiguration().mock {
|
||||
InstanceJobHistoryLogger().submitTaskStatus(jm.job.Name, status[i])
|
||||
}
|
||||
|
||||
/* remove exited containers */
|
||||
//v := url.Values{}
|
||||
|
14
src/main.go
14
src/main.go
@ -327,6 +327,20 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
case "debug_enable_mock":
|
||||
log.Debug("debug_enable_mock")
|
||||
js, _ := json.Marshal(InstanceOfConfiguration().EnableMock())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
case "debug_disable_mock":
|
||||
log.Debug("debug_disable_mock")
|
||||
js, _ := json.Marshal(InstanceOfConfiguration().DisableMock())
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(js)
|
||||
break
|
||||
|
||||
default:
|
||||
http.Error(w, "Not Found", http.StatusNotFound)
|
||||
break
|
||||
|
101
src/mocker.go
Normal file
101
src/mocker.go
Normal file
@ -0,0 +1,101 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Mocker supplies simulated job run times for mock mode.
type Mocker struct {
	// mu is not used by any method visible in this file.
	mu sync.Mutex
}

// Process-wide Mocker singleton and the lock that serialises its lazy
// creation.
var (
	MockerInstance     *Mocker
	MockerInstanceLock sync.Mutex
)

// InstanceOfMocker returns the shared Mocker, allocating it on the first
// call.
func InstanceOfMocker() *Mocker {
	MockerInstanceLock.Lock()
	defer MockerInstanceLock.Unlock()

	if MockerInstance != nil {
		return MockerInstance
	}
	MockerInstance = &Mocker{}
	return MockerInstance
}
|
||||
|
||||
func (mocker *Mocker) GetDuration(job Job, nodes []NodeStatus) int {
|
||||
str := strings.Split(job.Name, "-")
|
||||
duration := 300
|
||||
|
||||
mode := "unknown"
|
||||
if len(job.Tasks) == 1 {
|
||||
if job.Tasks[0].NumberGPU == 1 {
|
||||
mode = "s1"
|
||||
} else if job.Tasks[0].NumberGPU == 2 {
|
||||
mode = "s2"
|
||||
}
|
||||
} else if len(job.Tasks) == 3 {
|
||||
var psNodes []string
|
||||
var workerNodes []string
|
||||
for i, task := range job.Tasks {
|
||||
if task.IsPS {
|
||||
psNodes = append(psNodes, nodes[i].ClientHost)
|
||||
} else {
|
||||
workerNodes = append(workerNodes, nodes[i].ClientHost)
|
||||
}
|
||||
}
|
||||
if psNodes[0] == workerNodes[0] {
|
||||
if psNodes[0] == workerNodes[1] {
|
||||
mode = "pww"
|
||||
} else {
|
||||
mode = "pw:w"
|
||||
}
|
||||
} else {
|
||||
if psNodes[0] == workerNodes[1] {
|
||||
mode = "pw:w"
|
||||
} else if workerNodes[0] == workerNodes[1] {
|
||||
mode = "p:ww"
|
||||
} else {
|
||||
mode = "p:w:w"
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if len(str) == 2 {
|
||||
jobName := str[0]
|
||||
|
||||
durations := map[string]map[string]int{
|
||||
"vgg16": {
|
||||
"s1": 220,
|
||||
"s2": 227,
|
||||
"pww": 510,
|
||||
"pw:w": 767,
|
||||
"p:ww": 1190,
|
||||
"p:w:w": 810,
|
||||
},
|
||||
"resnet50": {
|
||||
"s1": 146,
|
||||
"s2": 164,
|
||||
"pww": 203,
|
||||
"pw:w": 204,
|
||||
"p:ww": 255,
|
||||
"p:w:w": 210,
|
||||
},
|
||||
"inception3": {
|
||||
"s1": 253,
|
||||
"s2": 257,
|
||||
"pww": 289,
|
||||
"pw:w": 295,
|
||||
"p:ww": 310,
|
||||
"p:w:w": 290,
|
||||
},
|
||||
}
|
||||
|
||||
if vals, ok := durations[jobName]; ok {
|
||||
if val, ok2 := vals[mode]; ok2 {
|
||||
return val
|
||||
}
|
||||
}
|
||||
}
|
||||
return duration
|
||||
}
|
@ -8,12 +8,6 @@ import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type Configuration struct {
|
||||
KafkaBrokers []string `json:"kafkaBrokers"`
|
||||
KafkaTopic string `json:"kafkaTopic"`
|
||||
SchedulerPolicy string `json:"schedulerPolicy"`
|
||||
}
|
||||
|
||||
type Job struct {
|
||||
ID int `json:"id"`
|
||||
Name string `json:"name"`
|
||||
|
Loading…
Reference in New Issue
Block a user