mirror of
https://github.com/newnius/YAO-scheduler.git
synced 2025-06-07 06:11:56 +00:00
update
This commit is contained in:
parent
a66e882e08
commit
cc6f358699
@ -1,9 +0,0 @@
|
|||||||
{
|
|
||||||
"kafkaBrokers": [
|
|
||||||
"kafka-node1:9092",
|
|
||||||
"kafka-node2:9092",
|
|
||||||
"kafka-node3:9092"
|
|
||||||
],
|
|
||||||
"kafkaTopic": "yao",
|
|
||||||
"schedulerPolicy": "fair"
|
|
||||||
}
|
|
@ -3,27 +3,77 @@ package main
|
|||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Configuration holds the scheduler's runtime settings.
//
// Every exported field starts from a built-in default and may be overridden
// by an environment variable with the same name as the field; see
// InstanceOfConfiguration.
type Configuration struct {
	// KafkaBrokers lists the Kafka broker addresses in host:port form.
	KafkaBrokers []string `json:"KafkaBrokers"`
	// KafkaTopic is the Kafka topic name.
	KafkaTopic string `json:"KafkaTopic"`
	// SchedulerPolicy names the scheduling policy (default "fair").
	SchedulerPolicy string `json:"SchedulerPolicy"`
	// ListenAddr is the address the HTTP API listens on.
	ListenAddr string `json:"ListenAddr"`
	// HDFSAddress is forwarded to job containers as their HDFS endpoint.
	HDFSAddress string `json:"HDFSAddress"`
	// HDFSBaseDir is prepended to the job name to form a job's HDFS
	// directory. The job name is appended without a separator, so the value
	// should end with "/".
	HDFSBaseDir string `json:"HDFSBaseDir"`
	// DFSBaseDir is prepended to the job name to form a job's DFS source
	// directory; like HDFSBaseDir it is concatenated without a separator.
	DFSBaseDir string `json:"DFSBaseDir"`

	// mock is the mock-mode flag; mu is presumably the lock guarding it
	// (NOTE(review): confirm against EnableMock/DisableMock).
	mock bool
	mu   sync.Mutex
}

// configurationInstance is the lazily created, process-wide Configuration.
var configurationInstance *Configuration

// ConfigurationInstanceLock serializes creation of configurationInstance.
var ConfigurationInstanceLock sync.Mutex

// InstanceOfConfiguration returns the process-wide Configuration, creating it
// on the first call.
//
// On creation, defaults are applied first and then each setting is overridden
// by the environment variable of the same name when that variable is
// non-empty. KafkaBrokers is read as a comma-separated list; whitespace around
// entries is trimmed and empty entries are dropped, and if nothing remains the
// default broker list is kept. The environment is consulted only once; later
// calls return the same pointer.
func InstanceOfConfiguration() *Configuration {
	ConfigurationInstanceLock.Lock()
	defer ConfigurationInstanceLock.Unlock()

	if configurationInstance != nil {
		return configurationInstance
	}

	/* set default values */
	conf := &Configuration{
		KafkaBrokers: []string{
			"kafka-node1:9092",
			"kafka-node2:9092",
			"kafka-node3:9092",
		},
		KafkaTopic:      "yao",
		SchedulerPolicy: "fair",
		ListenAddr:      "0.0.0.0:8080",
		HDFSAddress:     "",
		HDFSBaseDir:     "/user/root/",
		DFSBaseDir:      "",
		mock:            false,
	}

	/* override conf value from env */
	if brokers := configEnvList("KafkaBrokers"); len(brokers) != 0 {
		conf.KafkaBrokers = brokers
	}
	configEnvString("KafkaTopic", &conf.KafkaTopic)
	configEnvString("SchedulerPolicy", &conf.SchedulerPolicy)
	configEnvString("ListenAddr", &conf.ListenAddr)
	configEnvString("HDFSAddress", &conf.HDFSAddress)
	configEnvString("HDFSBaseDir", &conf.HDFSBaseDir)
	configEnvString("DFSBaseDir", &conf.DFSBaseDir)

	// Publish only after the value is fully built.
	configurationInstance = conf
	return configurationInstance
}

// configEnvString stores the value of environment variable key in *dst when
// the variable is set to a non-empty string; otherwise *dst is left untouched.
func configEnvString(key string, dst *string) {
	if value := os.Getenv(key); len(value) != 0 {
		*dst = value
	}
}

// configEnvList reads environment variable key as a comma-separated list and
// returns its entries with surrounding whitespace trimmed and empty entries
// removed. It returns nil when the variable is unset or holds no entries.
func configEnvList(key string) []string {
	var items []string
	for _, item := range strings.Split(os.Getenv(key), ",") {
		if item = strings.TrimSpace(item); len(item) != 0 {
			items = append(items, item)
		}
	}
	return items
}
|
||||||
|
|
||||||
func (config *Configuration) EnableMock() bool {
|
func (config *Configuration) EnableMock() bool {
|
||||||
@ -41,3 +91,16 @@ func (config *Configuration) DisableMock() bool {
|
|||||||
log.Info("configuration.mock = false")
|
log.Info("configuration.mock = false")
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (config *Configuration) Dump() map[string]interface{} {
|
||||||
|
res := map[string]interface{}{}
|
||||||
|
res["KafkaBrokers"] = config.KafkaBrokers
|
||||||
|
res["KafkaTopic"] = config.KafkaTopic
|
||||||
|
res["SchedulerPolicy"] = config.SchedulerPolicy
|
||||||
|
res["ListenAddr"] = config.ListenAddr
|
||||||
|
res["Mock"] = config.mock
|
||||||
|
res["HDFSAddress"] = config.HDFSAddress
|
||||||
|
res["HDFSBaseDir"] = config.HDFSBaseDir
|
||||||
|
res["DFSBaseDir"] = config.DFSBaseDir
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
@ -107,10 +107,10 @@ func (jm *JobManager) start() {
|
|||||||
v.Set("network", jm.network)
|
v.Set("network", jm.network)
|
||||||
v.Set("should_wait", shouldWait)
|
v.Set("should_wait", shouldWait)
|
||||||
v.Set("output_dir", "/tmp/")
|
v.Set("output_dir", "/tmp/")
|
||||||
v.Set("hdfs_address", "http://192.168.100.104:50070/")
|
v.Set("hdfs_address", InstanceOfConfiguration().HDFSAddress)
|
||||||
v.Set("hdfs_dir", "/user/yao/output/"+jm.job.Name)
|
v.Set("hdfs_dir", InstanceOfConfiguration().HDFSBaseDir+jm.job.Name)
|
||||||
v.Set("gpu_mem", strconv.Itoa(jm.job.Tasks[index].MemoryGPU))
|
v.Set("gpu_mem", strconv.Itoa(jm.job.Tasks[index].MemoryGPU))
|
||||||
v.Set("dfs_src", "/dfs/yao-jobs/"+jm.job.Name+"/task-"+strconv.Itoa(index))
|
v.Set("dfs_src", InstanceOfConfiguration().DFSBaseDir+jm.job.Name+"/task-"+strconv.Itoa(index))
|
||||||
v.Set("dfs_dst", "/tmp")
|
v.Set("dfs_dst", "/tmp")
|
||||||
|
|
||||||
resp, err := doRequest("POST", "http://"+jm.resources[index].ClientHost+":8000/create", strings.NewReader(v.Encode()), "application/x-www-form-urlencoded", "")
|
resp, err := doRequest("POST", "http://"+jm.resources[index].ClientHost+":8000/create", strings.NewReader(v.Encode()), "application/x-www-form-urlencoded", "")
|
||||||
|
38
src/main.go
38
src/main.go
@ -1,19 +1,15 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"os"
|
|
||||||
"time"
|
"time"
|
||||||
"strconv"
|
"strconv"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
var addr = flag.String("addr", "0.0.0.0:8080", "http service address")
|
|
||||||
var confFile = flag.String("conf", "/etc/yao/config.json", "configuration file path")
|
|
||||||
|
|
||||||
var scheduler Scheduler
|
var scheduler Scheduler
|
||||||
|
|
||||||
func serverAPI(w http.ResponseWriter, r *http.Request) {
|
func serverAPI(w http.ResponseWriter, r *http.Request) {
|
||||||
@ -326,6 +322,13 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
|
|||||||
w.Write(js)
|
w.Write(js)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
case "debug_conf_dump":
|
||||||
|
log.Debug("debug_conf_dump")
|
||||||
|
js, _ := json.Marshal(InstanceOfConfiguration().Dump())
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Write(js)
|
||||||
|
break
|
||||||
|
|
||||||
default:
|
default:
|
||||||
http.Error(w, "Not Found", http.StatusNotFound)
|
http.Error(w, "Not Found", http.StatusNotFound)
|
||||||
break
|
break
|
||||||
@ -333,21 +336,19 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
value := os.Getenv("LoggerOutputDir")
|
||||||
/* read configuration */
|
if len(value) != 0 {
|
||||||
file, err := os.Open(*confFile)
|
f, err := os.OpenFile(value, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
|
||||||
if err != nil {
|
defer f.Close()
|
||||||
log.Fatal(err)
|
if err != nil {
|
||||||
|
log.Fatalf("error opening file: %v", err)
|
||||||
|
}
|
||||||
|
log.SetOutput(f)
|
||||||
}
|
}
|
||||||
defer file.Close()
|
//log.SetLevel(log.InfoLevel)
|
||||||
|
|
||||||
/* parse configuration */
|
/* parse configuration */
|
||||||
decoder := json.NewDecoder(file)
|
config := *InstanceOfConfiguration()
|
||||||
config := Configuration{}
|
|
||||||
err = decoder.Decode(&config)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
/* init components */
|
/* init components */
|
||||||
InstanceOfResourcePool().init(config)
|
InstanceOfResourcePool().init(config)
|
||||||
@ -378,9 +379,8 @@ func main() {
|
|||||||
serverAPI(w, r)
|
serverAPI(w, r)
|
||||||
})
|
})
|
||||||
|
|
||||||
err = http.ListenAndServe(*addr, nil)
|
err := http.ListenAndServe(config.ListenAddr, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal("ListenAndServe: ", err)
|
log.Fatal("ListenAndServe: ", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -9,10 +9,13 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"math"
|
"math"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Optimizer struct {
|
type Optimizer struct {
|
||||||
versions map[string]int
|
versions map[string]int
|
||||||
|
cache map[string]OptimizerJobExecutionTime
|
||||||
|
cacheMu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
var optimizerInstance *Optimizer
|
var optimizerInstance *Optimizer
|
||||||
@ -25,6 +28,25 @@ func InstanceOfOptimizer() *Optimizer {
|
|||||||
if optimizerInstance == nil {
|
if optimizerInstance == nil {
|
||||||
optimizerInstance = &Optimizer{}
|
optimizerInstance = &Optimizer{}
|
||||||
optimizerInstance.versions = map[string]int{}
|
optimizerInstance.versions = map[string]int{}
|
||||||
|
optimizerInstance.cache = map[string]OptimizerJobExecutionTime{}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
/* remove expired cache */
|
||||||
|
for {
|
||||||
|
time.Sleep(time.Second * 30)
|
||||||
|
optimizerInstance.cacheMu.Lock()
|
||||||
|
var expired []string
|
||||||
|
for k, v := range optimizerInstance.cache {
|
||||||
|
if time.Now().Unix()-v.Version > 300 {
|
||||||
|
expired = append(expired, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, k := range expired {
|
||||||
|
delete(optimizerInstance.cache, k)
|
||||||
|
}
|
||||||
|
optimizerInstance.cacheMu.Unlock()
|
||||||
|
}
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
return optimizerInstance
|
return optimizerInstance
|
||||||
}
|
}
|
||||||
@ -165,8 +187,14 @@ func (optimizer *Optimizer) trainTime(jobName string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (optimizer *Optimizer) PredictTime(job Job) OptimizerJobExecutionTime {
|
func (optimizer *Optimizer) PredictTime(job Job) OptimizerJobExecutionTime {
|
||||||
res := OptimizerJobExecutionTime{Pre: 0, Post: 0, Total: math.MaxInt64}
|
optimizer.cacheMu.Lock()
|
||||||
|
if val, ok := optimizer.cache[job.Name]; ok {
|
||||||
|
optimizer.cacheMu.Unlock()
|
||||||
|
return val
|
||||||
|
}
|
||||||
|
optimizer.cacheMu.Unlock()
|
||||||
|
|
||||||
|
res := OptimizerJobExecutionTime{Pre: 0, Post: 0, Total: math.MaxInt64}
|
||||||
var jobName string
|
var jobName string
|
||||||
str := strings.Split(job.Name, "-")
|
str := strings.Split(job.Name, "-")
|
||||||
if len(str) == 2 {
|
if len(str) == 2 {
|
||||||
@ -246,6 +274,10 @@ func (optimizer *Optimizer) PredictTime(job Job) OptimizerJobExecutionTime {
|
|||||||
res.Total = int(math.Ceil(v))
|
res.Total = int(math.Ceil(v))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
res.Version = time.Now().Unix()
|
||||||
|
optimizer.cacheMu.Lock()
|
||||||
|
optimizer.cache[job.Name] = res
|
||||||
|
optimizer.cacheMu.Unlock()
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
src/util.go
10
src/util.go
@ -46,11 +46,11 @@ type UtilGPUTimeSeries struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// OptimizerJobExecutionTime is the optimizer's execution-time prediction for
// a job, split into phases.
// NOTE(review): the time unit of Pre/Post/Total/Main is not visible here —
// presumably seconds, confirm against the training data.
type OptimizerJobExecutionTime struct {
	// Pre is the predicted duration of the phase before the main phase.
	Pre int `json:"pre"`
	// Post is the predicted duration of the phase after the main phase.
	Post int `json:"post"`
	// Total is the predicted overall duration; PredictTime initializes it
	// to math.MaxInt64 before a prediction is filled in.
	Total int `json:"total"`
	// Main is the predicted duration of the main phase.
	Main int `json:"main"`
	// Version is the Unix timestamp, in seconds, at which the prediction was
	// computed. It is int64 to match time.Now().Unix() and is used to expire
	// cached predictions.
	Version int64 `json:"version"`
}
|
||||||
|
|
||||||
type OptimizerUtilGPU struct {
|
type OptimizerUtilGPU struct {
|
||||||
|
Loading…
Reference in New Issue
Block a user