1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-06-08 06:41:56 +00:00
YAO-scheduler/src/main.go

328 lines
7.9 KiB
Go
Raw Normal View History

2019-03-04 09:19:55 +00:00
package main
import (
"flag"
"net/http"
2019-07-10 12:40:43 +00:00
log "github.com/sirupsen/logrus"
2019-03-04 09:19:55 +00:00
"encoding/json"
2019-10-24 12:25:59 +00:00
"os"
2020-04-12 02:44:32 +00:00
"time"
2020-04-13 15:53:38 +00:00
"strconv"
2019-03-04 09:19:55 +00:00
)
2019-11-11 07:33:04 +00:00
var addr = flag.String("addr", "0.0.0.0:8080", "http service address")
2019-11-11 06:30:40 +00:00
var confFile = flag.String("conf", "/etc/yao/config.json", "configuration file path")
2019-03-04 09:19:55 +00:00
var pool *ResourcePool
2019-07-10 12:40:43 +00:00
var scheduler Scheduler
2019-03-20 03:14:07 +00:00
2019-03-04 09:19:55 +00:00
func serverAPI(w http.ResponseWriter, r *http.Request) {
switch r.URL.Query().Get("action") {
2019-04-29 09:05:15 +00:00
case "resource_list":
js, _ := json.Marshal(pool.list())
2019-03-04 09:19:55 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-03-20 03:14:07 +00:00
2019-03-04 09:19:55 +00:00
case "resource_get_by_node":
2019-04-18 09:25:37 +00:00
id := r.URL.Query().Get("id")
2019-03-04 09:19:55 +00:00
js, _ := json.Marshal(pool.getByID(id))
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-03-20 03:14:07 +00:00
2019-03-04 09:19:55 +00:00
case "job_submit":
2019-03-20 03:14:07 +00:00
var job Job
2019-07-29 06:56:18 +00:00
log.Debug("job_submit")
2019-04-16 07:33:37 +00:00
msgSubmit := MsgSubmit{Code: 0}
2019-03-20 03:14:07 +00:00
err := json.Unmarshal([]byte(string(r.PostFormValue("job"))), &job)
2020-04-12 02:44:32 +00:00
log.Info("Submit job", job.Name, time.Now())
2019-03-20 03:14:07 +00:00
if err != nil {
2019-04-16 07:33:37 +00:00
msgSubmit.Code = 1
msgSubmit.Error = err.Error()
} else {
2019-07-10 12:40:43 +00:00
scheduler.Schedule(job)
2019-03-20 03:14:07 +00:00
}
2019-04-16 07:33:37 +00:00
js, _ := json.Marshal(msgSubmit)
2019-03-04 09:19:55 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-03-20 03:14:07 +00:00
case "job_status":
2019-07-29 06:56:18 +00:00
log.Debug("job_status")
2019-07-10 12:40:43 +00:00
js, _ := json.Marshal(scheduler.QueryState(r.URL.Query().Get("id")))
2019-03-20 03:14:07 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-04-18 09:25:37 +00:00
case "job_stop":
2019-07-29 06:56:18 +00:00
log.Debug("job_stop")
2019-07-10 12:40:43 +00:00
js, _ := json.Marshal(scheduler.Stop(string(r.PostFormValue("id"))))
2019-04-18 09:25:37 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-03-20 03:14:07 +00:00
case "task_logs":
2019-07-29 06:56:18 +00:00
log.Debug("task_logs")
2019-07-10 12:40:43 +00:00
js, _ := json.Marshal(scheduler.QueryLogs(r.URL.Query().Get("job"), r.URL.Query().Get("task")))
2019-03-25 07:36:30 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "jobs":
2019-07-29 06:56:18 +00:00
log.Debug("job_list")
2019-07-10 12:40:43 +00:00
js, _ := json.Marshal(scheduler.ListJobs())
2019-03-20 03:14:07 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-04-12 09:21:09 +00:00
case "summary":
2019-07-29 06:56:18 +00:00
log.Debug("summary")
2019-07-10 12:40:43 +00:00
js, _ := json.Marshal(scheduler.Summary())
2019-04-12 09:21:09 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-04-29 09:05:15 +00:00
case "pool_status_history":
2019-07-29 06:56:18 +00:00
log.Debug("pool_status_history")
2019-04-29 09:05:15 +00:00
js, _ := json.Marshal(pool.statusHistory())
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2020-03-29 13:12:44 +00:00
case "get_counter":
log.Debug("get_counters")
js, _ := json.Marshal(pool.getCounter())
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2020-04-12 02:44:32 +00:00
case "get_bindings":
log.Debug("get_bindings")
js, _ := json.Marshal(pool.getBindings())
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2019-07-29 06:56:18 +00:00
case "group_list":
log.Debug("group_list")
js, _ := json.Marshal(InstanceOfGroupManager().List())
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "group_add":
log.Debug("group_add")
var group Group
msg := MsgGroupCreate{Code: 0}
2019-07-30 07:35:29 +00:00
err := json.Unmarshal([]byte(string(r.PostFormValue("group"))), &group)
2019-07-29 06:56:18 +00:00
if err != nil {
msg.Code = 1
msg.Error = err.Error()
} else {
msg = InstanceOfGroupManager().Add(group)
}
js, _ := json.Marshal(msg)
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "group_update":
log.Debug("group_update")
var group Group
msg := MsgGroupCreate{Code: 0}
2019-08-01 01:54:38 +00:00
err := json.Unmarshal([]byte(string(r.PostFormValue("group"))), &group)
2019-07-29 06:56:18 +00:00
if err != nil {
msg.Code = 1
msg.Error = err.Error()
} else {
msg = InstanceOfGroupManager().Update(group)
}
js, _ := json.Marshal(msg)
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "group_remove":
log.Debug("group_remove")
var group Group
msg := MsgGroupCreate{Code: 0}
2019-08-01 01:54:38 +00:00
err := json.Unmarshal([]byte(string(r.PostFormValue("group"))), &group)
2019-07-29 06:56:18 +00:00
if err != nil {
msg.Code = 1
msg.Error = err.Error()
} else {
msg = InstanceOfGroupManager().Remove(group)
}
js, _ := json.Marshal(msg)
w.Header().Set("Content-Type", "application/json")
w.Write(js)
2020-04-10 10:55:51 +00:00
break
case "jhl_job_status":
log.Debug("jhl_job_status")
js, _ := json.Marshal(InstanceJobHistoryLogger().getTaskStatus(r.URL.Query().Get("job")))
w.Header().Set("Content-Type", "application/json")
w.Write(js)
2020-04-13 10:26:40 +00:00
break
case "debug_enable":
log.Debug("enable schedule")
2020-04-13 10:37:54 +00:00
js, _ := json.Marshal(scheduler.Enable())
2020-04-13 10:26:40 +00:00
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "debug_disable":
log.Debug("disable schedule")
2020-04-13 10:37:54 +00:00
js, _ := json.Marshal(scheduler.Disable())
2020-04-13 10:26:40 +00:00
w.Header().Set("Content-Type", "application/json")
2020-04-13 15:53:38 +00:00
w.Write(js)
break
case "debug_update_parallelism":
log.Debug("update_parallelism")
parallelism, _ := strconv.Atoi(r.URL.Query().Get("parallelism"))
js, _ := json.Marshal(scheduler.UpdateParallelism(parallelism))
w.Header().Set("Content-Type", "application/json")
2020-04-30 08:11:34 +00:00
w.Write(js)
break
2020-04-30 09:52:52 +00:00
case "debug_update_enable_share_ratio":
log.Debug("debug_update_enable_share_ratio")
ratio := 0.75
if t, err := strconv.ParseFloat(r.URL.Query().Get("ratio"), 32); err == nil {
ratio = t
}
js, _ := json.Marshal(scheduler.SetShareRatio(ratio))
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "debug_update_enable_pre_schedule_ratio":
log.Debug("debug_update_enable_pre_schedule_ratio")
ratio := 0.95
if t, err := strconv.ParseFloat(r.URL.Query().Get("ratio"), 32); err == nil {
ratio = t
}
js, _ := json.Marshal(scheduler.SetPreScheduleRatio(ratio))
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
2020-04-30 08:11:34 +00:00
case "debug_get_predicts":
log.Debug("debug_get_predicts")
js, _ := json.Marshal(InstanceOfOptimizer().getAllPredicts())
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "debug_get_gpu_utils":
log.Debug("debug_get_gpu_utils")
js, _ := json.Marshal(InstanceOfOptimizer().getAllGPUUtils())
w.Header().Set("Content-Type", "application/json")
2020-04-13 10:26:40 +00:00
w.Write(js)
2019-07-29 06:56:18 +00:00
break
2020-05-02 13:09:25 +00:00
case "debug_optimizer_feed_dl":
log.Debug("debug_optimizer_feed_dl")
var job string
var seq int
var value int
job = r.URL.Query().Get("job")
2020-05-02 13:19:54 +00:00
if t, err := strconv.Atoi(r.URL.Query().Get("seq")); err == nil {
2020-05-02 13:09:25 +00:00
seq = t
}
2020-05-02 13:19:54 +00:00
if t, err := strconv.Atoi(r.URL.Query().Get("value")); err == nil {
2020-05-02 13:09:25 +00:00
value = t
}
InstanceOfOptimizer().feedData(job, seq, 0, 0, 0, value)
js, _ := json.Marshal(OptimizerJobExecutionTime{})
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "debug_optimizer_train_dl":
log.Debug("debug_optimizer_train_dl")
InstanceOfOptimizer().train(r.URL.Query().Get("job"))
js, _ := json.Marshal(OptimizerJobExecutionTime{})
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
case "debug_get_predict_dl":
log.Debug("debug_get_predict_dl")
2020-05-02 13:19:54 +00:00
if seq, err := strconv.Atoi(r.URL.Query().Get("seq")); err == nil {
2020-05-02 13:09:25 +00:00
est, _ := InstanceOfOptimizer().predict(r.URL.Query().Get("job"), seq)
js, _ := json.Marshal(est)
w.Header().Set("Content-Type", "application/json")
w.Write(js)
} else {
js, _ := json.Marshal(OptimizerJobExecutionTime{})
w.Header().Set("Content-Type", "application/json")
w.Write(js)
}
break
2019-03-04 09:19:55 +00:00
default:
http.Error(w, "Not Found", http.StatusNotFound)
break
}
}
func main() {
2019-10-24 12:30:55 +00:00
flag.Parse()
2019-10-24 12:25:59 +00:00
/* read configuration */
2019-10-24 12:30:55 +00:00
file, err := os.Open(*confFile)
2019-10-24 12:25:59 +00:00
if err != nil {
log.Fatal(err)
}
defer file.Close()
/* parse configuration */
decoder := json.NewDecoder(file)
config := Configuration{}
err = decoder.Decode(&config)
if err != nil {
log.Fatal(err)
}
2020-04-10 09:34:56 +00:00
/* init jhl */
InstanceJobHistoryLogger().init()
2019-03-04 09:19:55 +00:00
pool = &ResourcePool{}
2019-04-29 09:05:15 +00:00
pool.start()
2019-03-20 03:14:07 +00:00
2019-10-24 12:25:59 +00:00
switch config.SchedulerPolicy {
case "FCFS":
scheduler = &SchedulerFCFS{}
break
case "fair":
scheduler = &SchedulerFair{}
break
case "priority":
scheduler = &SchedulerPriority{}
break
default:
scheduler = &SchedulerFCFS{}
}
2019-07-10 12:40:43 +00:00
scheduler.Start()
2019-03-20 03:14:07 +00:00
2019-03-04 09:19:55 +00:00
go func() {
2019-10-24 12:25:59 +00:00
start(pool, config)
2019-03-04 09:19:55 +00:00
}()
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
serverAPI(w, r)
})
2019-10-24 12:25:59 +00:00
err = http.ListenAndServe(*addr, nil)
2019-03-04 09:19:55 +00:00
if err != nil {
log.Fatal("ListenAndServe: ", err)
}
}