2019-03-04 09:19:55 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"flag"
|
|
|
|
"net/http"
|
2019-07-10 12:40:43 +00:00
|
|
|
log "github.com/sirupsen/logrus"
|
2019-03-04 09:19:55 +00:00
|
|
|
"encoding/json"
|
2019-10-24 12:25:59 +00:00
|
|
|
"os"
|
2020-04-12 02:44:32 +00:00
|
|
|
"time"
|
2020-04-13 15:53:38 +00:00
|
|
|
"strconv"
|
2019-03-04 09:19:55 +00:00
|
|
|
)
|
|
|
|
|
2019-11-11 07:33:04 +00:00
|
|
|
// Command-line flags. Both are registered on the default flag set and only
// hold their final values once flag.Parse has been called.
var (
	// addr is the listen address handed to http.ListenAndServe.
	addr = flag.String("addr", "0.0.0.0:8080", "http service address")

	// confFile is the path of the JSON configuration file opened by main.
	confFile = flag.String("conf", "/etc/yao/config.json", "configuration file path")
)
|
2019-03-04 09:19:55 +00:00
|
|
|
|
|
|
|
var pool *ResourcePool
|
|
|
|
|
2019-07-10 12:40:43 +00:00
|
|
|
var scheduler Scheduler
|
2019-03-20 03:14:07 +00:00
|
|
|
|
2019-03-04 09:19:55 +00:00
|
|
|
func serverAPI(w http.ResponseWriter, r *http.Request) {
|
|
|
|
switch r.URL.Query().Get("action") {
|
2019-04-29 09:05:15 +00:00
|
|
|
case "resource_list":
|
|
|
|
js, _ := json.Marshal(pool.list())
|
2019-03-04 09:19:55 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
2019-03-20 03:14:07 +00:00
|
|
|
|
2019-03-04 09:19:55 +00:00
|
|
|
case "resource_get_by_node":
|
2019-04-18 09:25:37 +00:00
|
|
|
id := r.URL.Query().Get("id")
|
2019-03-04 09:19:55 +00:00
|
|
|
js, _ := json.Marshal(pool.getByID(id))
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
2019-03-20 03:14:07 +00:00
|
|
|
|
2019-03-04 09:19:55 +00:00
|
|
|
case "job_submit":
|
2019-03-20 03:14:07 +00:00
|
|
|
var job Job
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("job_submit")
|
2019-04-16 07:33:37 +00:00
|
|
|
msgSubmit := MsgSubmit{Code: 0}
|
2019-03-20 03:14:07 +00:00
|
|
|
err := json.Unmarshal([]byte(string(r.PostFormValue("job"))), &job)
|
2020-04-12 02:44:32 +00:00
|
|
|
log.Info("Submit job", job.Name, time.Now())
|
2019-03-20 03:14:07 +00:00
|
|
|
if err != nil {
|
2019-04-16 07:33:37 +00:00
|
|
|
msgSubmit.Code = 1
|
|
|
|
msgSubmit.Error = err.Error()
|
|
|
|
} else {
|
2019-07-10 12:40:43 +00:00
|
|
|
scheduler.Schedule(job)
|
2019-03-20 03:14:07 +00:00
|
|
|
}
|
2019-04-16 07:33:37 +00:00
|
|
|
js, _ := json.Marshal(msgSubmit)
|
2019-03-04 09:19:55 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
2019-03-20 03:14:07 +00:00
|
|
|
|
|
|
|
case "job_status":
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("job_status")
|
2019-07-10 12:40:43 +00:00
|
|
|
js, _ := json.Marshal(scheduler.QueryState(r.URL.Query().Get("id")))
|
2019-03-20 03:14:07 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2019-04-18 09:25:37 +00:00
|
|
|
case "job_stop":
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("job_stop")
|
2019-07-10 12:40:43 +00:00
|
|
|
js, _ := json.Marshal(scheduler.Stop(string(r.PostFormValue("id"))))
|
2019-04-18 09:25:37 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2019-03-20 03:14:07 +00:00
|
|
|
case "task_logs":
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("task_logs")
|
2019-07-10 12:40:43 +00:00
|
|
|
js, _ := json.Marshal(scheduler.QueryLogs(r.URL.Query().Get("job"), r.URL.Query().Get("task")))
|
2019-03-25 07:36:30 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "jobs":
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("job_list")
|
2019-07-10 12:40:43 +00:00
|
|
|
js, _ := json.Marshal(scheduler.ListJobs())
|
2019-03-20 03:14:07 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
2019-04-12 09:21:09 +00:00
|
|
|
|
|
|
|
case "summary":
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("summary")
|
2019-07-10 12:40:43 +00:00
|
|
|
js, _ := json.Marshal(scheduler.Summary())
|
2019-04-12 09:21:09 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2019-04-29 09:05:15 +00:00
|
|
|
case "pool_status_history":
|
2019-07-29 06:56:18 +00:00
|
|
|
log.Debug("pool_status_history")
|
2019-04-29 09:05:15 +00:00
|
|
|
js, _ := json.Marshal(pool.statusHistory())
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2020-03-29 13:12:44 +00:00
|
|
|
case "get_counter":
|
|
|
|
log.Debug("get_counters")
|
|
|
|
js, _ := json.Marshal(pool.getCounter())
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2020-04-12 02:44:32 +00:00
|
|
|
case "get_bindings":
|
|
|
|
log.Debug("get_bindings")
|
|
|
|
js, _ := json.Marshal(pool.getBindings())
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2019-07-29 06:56:18 +00:00
|
|
|
case "group_list":
|
|
|
|
log.Debug("group_list")
|
|
|
|
js, _ := json.Marshal(InstanceOfGroupManager().List())
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "group_add":
|
|
|
|
log.Debug("group_add")
|
|
|
|
var group Group
|
|
|
|
msg := MsgGroupCreate{Code: 0}
|
2019-07-30 07:35:29 +00:00
|
|
|
err := json.Unmarshal([]byte(string(r.PostFormValue("group"))), &group)
|
2019-07-29 06:56:18 +00:00
|
|
|
if err != nil {
|
|
|
|
msg.Code = 1
|
|
|
|
msg.Error = err.Error()
|
|
|
|
} else {
|
|
|
|
msg = InstanceOfGroupManager().Add(group)
|
2020-05-03 02:30:12 +00:00
|
|
|
scheduler.updateGroup(group)
|
2019-07-29 06:56:18 +00:00
|
|
|
}
|
|
|
|
js, _ := json.Marshal(msg)
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "group_update":
|
|
|
|
log.Debug("group_update")
|
|
|
|
var group Group
|
|
|
|
msg := MsgGroupCreate{Code: 0}
|
2019-08-01 01:54:38 +00:00
|
|
|
err := json.Unmarshal([]byte(string(r.PostFormValue("group"))), &group)
|
2019-07-29 06:56:18 +00:00
|
|
|
if err != nil {
|
|
|
|
msg.Code = 1
|
|
|
|
msg.Error = err.Error()
|
|
|
|
} else {
|
|
|
|
msg = InstanceOfGroupManager().Update(group)
|
2020-05-03 02:30:12 +00:00
|
|
|
scheduler.updateGroup(group)
|
2019-07-29 06:56:18 +00:00
|
|
|
}
|
|
|
|
js, _ := json.Marshal(msg)
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "group_remove":
|
|
|
|
log.Debug("group_remove")
|
|
|
|
var group Group
|
|
|
|
msg := MsgGroupCreate{Code: 0}
|
2019-08-01 01:54:38 +00:00
|
|
|
err := json.Unmarshal([]byte(string(r.PostFormValue("group"))), &group)
|
2019-07-29 06:56:18 +00:00
|
|
|
if err != nil {
|
|
|
|
msg.Code = 1
|
|
|
|
msg.Error = err.Error()
|
|
|
|
} else {
|
|
|
|
msg = InstanceOfGroupManager().Remove(group)
|
2020-05-03 02:30:12 +00:00
|
|
|
scheduler.updateGroup(group)
|
2019-07-29 06:56:18 +00:00
|
|
|
}
|
|
|
|
js, _ := json.Marshal(msg)
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
2020-04-10 10:55:51 +00:00
|
|
|
break
|
|
|
|
|
|
|
|
case "jhl_job_status":
|
|
|
|
log.Debug("jhl_job_status")
|
|
|
|
js, _ := json.Marshal(InstanceJobHistoryLogger().getTaskStatus(r.URL.Query().Get("job")))
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
2020-04-13 10:26:40 +00:00
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_enable":
|
|
|
|
log.Debug("enable schedule")
|
2020-04-13 10:37:54 +00:00
|
|
|
js, _ := json.Marshal(scheduler.Enable())
|
2020-04-13 10:26:40 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_disable":
|
|
|
|
log.Debug("disable schedule")
|
2020-04-13 10:37:54 +00:00
|
|
|
js, _ := json.Marshal(scheduler.Disable())
|
2020-04-13 10:26:40 +00:00
|
|
|
w.Header().Set("Content-Type", "application/json")
|
2020-04-13 15:53:38 +00:00
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_update_parallelism":
|
|
|
|
log.Debug("update_parallelism")
|
|
|
|
parallelism, _ := strconv.Atoi(r.URL.Query().Get("parallelism"))
|
|
|
|
js, _ := json.Marshal(scheduler.UpdateParallelism(parallelism))
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
2020-04-30 08:11:34 +00:00
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2020-04-30 09:52:52 +00:00
|
|
|
case "debug_update_enable_share_ratio":
|
|
|
|
log.Debug("debug_update_enable_share_ratio")
|
|
|
|
|
|
|
|
ratio := 0.75
|
|
|
|
if t, err := strconv.ParseFloat(r.URL.Query().Get("ratio"), 32); err == nil {
|
|
|
|
ratio = t
|
|
|
|
}
|
|
|
|
js, _ := json.Marshal(scheduler.SetShareRatio(ratio))
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_update_enable_pre_schedule_ratio":
|
|
|
|
log.Debug("debug_update_enable_pre_schedule_ratio")
|
|
|
|
ratio := 0.95
|
|
|
|
if t, err := strconv.ParseFloat(r.URL.Query().Get("ratio"), 32); err == nil {
|
|
|
|
ratio = t
|
|
|
|
}
|
|
|
|
js, _ := json.Marshal(scheduler.SetPreScheduleRatio(ratio))
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
2020-04-30 08:11:34 +00:00
|
|
|
case "debug_get_predicts":
|
|
|
|
log.Debug("debug_get_predicts")
|
|
|
|
js, _ := json.Marshal(InstanceOfOptimizer().getAllPredicts())
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_get_gpu_utils":
|
|
|
|
log.Debug("debug_get_gpu_utils")
|
|
|
|
js, _ := json.Marshal(InstanceOfOptimizer().getAllGPUUtils())
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
2020-04-13 10:26:40 +00:00
|
|
|
w.Write(js)
|
2019-07-29 06:56:18 +00:00
|
|
|
break
|
|
|
|
|
2020-05-02 13:09:25 +00:00
|
|
|
case "debug_optimizer_feed_dl":
|
|
|
|
log.Debug("debug_optimizer_feed_dl")
|
|
|
|
var job string
|
|
|
|
var seq int
|
|
|
|
var value int
|
|
|
|
job = r.URL.Query().Get("job")
|
2020-05-02 13:19:54 +00:00
|
|
|
if t, err := strconv.Atoi(r.URL.Query().Get("seq")); err == nil {
|
2020-05-02 13:09:25 +00:00
|
|
|
seq = t
|
|
|
|
}
|
2020-05-02 13:19:54 +00:00
|
|
|
if t, err := strconv.Atoi(r.URL.Query().Get("value")); err == nil {
|
2020-05-02 13:09:25 +00:00
|
|
|
value = t
|
|
|
|
}
|
|
|
|
InstanceOfOptimizer().feedData(job, seq, 0, 0, 0, value)
|
|
|
|
js, _ := json.Marshal(OptimizerJobExecutionTime{})
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_optimizer_train_dl":
|
|
|
|
log.Debug("debug_optimizer_train_dl")
|
|
|
|
InstanceOfOptimizer().train(r.URL.Query().Get("job"))
|
|
|
|
js, _ := json.Marshal(OptimizerJobExecutionTime{})
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
break
|
|
|
|
|
|
|
|
case "debug_get_predict_dl":
|
|
|
|
log.Debug("debug_get_predict_dl")
|
2020-05-02 13:19:54 +00:00
|
|
|
if seq, err := strconv.Atoi(r.URL.Query().Get("seq")); err == nil {
|
2020-05-02 13:09:25 +00:00
|
|
|
est, _ := InstanceOfOptimizer().predict(r.URL.Query().Get("job"), seq)
|
|
|
|
js, _ := json.Marshal(est)
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
} else {
|
|
|
|
js, _ := json.Marshal(OptimizerJobExecutionTime{})
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
|
|
w.Write(js)
|
|
|
|
}
|
|
|
|
break
|
|
|
|
|
2019-03-04 09:19:55 +00:00
|
|
|
default:
|
|
|
|
http.Error(w, "Not Found", http.StatusNotFound)
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
2019-10-24 12:30:55 +00:00
|
|
|
flag.Parse()
|
2019-10-24 12:25:59 +00:00
|
|
|
/* read configuration */
|
2019-10-24 12:30:55 +00:00
|
|
|
file, err := os.Open(*confFile)
|
2019-10-24 12:25:59 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
/* parse configuration */
|
|
|
|
decoder := json.NewDecoder(file)
|
|
|
|
config := Configuration{}
|
|
|
|
err = decoder.Decode(&config)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
2020-04-10 09:34:56 +00:00
|
|
|
/* init jhl */
|
|
|
|
InstanceJobHistoryLogger().init()
|
|
|
|
|
2019-03-04 09:19:55 +00:00
|
|
|
pool = &ResourcePool{}
|
2019-04-29 09:05:15 +00:00
|
|
|
pool.start()
|
2019-03-20 03:14:07 +00:00
|
|
|
|
2019-10-24 12:25:59 +00:00
|
|
|
switch config.SchedulerPolicy {
|
|
|
|
case "FCFS":
|
|
|
|
scheduler = &SchedulerFCFS{}
|
|
|
|
break
|
|
|
|
case "fair":
|
|
|
|
scheduler = &SchedulerFair{}
|
|
|
|
break
|
|
|
|
case "priority":
|
|
|
|
scheduler = &SchedulerPriority{}
|
|
|
|
break
|
|
|
|
default:
|
|
|
|
scheduler = &SchedulerFCFS{}
|
|
|
|
}
|
|
|
|
|
2019-07-10 12:40:43 +00:00
|
|
|
scheduler.Start()
|
2019-03-20 03:14:07 +00:00
|
|
|
|
2019-03-04 09:19:55 +00:00
|
|
|
go func() {
|
2019-10-24 12:25:59 +00:00
|
|
|
start(pool, config)
|
2019-03-04 09:19:55 +00:00
|
|
|
}()
|
|
|
|
|
|
|
|
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
serverAPI(w, r)
|
|
|
|
})
|
|
|
|
|
2019-10-24 12:25:59 +00:00
|
|
|
err = http.ListenAndServe(*addr, nil)
|
2019-03-04 09:19:55 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Fatal("ListenAndServe: ", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|