1
0
mirror of https://github.com/newnius/YAO-scheduler.git synced 2025-12-15 08:16:43 +00:00
This commit is contained in:
2019-04-12 17:21:09 +08:00
parent 804dfa969f
commit 5877310950
5 changed files with 90 additions and 5 deletions

View File

@@ -51,6 +51,9 @@ func (allocator *AllocatorFIFO) start() {
func (allocator *AllocatorFIFO) ack(job *Job) { func (allocator *AllocatorFIFO) ack(job *Job) {
allocator.scheduling.Unlock() allocator.scheduling.Unlock()
}
func (allocator *AllocatorFIFO) running(job *Job) {
for i := range allocator.history { for i := range allocator.history {
if allocator.history[i].Name == job.Name { if allocator.history[i].Name == job.Name {
allocator.history[i].Status = Running allocator.history[i].Status = Running
@@ -134,3 +137,49 @@ func (allocator *AllocatorFIFO) logs(jobName string, taskName string) MsgLog {
func (allocator *AllocatorFIFO) listJobs() MsgJobList { func (allocator *AllocatorFIFO) listJobs() MsgJobList {
return MsgJobList{Code: 0, Jobs: allocator.history} return MsgJobList{Code: 0, Jobs: allocator.history}
} }
// summary builds a MsgSummary snapshot: it tallies the jobs recorded in
// allocator.history by status and counts the entries under pool.nodes as
// free or in use depending on whether any memory is allocated on them.
func (allocator *AllocatorFIFO) summary() MsgSummary {
	report := MsgSummary{Code: 0}

	// Classify every recorded job by its current status.
	for _, job := range allocator.history {
		switch job.Status {
		case Created, Starting:
			// Neither state is running yet, so both are reported as pending.
			report.JobsPending++
		case Running:
			report.JobsRunning++
		case Finished, Stopped:
			// Both states are reported as finished.
			report.JobsFinished++
		}
	}

	// An entry with no allocated memory counts as free; anything else is in use.
	for _, node := range pool.nodes {
		for idx := range node {
			if node[idx].MemoryAllocated != 0 {
				report.UsingGPU++
				continue
			}
			report.FreeGPU++
		}
	}

	return report
}

View File

@@ -7,6 +7,7 @@ import (
"strings" "strings"
"io/ioutil" "io/ioutil"
"encoding/json" "encoding/json"
"fmt"
) )
type JobManager struct { type JobManager struct {
@@ -32,12 +33,24 @@ func (jm *JobManager) start() {
log.Println("Receive resource", resource) log.Println("Receive resource", resource)
jm.resources = append(jm.resources, resource) jm.resources = append(jm.resources, resource)
} }
jm.allocator.ack(&jm.job)
/* bring up containers */ /* bring up containers */
for i := range jm.job.Tasks { for i := range jm.job.Tasks {
var GPUs []string
for _, GPU := range jm.resources[i].Status {
GPUs = append(GPUs, GPU.UUID)
}
v := url.Values{} v := url.Values{}
v.Set("image", jm.job.Image) v.Set("image", jm.job.Tasks[i].Image)
v.Set("cmd", jm.job.Tasks[i].Cmd) v.Set("cmd", jm.job.Tasks[i].Cmd)
v.Set("name", jm.job.Tasks[i].Name)
v.Set("workspace", jm.job.Workspace)
v.Set("gpus", strings.Join(GPUs, ","))
fmt.Print(v.Encode())
resp, err := doRequest("POST", "http://kafka_node1:8000/create", strings.NewReader(v.Encode()), "application/x-www-form-urlencoded", "") resp, err := doRequest("POST", "http://kafka_node1:8000/create", strings.NewReader(v.Encode()), "application/x-www-form-urlencoded", "")
if err != nil { if err != nil {
log.Println(err) log.Println(err)
@@ -63,7 +76,7 @@ func (jm *JobManager) start() {
jm.jobStatus.tasks[jm.job.Tasks[i].Name] = TaskStatus{Id: res.Id} jm.jobStatus.tasks[jm.job.Tasks[i].Name] = TaskStatus{Id: res.Id}
} }
jm.allocator.ack(&jm.job) jm.allocator.running(&jm.job)
/* monitor job execution */ /* monitor job execution */
for { for {

View File

@@ -70,6 +70,14 @@ func serverAPI(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
w.Write(js) w.Write(js)
break break
case "summary":
fmt.Println("summary")
js, _ := json.Marshal(allocator.summary())
w.Header().Set("Content-Type", "application/json")
w.Write(js)
break
default: default:
http.Error(w, "Not Found", http.StatusNotFound) http.Error(w, "Not Found", http.StatusNotFound)
break break

View File

@@ -15,7 +15,6 @@ type Spider struct {
ContentType string ContentType string
Referer string Referer string
Data url.Values Data url.Values
Response *http.Response Response *http.Response
} }

View File

@@ -16,6 +16,21 @@ const (
Finished = 4 Finished = 4
) )
// MsgSubmit is a JSON message carrying a numeric code and an error string.
// NOTE(review): presumably the reply to a job-submission request — confirm against the caller.
type MsgSubmit struct {
// Code is serialized as "code".
Code int `json:"code"`
// Error is serialized as "error".
Error string `json:"error"`
}
// MsgSummary is the JSON payload returned by the allocator's summary():
// job counts grouped by status plus counts of free and in-use GPU entries.
type MsgSummary struct {
// Code is the reply code; summary() sets it to 0.
Code int `json:"code"`
// Error is the error text. NOTE(review): summary() never sets it — presumably empty on success, confirm.
Error string `json:"error"`
// JobsFinished counts jobs whose status is Finished or Stopped.
JobsFinished int `json:"jobs_finished"`
// JobsRunning counts jobs whose status is Running.
JobsRunning int `json:"jobs_running"`
// JobsPending counts jobs whose status is Created or Starting.
JobsPending int `json:"jobs_pending"`
// FreeGPU counts entries under pool.nodes whose MemoryAllocated is 0.
FreeGPU int `json:"gpu_free"`
// UsingGPU counts entries under pool.nodes whose MemoryAllocated is non-zero.
UsingGPU int `json:"gpu_using"`
}
type MsgJobList struct { type MsgJobList struct {
Code int `json:"code"` Code int `json:"code"`
Error string `json:"error"` Error string `json:"error"`
@@ -48,6 +63,7 @@ type MsgCreate struct {
type TaskStatus struct { type TaskStatus struct {
Id string `json:"id"` Id string `json:"id"`
Name string `json:"name"`
Image string `json:"image"` Image string `json:"image"`
ImageDigest string `json:"image_digest"` ImageDigest string `json:"image_digest"`
Command string `json:"command"` Command string `json:"command"`
@@ -83,9 +99,8 @@ type MsgAgent struct {
type Job struct { type Job struct {
ID int `json:"id"` ID int `json:"id"`
Name string `json:"name"` Name string `json:"name"`
Image string `json:"image"`
Tasks []Task `json:"tasks"` Tasks []Task `json:"tasks"`
Workspace int `json:"workspace"` Workspace string `json:"workspace"`
Cluster int `json:"virtual_cluster"` Cluster int `json:"virtual_cluster"`
Priority int `json:"priority"` Priority int `json:"priority"`
RunBefore int `json:"run_before"` RunBefore int `json:"run_before"`
@@ -97,6 +112,7 @@ type Job struct {
type Task struct { type Task struct {
Name string `json:"name"` Name string `json:"name"`
Image string `json:"image"`
Cmd string `json:"cmd"` Cmd string `json:"cmd"`
NumberCPU int `json:"cpu_number"` NumberCPU int `json:"cpu_number"`
Memory int `json:"memory"` Memory int `json:"memory"`