2019-03-04 09:19:55 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
2019-04-29 09:05:15 +00:00
|
|
|
"time"
|
2019-06-05 09:09:22 +00:00
|
|
|
"net/url"
|
|
|
|
"strings"
|
|
|
|
"log"
|
|
|
|
"math/rand"
|
|
|
|
"strconv"
|
2019-06-13 03:13:55 +00:00
|
|
|
"fmt"
|
2019-04-29 09:05:15 +00:00
|
|
|
)
|
2019-03-04 09:19:55 +00:00
|
|
|
|
|
|
|
type ResourcePool struct {
|
2019-04-29 09:05:15 +00:00
|
|
|
mu sync.Mutex
|
2019-04-18 09:25:37 +00:00
|
|
|
nodes map[string]NodeStatus
|
2019-04-29 09:05:15 +00:00
|
|
|
|
2019-04-29 12:57:32 +00:00
|
|
|
history []PoolStatus
|
2019-06-04 03:08:49 +00:00
|
|
|
|
|
|
|
heartBeat map[string]time.Time
|
2019-06-05 09:09:22 +00:00
|
|
|
|
|
|
|
networks map[string]bool
|
|
|
|
networksFree map[string]bool
|
|
|
|
networkMu sync.Mutex
|
2019-04-29 09:05:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (pool *ResourcePool) start() {
|
2019-06-13 02:53:00 +00:00
|
|
|
//TODO: retrieve networks from yao-agent-master in blocking io
|
2019-06-05 09:09:22 +00:00
|
|
|
pool.networks = map[string]bool{}
|
|
|
|
pool.networksFree = map[string]bool{}
|
|
|
|
|
2019-06-04 03:08:49 +00:00
|
|
|
/* check dead nodes */
|
2019-06-04 03:15:12 +00:00
|
|
|
go func() {
|
|
|
|
pool.heartBeat = map[string]time.Time{}
|
|
|
|
|
|
|
|
for {
|
|
|
|
for k, v := range pool.heartBeat {
|
|
|
|
if v.Add(time.Second * 30).Before(time.Now()) {
|
|
|
|
delete(pool.nodes, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
time.Sleep(time.Second * 10)
|
2019-06-04 03:08:49 +00:00
|
|
|
}
|
2019-06-04 03:15:12 +00:00
|
|
|
}()
|
2019-06-04 03:08:49 +00:00
|
|
|
|
|
|
|
/* save pool status periodically */
|
2019-04-29 09:05:15 +00:00
|
|
|
go func() {
|
2019-04-29 12:57:32 +00:00
|
|
|
/* waiting for data */
|
2019-05-06 07:36:31 +00:00
|
|
|
pool.history = []PoolStatus{}
|
2019-04-29 12:57:32 +00:00
|
|
|
time.Sleep(time.Second * 30)
|
2019-04-29 09:05:15 +00:00
|
|
|
for {
|
2019-04-29 12:57:32 +00:00
|
|
|
summary := PoolStatus{}
|
2019-04-29 09:05:15 +00:00
|
|
|
|
|
|
|
UtilCPU := 0.0
|
|
|
|
TotalCPU := 0
|
|
|
|
TotalMem := 0
|
|
|
|
AvailableMem := 0
|
|
|
|
|
|
|
|
TotalGPU := 0
|
|
|
|
UtilGPU := 0
|
|
|
|
TotalMemGPU := 0
|
|
|
|
AvailableMemGPU := 0
|
|
|
|
for _, node := range pool.nodes {
|
2019-04-29 09:17:33 +00:00
|
|
|
UtilCPU += node.UtilCPU
|
2019-04-29 09:05:15 +00:00
|
|
|
TotalCPU += node.NumCPU
|
2019-04-29 09:17:33 +00:00
|
|
|
TotalMem += node.MemTotal
|
|
|
|
AvailableMem += node.MemAvailable
|
2019-04-29 09:05:15 +00:00
|
|
|
|
|
|
|
for _, GPU := range node.Status {
|
|
|
|
UtilGPU += GPU.UtilizationGPU
|
|
|
|
TotalGPU ++
|
|
|
|
TotalMemGPU += GPU.MemoryTotal
|
|
|
|
AvailableMemGPU += GPU.MemoryFree
|
|
|
|
}
|
|
|
|
}
|
2019-04-29 12:57:32 +00:00
|
|
|
summary.TimeStamp = time.Now().Format("2006-01-02 15:04:05")
|
|
|
|
summary.UtilCPU = UtilCPU / (float64(len(pool.nodes)) + 0.001)
|
|
|
|
summary.TotalCPU = TotalCPU
|
|
|
|
summary.TotalMem = TotalMem
|
|
|
|
summary.AvailableMem = AvailableMem
|
|
|
|
summary.TotalGPU = TotalGPU
|
2019-04-29 09:05:15 +00:00
|
|
|
if TotalGPU == 0 {
|
2019-04-29 12:57:32 +00:00
|
|
|
summary.UtilGPU = 0.0
|
2019-04-29 09:05:15 +00:00
|
|
|
} else {
|
2019-04-29 12:57:32 +00:00
|
|
|
summary.UtilGPU = UtilGPU / TotalGPU
|
2019-04-29 09:05:15 +00:00
|
|
|
}
|
2019-04-29 12:57:32 +00:00
|
|
|
summary.TotalMemGPU = TotalMemGPU
|
|
|
|
summary.AvailableMemGPU = AvailableMemGPU
|
2019-04-29 09:05:15 +00:00
|
|
|
|
|
|
|
pool.history = append(pool.history, summary)
|
|
|
|
|
|
|
|
if len(pool.history) > 60 {
|
2019-05-06 07:36:31 +00:00
|
|
|
pool.history = pool.history[len(pool.history)-60:]
|
2019-04-29 09:05:15 +00:00
|
|
|
}
|
|
|
|
time.Sleep(time.Second * 60)
|
|
|
|
}
|
|
|
|
}()
|
2019-03-04 09:19:55 +00:00
|
|
|
}
|
|
|
|
|
2019-04-16 08:59:19 +00:00
|
|
|
func (pool *ResourcePool) update(node NodeStatus) {
|
2019-03-04 09:19:55 +00:00
|
|
|
pool.mu.Lock()
|
|
|
|
defer pool.mu.Unlock()
|
|
|
|
|
2019-03-20 03:14:07 +00:00
|
|
|
status, ok := pool.nodes[node.ClientID]
|
|
|
|
if ok {
|
2019-04-16 08:59:19 +00:00
|
|
|
for i, GPU := range status.Status {
|
|
|
|
if GPU.UUID == node.Status[i].UUID {
|
|
|
|
node.Status[i].MemoryAllocated = GPU.MemoryAllocated
|
2019-03-20 03:14:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-04-16 08:59:19 +00:00
|
|
|
pool.nodes[node.ClientID] = node
|
2019-06-04 03:08:49 +00:00
|
|
|
pool.heartBeat[node.ClientID] = time.Now()
|
2019-03-20 03:14:07 +00:00
|
|
|
|
|
|
|
//log.Println(pool.nodes)
|
2019-03-04 09:19:55 +00:00
|
|
|
}
|
|
|
|
|
2019-04-18 09:25:37 +00:00
|
|
|
func (pool *ResourcePool) getByID(id string) NodeStatus {
|
2019-03-04 09:19:55 +00:00
|
|
|
pool.mu.Lock()
|
|
|
|
defer pool.mu.Unlock()
|
|
|
|
|
|
|
|
status, ok := pool.nodes[id]
|
|
|
|
if ok {
|
|
|
|
return status
|
|
|
|
}
|
2019-04-16 08:59:19 +00:00
|
|
|
return NodeStatus{}
|
2019-03-04 09:19:55 +00:00
|
|
|
}
|
2019-04-29 09:05:15 +00:00
|
|
|
|
|
|
|
func (pool *ResourcePool) list() MsgResource {
|
|
|
|
return MsgResource{Code: 0, Resource: pool.nodes}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pool *ResourcePool) statusHistory() MsgPoolStatusHistory {
|
|
|
|
return MsgPoolStatusHistory{Code: 0, Data: pool.history}
|
|
|
|
}
|
2019-06-05 09:09:22 +00:00
|
|
|
|
|
|
|
func (pool *ResourcePool) acquireNetwork() string {
|
2019-06-13 02:53:00 +00:00
|
|
|
pool.networkMu.Lock()
|
|
|
|
defer pool.networkMu.Unlock()
|
2019-06-05 09:09:22 +00:00
|
|
|
var network string
|
2019-06-13 03:13:55 +00:00
|
|
|
fmt.Println(pool.networksFree)
|
2019-06-05 09:09:22 +00:00
|
|
|
if len(pool.networksFree) == 0 {
|
|
|
|
for {
|
2019-06-13 02:53:00 +00:00
|
|
|
for {
|
|
|
|
network = "yao-net-" + strconv.Itoa(rand.Intn(999999))
|
|
|
|
if _, ok := pool.networks[network]; !ok {
|
|
|
|
break
|
|
|
|
}
|
2019-06-05 09:09:22 +00:00
|
|
|
}
|
2019-06-13 02:53:00 +00:00
|
|
|
v := url.Values{}
|
|
|
|
v.Set("name", network)
|
2019-06-13 03:30:55 +00:00
|
|
|
resp, err := doRequest("POST", "http://yao-agent-master:8000/create", strings.NewReader(v.Encode()), "application/x-www-form-urlencoded", "")
|
2019-06-13 02:53:00 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Println(err.Error())
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
defer resp.Body.Close()
|
|
|
|
pool.networksFree[network] = true
|
|
|
|
pool.networks[network] = true
|
|
|
|
break
|
2019-06-05 09:09:22 +00:00
|
|
|
}
|
|
|
|
}
|
2019-06-13 02:53:00 +00:00
|
|
|
|
2019-06-05 09:09:22 +00:00
|
|
|
for k := range pool.networksFree {
|
|
|
|
network = k
|
|
|
|
delete(pool.networksFree, k)
|
|
|
|
}
|
|
|
|
return network
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pool *ResourcePool) releaseNetwork(network string) {
|
|
|
|
pool.networkMu.Lock()
|
|
|
|
pool.networksFree[network] = true
|
|
|
|
pool.networkMu.Unlock()
|
|
|
|
}
|