#!/usr/bin/env python3
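
"""A small HTTP service around an LSTM time-series model.

Endpoints:
    GET  /ping     -- liveness check (plain-text "pong")
    GET  /feed     -- append one (seq, value) observation to the training CSV
    GET  /train    -- retrain the model on a background thread
    GET  /predict  -- predict from the most recent time_step-long window
    POST /train2   -- trigger training from form-posted records
"""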

from threading import Thread
from threading import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import cgi  # note: cgi was removed from the stdlib in Python 3.13
import json
from urllib import parse

import csv
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from model_tensorflow import train, predict


class Config:
    # Which CSV columns are features and which are labels, by column index.
    feature_columns = list(range(0, 2))
    label_columns = [1]
    feature_and_label_columns = feature_columns + label_columns
    # Positions of the label columns inside feature_columns.  A lambda is
    # used because a class-level list comprehension cannot see other class
    # attributes in Python 3.
    label_in_feature_columns = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)

    predict_day = 1  # how many steps ahead to predict

    input_size = len(feature_columns)
    output_size = len(label_columns)

    hidden_size = 128
    lstm_layers = 2
    dropout_rate = 0.2
    time_step = 5  # length of each input window

    do_train = True
    do_predict = True
    add_train = False
    shuffle_train_data = True

    # train_data_rate = 0.95  # comment yqy
    train_data_rate = 1  # add yqy: use all rows for training
    valid_data_rate = 0.15

    batch_size = 64
    learning_rate = 0.001
    epoch = 20
    patience = 5
    random_seed = 42

    do_continue_train = False
    continue_flag = ""
    if do_continue_train:
        shuffle_train_data = False
        batch_size = 1
        continue_flag = "continue_"

    train_data_path = "./data/data.csv"
    model_save_path = "./checkpoint/"
    figure_save_path = "./figure/"
    do_figure_save = False
    if not os.path.exists(model_save_path):
        os.mkdir(model_save_path)
    if not os.path.exists(figure_save_path):
        os.mkdir(figure_save_path)

    used_frame = "tensorflow"
    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]


class Data:

    def __init__(self, config):
        self.config = config
        self.data, self.data_column_name = self.read_data()

        self.data_num = self.data.shape[0]
        self.train_num = int(self.data_num * self.config.train_data_rate)

        # Z-score normalisation; the epsilon keeps the division safe when a
        # column has zero variance.
        self.mean = np.mean(self.data, axis=0)
        self.std = np.std(self.data, axis=0) + 0.0001
        self.norm_data = (self.data - self.mean) / self.std

        self.start_num_in_test = 0

    def read_data(self):
        init_data = pd.read_csv(self.config.train_data_path,
                                usecols=self.config.feature_and_label_columns)
        return init_data.values, init_data.columns.tolist()

    def get_train_and_valid_data(self):
        feature_data = self.norm_data[:self.train_num]
        # Labels are the same series shifted forward by predict_day.
        label_data = self.norm_data[self.config.predict_day: self.config.predict_day + self.train_num,
                                    self.config.label_in_feature_columns]
        if not self.config.do_continue_train:
            # Sliding windows that advance one step at a time.
            train_x = [feature_data[i:i + self.config.time_step]
                       for i in range(self.train_num - self.config.time_step)]
            train_y = [label_data[i:i + self.config.time_step]
                       for i in range(self.train_num - self.config.time_step)]
        else:
            # For continue training the windows must stay contiguous in time,
            # so they are taken time_step apart for each starting offset.
            train_x = [
                feature_data[start_index + i * self.config.time_step: start_index + (i + 1) * self.config.time_step]
                for start_index in range(self.config.time_step)
                for i in range((self.train_num - start_index) // self.config.time_step)]
            train_y = [
                label_data[start_index + i * self.config.time_step: start_index + (i + 1) * self.config.time_step]
                for start_index in range(self.config.time_step)
                for i in range((self.train_num - start_index) // self.config.time_step)]

        train_x, train_y = np.array(train_x), np.array(train_y)

        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y,
                                                              test_size=self.config.valid_data_rate,
                                                              random_state=self.config.random_seed,
                                                              shuffle=self.config.shuffle_train_data)
        return train_x, valid_x, train_y, valid_y

    def get_test_data(self, return_label_data=False):
        feature_data = self.norm_data[self.train_num:]
        # Drop the leading rows that do not fill a whole window.
        self.start_num_in_test = feature_data.shape[0] % self.config.time_step
        time_step_size = feature_data.shape[0] // self.config.time_step

        test_x = [feature_data[self.start_num_in_test + i * self.config.time_step:
                               self.start_num_in_test + (i + 1) * self.config.time_step]
                  for i in range(time_step_size)]
        if return_label_data:
            label_data = self.norm_data[self.train_num + self.start_num_in_test:,
                                        self.config.label_in_feature_columns]
            return np.array(test_x), label_data
        return np.array(test_x)

    # add yqy
    def get_test_data_yqy(self, test_data_yqy=None):
        if test_data_yqy is None:
            test_data_yqy = []
        # Normalise with the training statistics and wrap as a batch of one;
        # the result has shape (1, n_rows, input_size).  The /predict handler
        # passes exactly time_step rows.
        feature_data = (test_data_yqy - self.mean) / self.std
        test_x = [feature_data]
        return np.array(test_x)
    # add end


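# A minimal sketch of how Data is consumed (assuming ./data/data.csv already
# holds "seq,value" rows, as written by the /feed endpoint below):
#
#   d = Data(Config())
#   x, vx, y, vy = d.get_train_and_valid_data()
#   # x.shape == (n_samples, Config.time_step, Config.input_size)
#   # y.shape == (n_samples, Config.time_step, Config.output_size)

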
def draw_yqy(config, origin_data, predict_norm_data, mean_yqy, std_yqy):
    label_norm_data = (origin_data - mean_yqy) / std_yqy
    assert label_norm_data.shape[0] == predict_norm_data.shape[0], \
        "The element number in origin and predicted data is different"

    label_name = 'value'
    # Mean squared error in normalised space; the labels are shifted by
    # predict_day so each prediction lines up with the value it targets.
    loss = np.mean((label_norm_data[config.predict_day:, 1:2]
                    - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
    print("The mean squared error of {} is ".format(label_name), loss)

    # De-normalise back to the original scale: x = x_norm * std + mean.
    label_data = label_norm_data[:, 1:2] * std_yqy[1:2] + mean_yqy[1:2]
    predict_data = predict_norm_data * std_yqy[config.label_in_feature_columns] + \
        mean_yqy[config.label_in_feature_columns]

    print(label_data[:, -1])
    print(predict_data[:, -1])


PORT_NUMBER = 8080
lock = Lock()  # serialises training so only one run happens at a time
config = Config()


def train_models():
    # Hold the lock for the whole run so concurrent /train requests queue up,
    # and so the lock is released even if training raises.
    with lock:
        np.random.seed(config.random_seed)
        data_gainer = Data(config)

        train_X, valid_X, train_Y, valid_Y = data_gainer.get_train_and_valid_data()
        print(train_X.shape, valid_X.shape, train_Y.shape, valid_Y.shape)

        # Shrink the batch size when there is little training data.
        if train_X.shape[0] < 500:
            config.batch_size = 32
        if train_X.shape[0] < 200:
            config.batch_size = 16

        train(config, train_X, train_Y, valid_X, valid_Y)
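

# train_models is started from the /train endpoint below on a background
# thread, so the HTTP response returns immediately while training runs.
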
class MyHandler(BaseHTTPRequestHandler):

    # Handler for the GET requests
    def do_GET(self):
        req = parse.urlparse(self.path)
        query = parse.parse_qs(req.query)

        if req.path == "/ping":
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()
            self.wfile.write(bytes("pong", "utf-8"))

        elif req.path == "/predict":
            try:
                # The newest observation arrives in the job/model query
                # parameters and is stored as one (seq, value) row.
                data = {
                    'seq': query.get('job')[0],
                    'value': query.get('model')[0],
                }

                # Take the last time_step - 1 training rows, append the new
                # observation, and write the window out as test data.
                df = pd.read_csv(config.train_data_path, usecols=['seq', 'value'])
                df = df.tail(config.time_step - 1)
                # DataFrame.append was removed in pandas 2.0; use concat.
                df = pd.concat([df, pd.DataFrame([data])], ignore_index=True)
                df.to_csv('./data/test_data.csv', index=False)

                np.random.seed(config.random_seed)
                data_gainer = Data(config)
                test_data_yqy = pd.read_csv("./data/test_data.csv", usecols=list(range(0, 2)))
                test_data_values = test_data_yqy.values[:]
                test_X = data_gainer.get_test_data_yqy(test_data_values)
                pred_result = predict(config, test_X)

                # data_gainer already holds the training mean/std, so there is
                # no need to construct Data(config) again.
                draw_yqy(config, test_data_values, pred_result,
                         data_gainer.mean, data_gainer.std)

                msg = {'code': 0, 'error': ""}
            except Exception as e:
                msg = {'code': 2, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
2020-04-29 15:32:09 +00:00
|
|
|
elif req.path == "/feed":
|
|
|
|
try:
|
|
|
|
job = query.get('job')[0]
|
|
|
|
model = query.get('model')[0]
|
|
|
|
time = query.get('time')[0]
|
2020-05-01 08:57:04 +00:00
|
|
|
utilGPU = query.get('utilGPU')[0]
|
|
|
|
utilCPU = query.get('utilCPU')[0]
|
2020-04-29 15:32:09 +00:00
|
|
|
pre = query.get('pre')[0]
|
|
|
|
main = query.get('main')[0]
|
|
|
|
post = query.get('post')[0]
|
2020-05-01 08:57:04 +00:00
|
|
|
|
2020-05-01 14:52:25 +00:00
|
|
|
seq = query.get('seq')[0]
|
|
|
|
value = query.get('value')[0]
|
|
|
|
|
2020-04-29 15:32:09 +00:00
|
|
|
with open(config.train_data_path, 'a+', newline='') as csvfile:
|
|
|
|
spamwriter = csv.writer(
|
2020-04-29 15:37:12 +00:00
|
|
|
csvfile, delimiter=',',
|
2020-04-29 15:32:09 +00:00
|
|
|
quotechar='|', quoting=csv.QUOTE_MINIMAL
|
|
|
|
)
|
2020-05-01 14:52:25 +00:00
|
|
|
# spamwriter.writerow([job, model, time, utilGPU, utilCPU, pre, main, post])
|
|
|
|
spamwriter.writerow([seq, value])
|
2020-04-29 15:32:09 +00:00
|
|
|
msg = {'code': 1, 'error': "container not exist"}
|
|
|
|
except Exception as e:
|
|
|
|
msg = {'code': 2, 'error': str(e)}
|
|
|
|
self.send_response(200)
|
|
|
|
self.send_header('Content-type', 'application/json')
|
|
|
|
self.end_headers()
|
|
|
|
self.wfile.write(bytes(json.dumps(msg), "utf-8"))

        elif req.path == "/train":
            try:
                # Train on a background thread so this request returns at
                # once; the lock inside train_models serialises the runs.
                t = Thread(target=train_models, name='train_models', args=())
                t.start()
                msg = {'code': 0, 'error': ""}
            except Exception as e:
                msg = {'code': 2, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))

        else:
            self.send_error(404, 'File Not Found: %s' % self.path)

    # Handler for the POST requests
    def do_POST(self):
        if self.path == "/train2":
            form = cgi.FieldStorage(
                fp=self.rfile,
                headers=self.headers,
                environ={
                    'REQUEST_METHOD': 'POST',
                    'CONTENT_TYPE': self.headers['Content-Type'],
                })
            try:
                # FieldStorage.getvalue returns the value itself, not a list.
                job = form.getvalue('job')
                data = form.getvalue('records')
                records = json.loads(data)
                # Pass the callable, not its result; train_models currently
                # takes no arguments, so job/records are only validated here.
                t = Thread(target=train_models, name='train_models')
                t.start()
                msg = {"code": 0, "error": ""}
            except Exception as e:
                msg = {"code": 1, "error": str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))

        else:
            self.send_error(404, 'File Not Found: %s' % self.path)


if __name__ == '__main__':
    try:
        # Create a web server and define the handler to manage the
        # incoming requests.
        server = HTTPServer(('', PORT_NUMBER), MyHandler)
        print('Started http server on port ', PORT_NUMBER)

        # Start every run from a fresh training CSV holding only the header.
        with open(config.train_data_path, 'w', newline='') as csvfile:
            spamwriter = csv.writer(
                csvfile, delimiter=',',
                quotechar='|', quoting=csv.QUOTE_MINIMAL
            )
            # spamwriter.writerow(["job", "model", "time", "utilGPU", "utilCPU", "pre", "main", "post"])
            spamwriter.writerow(["seq", "value"])

        # Wait forever for incoming http requests
        server.serve_forever()

    except KeyboardInterrupt:
        print('^C received, shutting down the web server')
        server.socket.close()
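

# A sketch of example client calls against a locally running instance
# (hypothetical values; /feed requires every query parameter it reads):
#
#   import urllib.request
#   base = "http://localhost:8080"
#   urllib.request.urlopen(base + "/feed?seq=1&value=0.5&job=j&model=m"
#                          "&time=0&utilGPU=0&utilCPU=0&pre=0&main=0&post=0")
#   urllib.request.urlopen(base + "/train")
#   urllib.request.urlopen(base + "/predict?job=2&model=0.6")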