diff --git a/README.md b/README.md
deleted file mode 100644
index d5f8f84..0000000
--- a/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-## Predict stock with LSTM
-
-This project implements training and prediction with LSTM for stock data. Its characteristics are as follows:
-
-- Concise and modular
-- Supports three mainstream deep learning frameworks: PyTorch, Keras, and TensorFlow
-- Parameters, models and frameworks can be highly customized and modified
-- Supports incremental training
-- Supports predicting multiple indicators at the same time
-- Supports predicting any number of days in the future
-
-
-A Chinese introduction is available at:
-
-
-
-The simultaneous prediction results for stock high and low prices with PyTorch are shown below:
-
-
-
-
\ No newline at end of file
diff --git a/figure/continue_predict_high_with_pytorch.png b/figure/continue_predict_high_with_pytorch.png
deleted file mode 100644
index 33aae01..0000000
Binary files a/figure/continue_predict_high_with_pytorch.png and /dev/null differ
diff --git a/figure/continue_predict_low_with_pytorch.png b/figure/continue_predict_low_with_pytorch.png
deleted file mode 100644
index 812357e..0000000
Binary files a/figure/continue_predict_low_with_pytorch.png and /dev/null differ
diff --git a/figure/predict_high_with_keras.png b/figure/predict_high_with_keras.png
deleted file mode 100644
index eb31295..0000000
Binary files a/figure/predict_high_with_keras.png and /dev/null differ
diff --git a/figure/predict_high_with_pytorch.png b/figure/predict_high_with_pytorch.png
deleted file mode 100644
index f187d7b..0000000
Binary files a/figure/predict_high_with_pytorch.png and /dev/null differ
diff --git a/figure/predict_high_with_tensorflow.png b/figure/predict_high_with_tensorflow.png
deleted file mode 100644
index 2e23e82..0000000
Binary files a/figure/predict_high_with_tensorflow.png and /dev/null differ
diff --git a/figure/predict_low_with_keras.png b/figure/predict_low_with_keras.png
deleted file mode 100644
index 301cf48..0000000
Binary files a/figure/predict_low_with_keras.png and /dev/null differ
diff --git a/figure/predict_low_with_pytorch.png b/figure/predict_low_with_pytorch.png
deleted file mode 100644
index 242b5f3..0000000
Binary files a/figure/predict_low_with_pytorch.png and /dev/null differ
diff --git a/figure/predict_low_with_tensorflow.png b/figure/predict_low_with_tensorflow.png
deleted file mode 100644
index b1a4c41..0000000
Binary files a/figure/predict_low_with_tensorflow.png and /dev/null differ
diff --git a/main.py b/main.py
index dc5dad6..a3211b1 100644
--- a/main.py
+++ b/main.py
@@ -4,8 +4,7 @@ import os
from sklearn.model_selection import train_test_split
frame = "tensorflow"
-from model.model_tensorflow import train, predict
-
+from model_tensorflow import train, predict
class Config:
feature_columns = list([2,5])
@@ -139,24 +138,6 @@ def draw(config, origin_data, predict_norm_data):
print(label_data)
print(predict_data)
- '''
- for i in range(label_column_num):
- plt.figure(i + 1)
- plt.plot(label_X, label_data[:, i], label='label')
- plt.plot(predict_X, predict_data[:, i], label='predict')
- plt.legend(loc='upper right')
- plt.xlabel("Day")
- plt.ylabel("Price")
- plt.title("Predict stock {} price with {}".format(label_name[i], config.used_frame))
- print("The predicted stock {} for the next {} day(s) is: ".format(label_name[i], config.predict_day),
- np.squeeze(predict_data[-config.predict_day:, i]))
- if config.do_figure_save:
- plt.savefig(config.figure_save_path + "{}predict_{}_with_{}.png".format(config.continue_flag, label_name[i],
- config.used_frame))
-
- plt.show()
- '''
-
def main(config):
np.random.seed(config.random_seed)
diff --git a/model/__init__.py b/model/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/model/model_keras.py b/model/model_keras.py
deleted file mode 100644
index fd3ed76..0000000
--- a/model/model_keras.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from keras.layers import Input, Dense, LSTM
-from keras.models import Model
-from keras.callbacks import ModelCheckpoint, EarlyStopping
-
-
-def get_keras_model(config):
- input1 = Input(shape=(config.time_step, config.input_size))
- lstm = input1
- for i in range(config.lstm_layers):
- lstm = LSTM(units=config.hidden_size, dropout=config.dropout_rate, return_sequences=True)(lstm)
- output = Dense(config.output_size)(lstm)
- model = Model(input1, output)
- model.compile(loss='mse', optimizer='adam')
- return model
-
-
-def train(config, train_X, train_Y, valid_X, valid_Y):
- model = get_keras_model(config)
- model.summary()
- if config.add_train:
- model.load_weights(config.model_save_path + config.model_name)
-
- check_point = ModelCheckpoint(filepath=config.model_save_path + config.model_name, monitor='val_loss',
- save_best_only=True, mode='auto')
- early_stop = EarlyStopping(monitor='val_loss', patience=config.patience, mode='auto')
- model.fit(train_X, train_Y, batch_size=config.batch_size, epochs=config.epoch, verbose=2,
- validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
-
-
-def predict(config, test_X):
- model = get_keras_model(config)
- model.load_weights(config.model_save_path + config.model_name)
- result = model.predict(test_X, batch_size=1)
- result = result.reshape((-1, config.output_size))
- return result
diff --git a/model/model_pytorch.py b/model/model_pytorch.py
deleted file mode 100644
index 7a43c1e..0000000
--- a/model/model_pytorch.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import torch
-from torch.nn import Module, LSTM, Linear
-from torch.utils.data import DataLoader, TensorDataset
-import numpy as np
-
-
-class Net(Module):
- def __init__(self, config):
- super(Net, self).__init__()
- self.lstm = LSTM(input_size=config.input_size, hidden_size=config.hidden_size,
- num_layers=config.lstm_layers, batch_first=True, dropout=config.dropout_rate)
- self.linear = Linear(in_features=config.hidden_size, out_features=config.output_size)
-
- def forward(self, x, hidden=None):
- lstm_out, hidden = self.lstm(x, hidden)
- linear_out = self.linear(lstm_out)
- return linear_out, hidden
-
-
-def train(config, train_X, train_Y, valid_X, valid_Y):
- train_X, train_Y = torch.from_numpy(train_X).float(), torch.from_numpy(train_Y).float()
- train_loader = DataLoader(TensorDataset(train_X, train_Y), batch_size=config.batch_size)
-
- valid_X, valid_Y = torch.from_numpy(valid_X).float(), torch.from_numpy(valid_Y).float()
- valid_loader = DataLoader(TensorDataset(valid_X, valid_Y), batch_size=config.batch_size)
-
- model = Net(config)
- if config.add_train:
- model.load_state_dict(torch.load(config.model_save_path + config.model_name))
- optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
- criterion = torch.nn.MSELoss()
-
- valid_loss_min = float("inf")
- bad_epoch = 0
- for epoch in range(config.epoch):
- print("Epoch {}/{}".format(epoch, config.epoch))
- model.train()
- train_loss_array = []
- hidden_train = None
- for i, _data in enumerate(train_loader):
- _train_X, _train_Y = _data
- optimizer.zero_grad()
- pred_Y, hidden_train = model(_train_X, hidden_train)
-
- if not config.do_continue_train:
- hidden_train = None
- else:
- h_0, c_0 = hidden_train
- h_0.detach_(), c_0.detach_()
- hidden_train = (h_0, c_0)
- loss = criterion(pred_Y, _train_Y)
- loss.backward()
- optimizer.step()
- train_loss_array.append(loss.item())
-
- model.eval()
- valid_loss_array = []
- hidden_valid = None
- for _valid_X, _valid_Y in valid_loader:
- pred_Y, hidden_valid = model(_valid_X, hidden_valid)
- if not config.do_continue_train: hidden_valid = None
- loss = criterion(pred_Y, _valid_Y)
- valid_loss_array.append(loss.item())
-
- valid_loss_cur = np.mean(valid_loss_array)
- print("The train loss is {:.4f}. ".format(np.mean(train_loss_array)),
- "The valid loss is {:.4f}.".format(valid_loss_cur))
-
- if valid_loss_cur < valid_loss_min:
- valid_loss_min = valid_loss_cur
- bad_epoch = 0
- torch.save(model.state_dict(), config.model_save_path + config.model_name)
- else:
- bad_epoch += 1
- if bad_epoch >= config.patience:
- print(" The training stops early in epoch {}".format(epoch))
- break
-
-
-def predict(config, test_X):
- test_X = torch.from_numpy(test_X).float()
- test_set = TensorDataset(test_X)
- test_loader = DataLoader(test_set, batch_size=1)
-
- model = Net(config)
- model.load_state_dict(torch.load(config.model_save_path + config.model_name))
-
- result = torch.Tensor()
-
- model.eval()
- hidden_predict = None
- for _data in test_loader:
- data_X = _data[0]
- pred_X, hidden_predict = model(data_X, hidden_predict)
- cur_pred = torch.squeeze(pred_X, dim=0)
- result = torch.cat((result, cur_pred), dim=0)
-
- return result.detach().numpy()
diff --git a/model/model_tensorflow.py b/model_tensorflow.py
similarity index 100%
rename from model/model_tensorflow.py
rename to model_tensorflow.py
diff --git a/requirements.txt b/requirements.txt
index b1e153c..9f8b3a2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,3 @@ tensorflow==1.15.2
matplotlib>=3.0.2
numpy>=1.14.6
scipy>=1.1.0
-torch>=1.1.0
diff --git a/serve.py b/serve.py
index 6cfb959..1bce038 100644
--- a/serve.py
+++ b/serve.py
@@ -1,87 +1,89 @@
-import tensorflow as tf
-import numpy as np
-import tushare as ts
-import pandas as pd
-from sklearn.preprocessing import MinMaxScaler
+#!/usr/bin/env python3
+from threading import Thread
+from threading import Lock
+from http.server import BaseHTTPRequestHandler, HTTPServer
+import cgi
+import json
+from urllib import parse
-timesteps = seq_length = 7
-data_dim = 5
-output_dim = 1
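+# Port the HTTP prediction service listens on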
+PORT_NUMBER = 8000
-stock_data = ts.get_k_data('600000', start='2015-01-01', end='2017-12-01')
-xy = stock_data[['open', 'close', 'high', 'low', 'volume']]
-
-# xy_new = pd.DataFrame()
-# scaler = MinMaxScaler()
-
-# scaler.fit(xy)
-# t = scaler.transform(xy)
-
-# for col in xy.columns:
-# xy_new.ix[:, col] = t[col]
-
-x = xy
-y = xy[['close']]
-dataX = []
-dataY = []
-for i in range(0, len(y) - seq_length):
- _x = x[i:i + seq_length]
- _y = y.loc[i + seq_length]
- #print(_x, "->", _y)
- dataX.append(_x)
- dataY.append(_y)
-
-x_real = np.vstack(dataX).reshape(-1, seq_length, data_dim)
-y_real = np.vstack(dataY).reshape(-1, output_dim)
-print(x_real.shape)
-print(y_real.shape)
-dataX = x_real
-dataY = y_real
-
-train_size = int(len(dataY) * 0.7)
-test_size = len(dataY) - train_size
-trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
-trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
-
-X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
-Y = tf.placeholder(tf.float32, [None, 1])
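+# Process-wide lock so only one training job runs at a time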
+lock = Lock()
-def add_layer(inputs, in_size, out_size, activation_function=None):
- inputs = tf.reshape(inputs, [-1, in_size])
- Weights = tf.Variable(tf.random_normal([in_size, out_size]))
- biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
- Wx_plus_b = tf.matmul(inputs, Weights) + biases
- if activation_function is None:
- outputs = Wx_plus_b
- else:
- outputs = activation_function(Wx_plus_b)
- return outputs
+def train_models(job, records):
+    # Serialize training runs: only one job trains at a time.
+    with lock:
+        pass  # actual training on the submitted records goes here
-outsize_first = 5
-l1 = add_layer(X, data_dim, outsize_first, activation_function=tf.nn.relu)
-l1_output = tf.reshape(l1, [-1, seq_length, outsize_first])
+class MyHandler(BaseHTTPRequestHandler):
+ # Handler for the GET requests
+ def do_GET(self):
+ req = parse.urlparse(self.path)
+ query = parse.parse_qs(req.query)
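+        # parse_qs maps each query parameter to a list of values, hence the [0] indexing below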
-cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=output_dim, state_is_tuple=True)
-outputs, _states = tf.nn.dynamic_rnn(cell, l1_output, dtype=tf.float32)
-Y_pred = outputs[:, -1]
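+        # Route GET requests: /ping is a lightweight health check, /predict returns a stubbed JSON reply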
+ if req.path == "/ping":
+ self.send_response(200)
+            self.send_header('Content-type', 'text/plain')
+ self.end_headers()
+ self.wfile.write(bytes("pong", "utf-8"))
-loss = tf.reduce_sum(tf.square(Y_pred - Y))
+ elif req.path == "/predict":
+ try:
+ job = query.get('job')[0]
+ gpu_model = query.get('gpu_model')[0]
+ time = query.get('time')[0]
+                msg = {'code': 1, 'error': "container does not exist"}
+ except Exception as e:
+ msg = {'code': 2, 'error': str(e)}
+ self.send_response(200)
+ self.send_header('Content-type', 'application/json')
+ self.end_headers()
+ self.wfile.write(bytes(json.dumps(msg), "utf-8"))
-optimizer = tf.train.GradientDescentOptimizer(0.01)
-train = optimizer.minimize(loss)
+ else:
+ self.send_error(404, 'File Not Found: %s' % self.path)
-sess = tf.Session()
-sess.run(tf.global_variables_initializer())
-for i in range(100):
- _, l = sess.run(
- [train, loss],
- feed_dict={X: trainX, Y: trainY}
- )
- #print(i, l)
+ # Handler for the POST requests
+ def do_POST(self):
+ if self.path == "/train":
+ form = cgi.FieldStorage(
+ fp=self.rfile,
+ headers=self.headers,
+ environ={
+ 'REQUEST_METHOD': 'POST',
+ 'CONTENT_TYPE': self.headers['Content-Type'],
+ })
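+            # FieldStorage parses both urlencoded and multipart/form-data request bodies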
+ try:
+                job = form.getvalue('job')
+                data = form.getvalue('records')
+                records = json.loads(data)
+                # run training in a background thread so the request returns immediately
+                t = Thread(target=train_models, name='train_models', args=(job, records))
+ t.start()
+ msg = {"code": 0, "error": ""}
+ except Exception as e:
+ msg = {"code": 1, "error": str(e)}
+ self.send_response(200)
+ self.send_header('Content-type', 'application/json')
+ self.end_headers()
+ self.wfile.write(bytes(json.dumps(msg), "utf-8"))
-testPredict = sess.run(Y_pred, feed_dict={X: testX})
+ else:
+ self.send_error(404, 'File Not Found: %s' % self.path)
-print(testY)
-print(testPredict)
+
+if __name__ == '__main__':
+ try:
+ # Create a web server and define the handler to manage the
+ # incoming request
+ server = HTTPServer(('', PORT_NUMBER), MyHandler)
+        print('Started HTTP server on port', PORT_NUMBER)
+
+ # Wait forever for incoming http requests
+ server.serve_forever()
+
+ except KeyboardInterrupt:
+ print('^C received, shutting down the web server')
+
+ server.socket.close()
diff --git a/test.py b/test.py
deleted file mode 100644
index 8da029a..0000000
--- a/test.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import pandas as pd
-import numpy as np
-import tensorflow as tf
-import tushare as ts
-
-rnn_unit = 10
-input_size = 7
-output_size = 1
-lr = 0.0006
-
-stock_data = ts.get_k_data('600000', start='2015-01-01', end='2017-12-01')
-data = stock_data.iloc[:, 2:10].values
-
-
-# —————————— Get the training set ——————————
-def get_train_data(batch_size=60, time_step=20, train_begin=0, train_end=5800):
- batch_index = []
- data_train = data[train_begin:train_end]
-    normalized_train_data = (data_train - np.mean(data_train, axis=0)) / np.std(data_train, axis=0)  # standardize
-    train_x, train_y = [], []  # initial definitions of training x and y
- for i in range(len(normalized_train_data) - time_step):
- if i % batch_size == 0:
- batch_index.append(i)
- x = normalized_train_data[i:i + time_step, :7]
- y = normalized_train_data[i:i + time_step, 7, np.newaxis]
- train_x.append(x.tolist())
- train_y.append(y.tolist())
- batch_index.append((len(normalized_train_data) - time_step))
- return batch_index, train_x, train_y
-
-
-# —————————— Get the test set ——————————
-def get_test_data(time_step=20, test_begin=5800):
- data_test = data[test_begin:]
- mean = np.mean(data_test, axis=0)
- std = np.std(data_test, axis=0)
-    normalized_test_data = (data_test - mean) / std  # standardize
-    size = (len(normalized_test_data) + time_step - 1) // time_step  # number of samples
- test_x, test_y = [], []
- for i in range(size - 1):
- x = normalized_test_data[i * time_step:(i + 1) * time_step, :7]
- y = normalized_test_data[i * time_step:(i + 1) * time_step, 7]
- test_x.append(x.tolist())
- test_y.extend(y)
- test_x.append((normalized_test_data[(i + 1) * time_step:, :7]).tolist())
- test_y.extend((normalized_test_data[(i + 1) * time_step:, 7]).tolist())
- return mean, std, test_x, test_y
-
-
-# —————————————————— Define the network variables ——————————————————
-def lstm(X):
- batch_size = tf.shape(X)[0]
- time_step = tf.shape(X)[1]
- w_in = weights['in']
- b_in = biases['in']
-    input = tf.reshape(X, [-1, input_size])  # reshape the tensor to 2-D for the matmul; the result feeds the hidden layer
- input_rnn = tf.matmul(input, w_in) + b_in
-    input_rnn = tf.reshape(input_rnn, [-1, time_step, rnn_unit])  # reshape back to 3-D as input to the LSTM cell
- cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_unit)
- init_state = cell.zero_state(batch_size, dtype=tf.float32)
- output_rnn, final_states = tf.nn.dynamic_rnn(cell, input_rnn, initial_state=init_state,
-                                                 dtype=tf.float32)  # output_rnn records the output of every LSTM step; final_states is the state of the last cell
-    output = tf.reshape(output_rnn, [-1, rnn_unit])  # input to the output layer
- w_out = weights['out']
- b_out = biases['out']
- pred = tf.matmul(output, w_out) + b_out
- return pred, final_states
-
-
-# —————————————————— Train the model ——————————————————
-def train_lstm(batch_size=80, time_step=15, train_begin=0, train_end=5800):
- X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
- Y = tf.placeholder(tf.float32, shape=[None, time_step, output_size])
- batch_index, train_x, train_y = get_train_data(batch_size, time_step, train_begin, train_end)
- pred, _ = lstm(X)
-    # loss function
- loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(Y, [-1])))
- train_op = tf.train.AdamOptimizer(lr).minimize(loss)
- saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)
-    module_file = tf.train.latest_checkpoint('.')  # checkpoints are saved to the working directory
- with tf.Session() as sess:
- # sess.run(tf.global_variables_initializer())
- saver.restore(sess, module_file)
-        # train for 2000 iterations
- for i in range(2000):
- for step in range(len(batch_index) - 1):
- _, loss_ = sess.run([train_op, loss], feed_dict={X: train_x[batch_index[step]:batch_index[step + 1]],
- Y: train_y[batch_index[step]:batch_index[step + 1]]})
- print(i, loss_)
- if i % 200 == 0:
- print("保存模型:", saver.save(sess, 'stock2.model', global_step=i))
-
-
-# ———————————————— Prediction model ————————————————————
-def prediction(time_step=20):
- X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
- mean, std, test_x, test_y = get_test_data(time_step)
- pred, _ = lstm(X)
- saver = tf.train.Saver(tf.global_variables())
- with tf.Session() as sess:
-        # restore parameters
-        module_file = tf.train.latest_checkpoint('.')  # checkpoints are saved to the working directory
- saver.restore(sess, module_file)
- test_predict = []
- for step in range(len(test_x) - 1):
- prob = sess.run(pred, feed_dict={X: [test_x[step]]})
- predict = prob.reshape((-1))
- test_predict.extend(predict)
- test_y = np.array(test_y) * std[7] + mean[7]
- test_predict = np.array(test_predict) * std[7] + mean[7]
- acc = np.average(np.abs(test_predict - test_y[:len(test_predict)]) / test_y[:len(test_predict)])