1
0
mirror of https://github.com/newnius/YAO-optimizer.git synced 2025-06-06 06:41:55 +00:00

add files

This commit is contained in:
Newnius 2020-04-29 18:35:22 +08:00
parent 348d580f11
commit 3e58e8ac13
23 changed files with 7106 additions and 1 deletions

6
.idea/misc.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_10" default="false" project-jdk-name="Python 3.7" project-jdk-type="Python SDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/yao-optimizer.iml" filepath="$PROJECT_DIR$/yao-optimizer.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

317
.idea/workspace.xml Normal file
View File

@ -0,0 +1,317 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="0aedafd8-e57e-462a-beda-65af0b91f3df" name="Default Changelist" comment="" />
<ignored path="$PROJECT_DIR$/out/" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FUSProjectUsageTrigger">
<session id="570274097">
<usages-collector id="statistics.lifecycle.project">
<counts>
<entry key="project.open.time.3" value="1" />
<entry key="project.opened" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="py" value="2" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="Python" value="2" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="py" value="159" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="Python" value="159" />
</counts>
</usages-collector>
</session>
</component>
<component name="FileEditorManager">
<leaf>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/serve.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="431">
<caret line="81" column="16" selection-start-line="81" selection-start-column="16" selection-end-line="81" selection-end-column="16" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="150">
<caret line="52" column="30" lean-forward="true" selection-start-line="52" selection-start-column="30" selection-end-line="52" selection-end-column="30" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>lstm</find>
</findStrings>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/test.py" />
<option value="$PROJECT_DIR$/serve.py" />
</list>
</option>
</component>
<component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
<component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
<component name="JsGulpfileManager">
<detection-done>true</detection-done>
<sorting>DEFINITION_ORDER</sorting>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="y" value="23" />
<option name="width" value="1280" />
<option name="height" value="704" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="AndroidView" />
<pane id="PackagesPane" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="yao-optimizer" type="b2602c69:ProjectViewProjectNode" />
<item name="yao-optimizer" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="aspect.path.notification.shown" value="true" />
<property name="com.android.tools.idea.instantapp.provision.ProvisionBeforeRunTaskProvider.myTimeStamp" value="1588156437493" />
<property name="go.gopath.indexing.explicitly.defined" value="true" />
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
<property name="project.structure.last.edited" value="Project" />
<property name="project.structure.proportion" value="0.0" />
<property name="project.structure.side.proportion" value="0.0" />
<property name="settings.editor.selected.configurable" value="http.proxy" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="0aedafd8-e57e-462a-beda-65af0b91f3df" name="Default Changelist" comment="" />
<created>1588152877746</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1588152877746</updated>
<workItem from="1588152880522" duration="2801000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="2801000" />
</component>
<component name="ToolWindowManager">
<frame x="0" y="23" width="1280" height="704" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="Designer" order="0" />
<window_info id="UI Designer" order="1" />
<window_info id="Capture Tool" order="2" />
<window_info id="Favorites" order="3" side_tool="true" />
<window_info id="Image Layers" order="4" />
<window_info active="true" content_ui="combo" id="Project" order="5" visible="true" weight="0.24959612" />
<window_info id="Structure" order="6" side_tool="true" weight="0.25" />
<window_info anchor="bottom" id="Version Control" order="0" />
<window_info anchor="bottom" id="Terminal" order="1" />
<window_info anchor="bottom" id="Event Log" order="2" side_tool="true" />
<window_info anchor="bottom" id="Database Changes" order="3" show_stripe_button="false" />
<window_info anchor="bottom" id="Docker" order="4" show_stripe_button="false" />
<window_info anchor="bottom" id="Message" order="5" />
<window_info anchor="bottom" id="Find" order="6" />
<window_info anchor="bottom" id="Run" order="7" />
<window_info anchor="bottom" id="Debug" order="8" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="9" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="10" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="11" />
<window_info anchor="right" id="Theme Preview" order="0" />
<window_info anchor="right" id="Capture Analysis" order="1" />
<window_info anchor="right" id="Palette&#9;" order="2" />
<window_info anchor="right" id="SciView" order="3" />
<window_info anchor="right" id="Database" order="4" />
<window_info anchor="right" id="Maven Projects" order="5" />
<window_info anchor="right" id="Palette" order="6" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="7" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="8" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="9" weight="0.25" />
</layout>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="150">
<caret line="52" column="30" lean-forward="true" selection-start-line="52" selection-start-column="30" selection-end-line="52" selection-end-column="30" />
<folding>
<element signature="e#0#19#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/serve.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="431">
<caret line="81" column="16" selection-start-line="81" selection-start-column="16" selection-end-line="81" selection-end-column="16" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
<component name="masterDetails">
<states>
<state key="ArtifactsStructureConfigurable.UI">
<settings>
<artifact-editor />
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="FacetStructureConfigurable.UI">
<settings>
<last-edited>No facets are configured</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="GlobalLibrariesConfigurable.UI">
<settings>
<last-edited>Python 3.7 interpreter library</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="JdkListConfigurable.UI">
<settings>
<last-edited>1.8</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="ModuleStructureConfigurable.UI">
<settings>
<last-edited>yao-optimizer</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
<option value="0.6" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="ProjectJDKs.UI">
<settings>
<last-edited>1.8</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="ProjectLibrariesConfigurable.UI">
<settings>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>

View File

@ -1,2 +1,21 @@
# yao-optimizer
## Predict stock with LSTM
This project includes training and prediction processes with LSTM for stock data. Its characteristics are as follows:
- Concise and modular
- Supports three mainstream deep learning frameworks: PyTorch, Keras and TensorFlow
- Parameters, models and frameworks can be highly customized and modified
- Supports incremental training
- Support predicting multiple indicators at the same time
- Support predicting any number of days in the future
A Chinese introduction is available at: <https://blog.csdn.net/songyunli1111/article/details/78513811>
The simultaneous prediction results for stock high and low prices with PyTorch are shown below:
![predict_high_with_pytorch](https://github.com/hichenway/stock_predict_with_LSTM/blob/master/figure/predict_high_with_pytorch.png)
![predict_low_with_pytorch](https://github.com/hichenway/stock_predict_with_LSTM/blob/master/figure/predict_low_with_pytorch.png)

6110
data/stock_data.csv Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

193
main.py Normal file
View File

@ -0,0 +1,193 @@
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Select the deep-learning backend; exactly one model module is imported and
# provides the `train`/`predict` functions used by main().
frame = "pytorch"
if frame == "pytorch":
    from model.model_pytorch import train, predict
elif frame == "keras":
    from model.model_keras import train, predict
elif frame == "tensorflow":
    from model.model_tensorflow import train, predict
else:
    # Fixed typo in the error message ("seletion" -> "selection").
    raise Exception("Wrong frame selection")
class Config:
    """Hyper-parameters and paths for training/prediction.

    All attributes are class-level so the __main__ block can override them
    with setattr() from parsed CLI arguments.
    """
    # CSV columns used as input features (columns 2..8 of the data file).
    feature_columns = list(range(2, 9))
    # CSV columns to predict; must be a subset of feature_columns.
    label_columns = [4, 5]
    feature_and_label_columns = feature_columns + label_columns
    # Position of each label column inside feature_columns.
    label_in_feature_columns = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)
    predict_day = 1  # how many days ahead to predict
    input_size = len(feature_columns)
    output_size = len(label_columns)
    hidden_size = 128  # LSTM hidden units per layer
    lstm_layers = 2
    dropout_rate = 0.2
    time_step = 20  # sequence length fed to the LSTM
    do_train = True
    do_predict = True
    add_train = False  # resume training from a previously saved model
    shuffle_train_data = True
    train_data_rate = 0.95  # fraction of rows used for training
    valid_data_rate = 0.15  # fraction of the training split held out for validation
    batch_size = 64
    learning_rate = 0.001
    epoch = 20
    patience = 5  # early-stopping patience (epochs without improvement)
    random_seed = 42
    # "Continue" training carries the LSTM hidden state across batches, which
    # requires unshuffled data and batch_size == 1 (enforced below).
    do_continue_train = False
    continue_flag = ""
    if do_continue_train:
        shuffle_train_data = False
        batch_size = 1
        continue_flag = "continue_"
    train_data_path = "./data/stock_data.csv"
    model_save_path = "./checkpoint/"
    figure_save_path = "./figure/"
    do_figure_save = False
    # NOTE: these mkdir calls run at class-definition (import) time.
    if not os.path.exists(model_save_path):
        os.mkdir(model_save_path)
    if not os.path.exists(figure_save_path):
        os.mkdir(figure_save_path)
    used_frame = frame  # set by the module-level frame selection above
    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]
class Data:
    """Loads the stock CSV and produces normalized train/valid/test arrays."""

    def __init__(self, config):
        self.config = config
        self.data, self.data_column_name = self.read_data()
        self.data_num = self.data.shape[0]
        self.train_num = int(self.data_num * self.config.train_data_rate)
        # Per-column statistics computed over the WHOLE dataset (train + test).
        self.mean = np.mean(self.data, axis=0)
        self.std = np.std(self.data, axis=0)
        self.norm_data = (self.data - self.mean) / self.std  # z-score normalization
        self.start_num_in_test = 0  # rows dropped from the test head; set in get_test_data

    def read_data(self):
        """Read the configured CSV keeping only feature/label columns.

        Returns (values ndarray, list of column names).
        """
        init_data = pd.read_csv(self.config.train_data_path,
                                usecols=self.config.feature_and_label_columns)
        return init_data.values, init_data.columns.tolist()

    def get_train_and_valid_data(self):
        """Build sliding-window samples and split them into train/valid sets.

        Labels are the label columns shifted predict_day rows into the future.
        """
        feature_data = self.norm_data[:self.train_num]
        label_data = self.norm_data[self.config.predict_day: self.config.predict_day + self.train_num,
                                    self.config.label_in_feature_columns]
        if not self.config.do_continue_train:
            # Overlapping windows: one sample starting at every row.
            train_x = [feature_data[i:i + self.config.time_step] for i in range(self.train_num - self.config.time_step)]
            train_y = [label_data[i:i + self.config.time_step] for i in range(self.train_num - self.config.time_step)]
        else:
            # Stateful ("continue") training: non-overlapping windows for every
            # possible phase offset, so consecutive samples are time_step apart.
            train_x = [
                feature_data[start_index + i * self.config.time_step: start_index + (i + 1) * self.config.time_step]
                for start_index in range(self.config.time_step)
                for i in range((self.train_num - start_index) // self.config.time_step)]
            train_y = [
                label_data[start_index + i * self.config.time_step: start_index + (i + 1) * self.config.time_step]
                for start_index in range(self.config.time_step)
                for i in range((self.train_num - start_index) // self.config.time_step)]
        train_x, train_y = np.array(train_x), np.array(train_y)
        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
                                                              random_state=self.config.random_seed,
                                                              shuffle=self.config.shuffle_train_data)
        return train_x, valid_x, train_y, valid_y

    def get_test_data(self, return_label_data=False):
        """Cut the test span into whole time_step windows (head remainder dropped)."""
        feature_data = self.norm_data[self.train_num:]
        self.start_num_in_test = feature_data.shape[0] % self.config.time_step  # rows that do not fit a window
        time_step_size = feature_data.shape[0] // self.config.time_step
        test_x = [feature_data[self.start_num_in_test + i * self.config.time_step: self.start_num_in_test + (
                i + 1) * self.config.time_step]
                  for i in range(time_step_size)]
        if return_label_data:
            label_data = self.norm_data[self.train_num + self.start_num_in_test:, self.config.label_in_feature_columns]
            return np.array(test_x), label_data
        return np.array(test_x)
def draw(config: Config, origin_data: Data, predict_norm_data: np.ndarray):
    """Report test-set MSE and plot label vs. predicted price per label column.

    predict_norm_data is in normalized space; values are de-normalized with
    the dataset-wide mean/std before plotting.
    """
    label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
                                            config.label_in_feature_columns]
    assert label_norm_data.shape[0] == predict_norm_data.shape[
        0], "The element number in origin and predicted data is different"
    label_name = [origin_data.data_column_name[i] for i in config.label_in_feature_columns]
    label_column_num = len(config.label_columns)
    # MSE in normalized space, aligning each prediction with the row it forecasts.
    loss = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
    print("The mean squared error of stock {} is ".format(label_name), loss)
    label_X = range(origin_data.data_num - origin_data.train_num - origin_data.start_num_in_test)
    # Shift predictions right by predict_day so they line up on the time axis.
    predict_X = [x + config.predict_day for x in label_X]
    # De-normalize back to price units.
    label_data = label_norm_data * origin_data.std[config.label_in_feature_columns] + \
                 origin_data.mean[config.label_in_feature_columns]
    predict_data = predict_norm_data * origin_data.std[config.label_in_feature_columns] + \
                   origin_data.mean[config.label_in_feature_columns]
    for i in range(label_column_num):
        plt.figure(i + 1)
        plt.plot(label_X, label_data[:, i], label='label')
        plt.plot(predict_X, predict_data[:, i], label='predict')
        plt.legend(loc='upper right')
        plt.xlabel("Day")
        plt.ylabel("Price")
        plt.title("Predict stock {} price with {}".format(label_name[i], config.used_frame))
        print("The predicted stock {} for the next {} day(s) is: ".format(label_name[i], config.predict_day),
              np.squeeze(predict_data[-config.predict_day:, i]))
        if config.do_figure_save:
            plt.savefig(config.figure_save_path + "{}predict_{}_with_{}.png".format(config.continue_flag, label_name[i],
                                                                                    config.used_frame))
    plt.show()
def main(config):
    """Run the configured pipeline: optional training, optional prediction."""
    np.random.seed(config.random_seed)  # reproducibility
    dataset = Data(config)
    if config.do_train:
        tx, vx, ty, vy = dataset.get_train_and_valid_data()
        train(config, tx, ty, vx, vy)
    if config.do_predict:
        test_x, _ = dataset.get_test_data(return_label_data=True)
        draw(config, dataset, predict(config, test_x))
if __name__ == "__main__":
    import argparse
    # CLI flags are currently disabled; uncomment to expose them.
    parser = argparse.ArgumentParser()
    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
    args = parser.parse_args()
    con = Config()
    # Copy every public attribute of the parsed namespace onto the Config
    # (a no-op while all add_argument calls are commented out).
    for key in dir(args):
        if not key.startswith("_"):
            setattr(con, key, getattr(args, key))
    main(con)

0
model/__init__.py Normal file
View File

35
model/model_keras.py Normal file
View File

@ -0,0 +1,35 @@
from keras.layers import Input, Dense, LSTM
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping
def get_keras_model(config):
    """Build a stacked-LSTM regressor (Keras functional API), compiled with MSE/Adam."""
    inputs = Input(shape=(config.time_step, config.input_size))
    x = inputs
    for _ in range(config.lstm_layers):
        x = LSTM(units=config.hidden_size, dropout=config.dropout_rate, return_sequences=True)(x)
    outputs = Dense(config.output_size)(x)
    model = Model(inputs, outputs)
    model.compile(loss='mse', optimizer='adam')
    return model
def train(config, train_X, train_Y, valid_X, valid_Y):
    """Fit the Keras model, checkpointing the best validation loss and stopping early."""
    model = get_keras_model(config)
    model.summary()
    if config.add_train:  # resume from previously saved weights
        model.load_weights(config.model_save_path + config.model_name)
    callbacks = [
        ModelCheckpoint(filepath=config.model_save_path + config.model_name, monitor='val_loss',
                        save_best_only=True, mode='auto'),
        EarlyStopping(monitor='val_loss', patience=config.patience, mode='auto'),
    ]
    model.fit(train_X, train_Y, batch_size=config.batch_size, epochs=config.epoch, verbose=2,
              validation_data=(valid_X, valid_Y), callbacks=callbacks)
def predict(config, test_X):
    """Load the best saved weights and predict on test_X, flattened to (N, output_size)."""
    model = get_keras_model(config)
    model.load_weights(config.model_save_path + config.model_name)
    pred = model.predict(test_X, batch_size=1)
    return pred.reshape((-1, config.output_size))

98
model/model_pytorch.py Normal file
View File

@ -0,0 +1,98 @@
import torch
from torch.nn import Module, LSTM, Linear
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
class Net(Module):
    """Stacked LSTM followed by a per-timestep linear projection."""

    def __init__(self, config):
        super(Net, self).__init__()
        self.lstm = LSTM(input_size=config.input_size, hidden_size=config.hidden_size,
                         num_layers=config.lstm_layers, batch_first=True, dropout=config.dropout_rate)
        self.linear = Linear(in_features=config.hidden_size, out_features=config.output_size)

    def forward(self, x, hidden=None):
        # Returns per-timestep predictions plus the LSTM state, so callers
        # may carry the state across batches.
        out, hidden = self.lstm(x, hidden)
        return self.linear(out), hidden
def train(config, train_X, train_Y, valid_X, valid_Y):
    """Train the PyTorch Net with early stopping on validation loss.

    Saves the best state_dict to config.model_save_path + config.model_name.
    When config.do_continue_train is set, the LSTM hidden state is carried
    (detached) across batches instead of being reset.
    """
    train_X, train_Y = torch.from_numpy(train_X).float(), torch.from_numpy(train_Y).float()
    train_loader = DataLoader(TensorDataset(train_X, train_Y), batch_size=config.batch_size)
    valid_X, valid_Y = torch.from_numpy(valid_X).float(), torch.from_numpy(valid_Y).float()
    valid_loader = DataLoader(TensorDataset(valid_X, valid_Y), batch_size=config.batch_size)
    model = Net(config)
    if config.add_train:
        # Resume from a previously saved model.
        model.load_state_dict(torch.load(config.model_save_path + config.model_name))
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = torch.nn.MSELoss()
    valid_loss_min = float("inf")
    bad_epoch = 0
    for epoch in range(config.epoch):
        print("Epoch {}/{}".format(epoch, config.epoch))
        model.train()
        train_loss_array = []
        hidden_train = None
        for i, _data in enumerate(train_loader):
            _train_X, _train_Y = _data
            optimizer.zero_grad()
            pred_Y, hidden_train = model(_train_X, hidden_train)
            if not config.do_continue_train:
                hidden_train = None  # stateless: reset hidden state each batch
            else:
                # Detach the carried state so gradients do not flow across batches.
                h_0, c_0 = hidden_train
                h_0.detach_(), c_0.detach_()
                hidden_train = (h_0, c_0)
            loss = criterion(pred_Y, _train_Y)
            loss.backward()
            optimizer.step()
            train_loss_array.append(loss.item())
        # Validation pass (no gradient updates).
        model.eval()
        valid_loss_array = []
        hidden_valid = None
        for _valid_X, _valid_Y in valid_loader:
            pred_Y, hidden_valid = model(_valid_X, hidden_valid)
            if not config.do_continue_train: hidden_valid = None
            loss = criterion(pred_Y, _valid_Y)
            valid_loss_array.append(loss.item())
        valid_loss_cur = np.mean(valid_loss_array)
        print("The train loss is {:.4f}. ".format(np.mean(train_loss_array)),
              "The valid loss is {:.4f}.".format(valid_loss_cur))
        if valid_loss_cur < valid_loss_min:
            # Improvement: save checkpoint and reset the patience counter.
            valid_loss_min = valid_loss_cur
            bad_epoch = 0
            torch.save(model.state_dict(), config.model_save_path + config.model_name)
        else:
            bad_epoch += 1
            if bad_epoch >= config.patience:
                print(" The training stops early in epoch {}".format(epoch))
                break
def predict(config, test_X):
    """Restore the trained Net and return its predictions as a numpy array."""
    loader = DataLoader(TensorDataset(torch.from_numpy(test_X).float()), batch_size=1)
    model = Net(config)
    model.load_state_dict(torch.load(config.model_save_path + config.model_name))
    model.eval()
    preds = torch.Tensor()
    hidden = None  # hidden state carried across consecutive test windows
    for batch in loader:
        out, hidden = model(batch[0], hidden)
        preds = torch.cat((preds, torch.squeeze(out, dim=0)), dim=0)
    return preds.detach().numpy()

97
model/model_tensorflow.py Normal file
View File

@ -0,0 +1,97 @@
import tensorflow as tf
import numpy as np
class Model:
    """TF1 graph: stacked dropout-wrapped LSTM cells plus a dense output layer."""

    def __init__(self, config):
        self.config = config
        self.placeholders()
        self.net()
        self.operate()

    def placeholders(self):
        # Inputs and targets: [batch, time_step, features].
        self.X = tf.placeholder(tf.float32, [None, self.config.time_step, self.config.input_size])
        self.Y = tf.placeholder(tf.float32, [None, self.config.time_step, self.config.output_size])

    def net(self):
        def dropout_cell():
            # Keep probability is (1 - dropout_rate).
            basicLstm = tf.nn.rnn_cell.LSTMCell(self.config.hidden_size)
            dropoutLstm = tf.nn.rnn_cell.DropoutWrapper(basicLstm, output_keep_prob=1 - self.config.dropout_rate)
            return dropoutLstm
        cell = tf.nn.rnn_cell.MultiRNNCell([dropout_cell() for _ in range(self.config.lstm_layers)])
        output_rnn, _ = tf.nn.dynamic_rnn(cell=cell, inputs=self.X, dtype=tf.float32)
        # shape of output_rnn is: [batch_size, time_step, hidden_size]
        self.pred = tf.layers.dense(inputs=output_rnn, units=self.config.output_size)

    def operate(self):
        # MSE over every timestep/output, Adam optimizer, and a saver for checkpoints.
        self.loss = tf.reduce_mean(tf.square(tf.reshape(self.pred, [-1]) - tf.reshape(self.Y, [-1])))
        self.optim = tf.train.AdamOptimizer(self.config.learning_rate).minimize(self.loss)
        self.saver = tf.train.Saver(tf.global_variables())
def train(config, train_X, train_Y, valid_X, valid_Y):
    """Train the TF1 model with mini-batches; checkpoint the best validation
    loss and stop early after `patience` epochs without improvement."""
    with tf.variable_scope("stock_predict"):
        model = Model(config)
    train_len = len(train_X)
    valid_len = len(valid_X)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        valid_loss_min = float("inf")
        bad_epoch = 0
        for epoch in range(config.epoch):
            print("Epoch {}/{}".format(epoch, config.epoch))
            # Training pass
            train_loss_array = []
            for step in range(train_len // config.batch_size):
                feed_dict = {model.X: train_X[step * config.batch_size: (step + 1) * config.batch_size],
                             model.Y: train_Y[step * config.batch_size: (step + 1) * config.batch_size]}
                train_loss, _ = sess.run([model.loss, model.optim], feed_dict=feed_dict)
                train_loss_array.append(train_loss)
            # Validation pass and early stopping
            valid_loss_array = []
            for step in range(valid_len // config.batch_size):
                feed_dict = {model.X: valid_X[step * config.batch_size: (step + 1) * config.batch_size],
                             model.Y: valid_Y[step * config.batch_size: (step + 1) * config.batch_size]}
                valid_loss = sess.run(model.loss, feed_dict=feed_dict)
                valid_loss_array.append(valid_loss)
            valid_loss_cur = np.mean(valid_loss_array)
            print("The train loss is {:.4f}. ".format(np.mean(train_loss_array)),
                  "The valid loss is {:.4f}.".format(valid_loss_cur))
            if valid_loss_cur < valid_loss_min:
                # Improvement: save a checkpoint and reset the patience counter.
                valid_loss_min = valid_loss_cur
                bad_epoch = 0
                path = model.saver.save(sess, config.model_save_path + config.model_name)
                print(path)
            else:
                bad_epoch += 1
                if bad_epoch >= config.patience:
                    print(" The training stops early in epoch {}".format(epoch))
                    break
def predict(config, test_X):
    """Restore the latest checkpoint and predict over test_X one window at a time.

    Returns an array of shape (len(test_X) * time_step, output_size).
    """
    # BUG FIX: disable dropout at inference time. DropoutWrapper keeps
    # activations with probability (1 - dropout_rate), so the rate must be 0
    # here; the previous value of 1 zeroed out every LSTM output.
    config.dropout_rate = 0
    with tf.variable_scope("stock_predict", reuse=tf.AUTO_REUSE):
        model = Model(config)
    test_len = len(test_X)
    with tf.Session() as sess:
        module_file = tf.train.latest_checkpoint(config.model_save_path)
        model.saver.restore(sess, module_file)
        result = np.zeros((test_len * config.time_step, config.output_size))
        for step in range(test_len):
            # Feed one sample (kept 3-D by slicing, not indexing).
            feed_dict = {model.X: test_X[step: (step + 1)]}
            test_pred = sess.run(model.pred, feed_dict=feed_dict)
            result[step * config.time_step: (step + 1) * config.time_step] = test_pred[0, :, :]
        return result

9
requirements.txt Normal file
View File

@ -0,0 +1,9 @@
scikit-learn
pandas
argparse
keras==2.2.4
tensorflow==1.15.2
matplotlib>=3.0.2
numpy>=1.14.6
scipy>=1.1.0
torch>=1.1.0

87
serve.py Normal file
View File

@ -0,0 +1,87 @@
import tensorflow as tf
import numpy as np
import tushare as ts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Hyper-parameters: 7-day input windows of 5 features predicting 1 value.
timesteps = seq_length = 7
data_dim = 5
output_dim = 1
# Daily K-line data for stock 600000 (requires network access via tushare).
stock_data = ts.get_k_data('600000', start='2015-01-01', end='2017-12-01')
xy = stock_data[['open', 'close', 'high', 'low', 'volume']]
# xy_new = pd.DataFrame()
# scaler = MinMaxScaler()
# scaler.fit(xy)
# t = scaler.transform(xy)
# for col in xy.columns:
#     xy_new.ix[:, col] = t[col]
x = xy
y = xy[['close']]  # target: the close price following each window
# Build sliding windows: seq_length rows of features -> next day's close.
dataX = []
dataY = []
for i in range(0, len(y) - seq_length):
    _x = x[i:i + seq_length]
    _y = y.loc[i + seq_length]
    # print(_x, "->", _y)
    dataX.append(_x)
    dataY.append(_y)
x_real = np.vstack(dataX).reshape(-1, seq_length, data_dim)
y_real = np.vstack(dataY).reshape(-1, output_dim)
print(x_real.shape)
print(y_real.shape)
dataX = x_real
dataY = y_real
# 70/30 chronological train/test split (no shuffling).
train_size = int(len(dataY) * 0.7)
test_size = len(dataY) - train_size
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
Y = tf.placeholder(tf.float32, [None, 1])


def add_layer(inputs, in_size, out_size, activation_function=None):
    """Fully-connected layer applied along the last axis (inputs reshaped to 2-D)."""
    inputs = tf.reshape(inputs, [-1, in_size])
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


# Dense(relu) feature transform, then a single-unit LSTM; the last timestep's
# output is taken as the prediction.
outsize_first = 5
l1 = add_layer(X, data_dim, outsize_first, activation_function=tf.nn.relu)
l1_output = tf.reshape(l1, [-1, seq_length, outsize_first])
cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=output_dim, state_is_tuple=True)
outputs, _states = tf.nn.dynamic_rnn(cell, l1_output, dtype=tf.float32)
Y_pred = outputs[:, -1]
# Sum-of-squares loss with plain gradient descent, full-batch for 100 steps.
loss = tf.reduce_sum(tf.square(Y_pred - Y))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(100):
    _, l = sess.run(
        [train, loss],
        feed_dict={X: trainX, Y: trainY}
    )
    # print(i, l)
testPredict = sess.run(Y_pred, feed_dict={X: testX})
print(testY)
print(testPredict)

111
test.py Normal file
View File

@ -0,0 +1,111 @@
import pandas as pd
import numpy as np
import tensorflow as tf
import tushare as ts
rnn_unit = 10  # LSTM hidden units
input_size = 7  # input features per timestep
output_size = 1
lr = 0.0006  # learning rate
# Daily K-line data for stock 600000 (requires network access via tushare).
stock_data = ts.get_k_data('600000', start='2015-01-01', end='2017-12-01')
# Columns 2..9 of the frame; column 7 of this slice is used as the label below.
data = stock_data.iloc[:, 2:10].values
# —————————————————— Build the training set ——————————————————
def get_train_data(batch_size=60, time_step=20, train_begin=0, train_end=5800, source=None):
    """Build normalized sliding-window training samples.

    Args:
        batch_size: every batch_size-th window start is recorded in batch_index.
        time_step: window length in rows.
        train_begin, train_end: row range of the source array to use.
        source: optional 2-D array to slice from; defaults to the module-level
            `data` (backward compatible).

    Returns:
        (batch_index, train_x, train_y): batch start offsets plus the total
        sample count as a final sentinel; feature windows (cols 0..6); label
        windows (col 7, each of shape [time_step, 1]).
    """
    src = data if source is None else source
    batch_index = []
    data_train = src[train_begin:train_end]
    # z-score normalization per column
    normalized_train_data = (data_train - np.mean(data_train, axis=0)) / np.std(data_train, axis=0)
    train_x, train_y = [], []
    for i in range(len(normalized_train_data) - time_step):
        if i % batch_size == 0:
            batch_index.append(i)
        x = normalized_train_data[i:i + time_step, :7]
        y = normalized_train_data[i:i + time_step, 7, np.newaxis]
        train_x.append(x.tolist())
        train_y.append(y.tolist())
    batch_index.append((len(normalized_train_data) - time_step))
    return batch_index, train_x, train_y
# —————————————————— Build the test set ——————————————————
def get_test_data(time_step=20, test_begin=5800, source=None):
    """Build normalized test windows from the tail of the data.

    Args:
        time_step: window length in rows.
        test_begin: first row of the test span.
        source: optional 2-D array to slice from; defaults to the module-level
            `data` (backward compatible).

    Returns:
        (mean, std, test_x, test_y): per-column statistics of the test span,
        feature windows (cols 0..6; the last window holds the remainder rows),
        and the flat label series (col 7).
    """
    src = data if source is None else source
    data_test = src[test_begin:]
    mean = np.mean(data_test, axis=0)
    std = np.std(data_test, axis=0)
    normalized_test_data = (data_test - mean) / std  # z-score normalization
    size = (len(normalized_test_data) + time_step - 1) // time_step  # number of windows (ceiling)
    test_x, test_y = [], []
    for i in range(size - 1):
        x = normalized_test_data[i * time_step:(i + 1) * time_step, :7]
        y = normalized_test_data[i * time_step:(i + 1) * time_step, 7]
        test_x.append(x.tolist())
        test_y.extend(y)
    # Remainder window. Computing the start explicitly instead of reusing the
    # loop variable fixes a NameError when the span yields fewer than two windows.
    tail_start = max(size - 1, 0) * time_step
    test_x.append((normalized_test_data[tail_start:, :7]).tolist())
    test_y.extend((normalized_test_data[tail_start:, 7]).tolist())
    return mean, std, test_x, test_y
# —————————————————— Network definition ——————————————————
def lstm(X):
    """Build the LSTM graph for input X of shape [batch, time_step, input_size].

    NOTE(review): relies on module-level dicts `weights` and `biases` (keys
    'in'/'out') that are not defined in this file excerpt — confirm they exist
    before running.
    """
    batch_size = tf.shape(X)[0]
    time_step = tf.shape(X)[1]
    w_in = weights['in']
    b_in = biases['in']
    input = tf.reshape(X, [-1, input_size])  # flatten to 2-D so the input projection can be applied
    input_rnn = tf.matmul(input, w_in) + b_in
    input_rnn = tf.reshape(input_rnn, [-1, time_step, rnn_unit])  # back to 3-D as the LSTM cell input
    cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_unit)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)
    output_rnn, final_states = tf.nn.dynamic_rnn(cell, input_rnn, initial_state=init_state,
                                                 dtype=tf.float32)  # output_rnn: every timestep's output; final_states: last cell state
    output = tf.reshape(output_rnn, [-1, rnn_unit])  # input to the output layer
    w_out = weights['out']
    b_out = biases['out']
    pred = tf.matmul(output, w_out) + b_out
    return pred, final_states
# —————————————————— Train the model ——————————————————
def train_lstm(batch_size=80, time_step=15, train_begin=0, train_end=5800):
    """Train the LSTM, resuming from the newest local checkpoint when present.

    Saves a 'stock2.model' checkpoint in the working directory every 200 epochs.
    """
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    Y = tf.placeholder(tf.float32, shape=[None, time_step, output_size])
    batch_index, train_x, train_y = get_train_data(batch_size, time_step, train_begin, train_end)
    pred, _ = lstm(X)
    # Mean squared error over all timesteps/outputs.
    loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(Y, [-1])))
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)
    # BUG FIX: tf.train.latest_checkpoint() requires a checkpoint directory;
    # look in the working directory where this function saves 'stock2.model'.
    module_file = tf.train.latest_checkpoint('.')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if module_file is not None:
            # Resume from the newest checkpoint when one exists; otherwise
            # start from the freshly initialized variables (the original
            # unconditionally restored and crashed on a clean run).
            saver.restore(sess, module_file)
        # Train for 2000 epochs.
        for i in range(2000):
            for step in range(len(batch_index) - 1):
                _, loss_ = sess.run([train_op, loss], feed_dict={X: train_x[batch_index[step]:batch_index[step + 1]],
                                                                 Y: train_y[batch_index[step]:batch_index[step + 1]]})
            print(i, loss_)
            if i % 200 == 0:
                print("保存模型:", saver.save(sess, 'stock2.model', global_step=i))
# ———————————————— Predict with the model ————————————————
def prediction(time_step=20):
    """Restore the newest checkpoint, predict the test span, and compute the
    mean relative error `acc` (computed but not returned, as before)."""
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    mean, std, test_x, test_y = get_test_data(time_step)
    pred, _ = lstm(X)
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as sess:
        # BUG FIX: tf.train.latest_checkpoint() requires a checkpoint
        # directory; checkpoints are written to the working directory.
        module_file = tf.train.latest_checkpoint('.')
        saver.restore(sess, module_file)
        test_predict = []
        for step in range(len(test_x) - 1):
            prob = sess.run(pred, feed_dict={X: [test_x[step]]})
            predict = prob.reshape((-1))
            test_predict.extend(predict)
        # De-normalize with the test-span statistics (column 7 is the label).
        test_y = np.array(test_y) * std[7] + mean[7]
        test_predict = np.array(test_predict) * std[7] + mean[7]
        # Mean relative error of the predictions.
        acc = np.average(np.abs(test_predict - test_y[:len(test_predict)]) / test_y[:len(test_predict)])

9
yao-optimizer.iml Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>