add files
6
.idea/misc.xml
Normal file
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" languageLevel="JDK_10" default="false" project-jdk-name="Python 3.7" project-jdk-type="Python SDK">
    <output url="file://$PROJECT_DIR$/out" />
  </component>
</project>
8
.idea/modules.xml
Normal file
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/yao-optimizer.iml" filepath="$PROJECT_DIR$/yao-optimizer.iml" />
    </modules>
  </component>
</project>
6
.idea/vcs.xml
Normal file
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="Git" />
  </component>
</project>
317
.idea/workspace.xml
Normal file
@@ -0,0 +1,317 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ChangeListManager">
    <list default="true" id="0aedafd8-e57e-462a-beda-65af0b91f3df" name="Default Changelist" comment="" />
    <ignored path="$PROJECT_DIR$/out/" />
    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="FUSProjectUsageTrigger">
    <session id="570274097">
      <usages-collector id="statistics.lifecycle.project">
        <counts>
          <entry key="project.open.time.3" value="1" />
          <entry key="project.opened" value="1" />
        </counts>
      </usages-collector>
      <usages-collector id="statistics.file.extensions.open">
        <counts>
          <entry key="py" value="2" />
        </counts>
      </usages-collector>
      <usages-collector id="statistics.file.types.open">
        <counts>
          <entry key="Python" value="2" />
        </counts>
      </usages-collector>
      <usages-collector id="statistics.file.extensions.edit">
        <counts>
          <entry key="py" value="159" />
        </counts>
      </usages-collector>
      <usages-collector id="statistics.file.types.edit">
        <counts>
          <entry key="Python" value="159" />
        </counts>
      </usages-collector>
    </session>
  </component>
  <component name="FileEditorManager">
    <leaf>
      <file pinned="false" current-in-tab="true">
        <entry file="file://$PROJECT_DIR$/serve.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="431">
              <caret line="81" column="16" selection-start-line="81" selection-start-column="16" selection-end-line="81" selection-end-column="16" />
              <folding>
                <element signature="e#0#23#0" expanded="true" />
              </folding>
            </state>
          </provider>
        </entry>
      </file>
      <file pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/test.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="150">
              <caret line="52" column="30" lean-forward="true" selection-start-line="52" selection-start-column="30" selection-end-line="52" selection-end-column="30" />
              <folding>
                <element signature="e#0#19#0" expanded="true" />
              </folding>
            </state>
          </provider>
        </entry>
      </file>
    </leaf>
  </component>
  <component name="FileTemplateManagerImpl">
    <option name="RECENT_TEMPLATES">
      <list>
        <option value="Python Script" />
      </list>
    </option>
  </component>
  <component name="FindInProjectRecents">
    <findStrings>
      <find>lstm</find>
    </findStrings>
  </component>
  <component name="Git.Settings">
    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
  </component>
  <component name="IdeDocumentHistory">
    <option name="CHANGED_PATHS">
      <list>
        <option value="$PROJECT_DIR$/test.py" />
        <option value="$PROJECT_DIR$/serve.py" />
      </list>
    </option>
  </component>
  <component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
  <component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
  <component name="JsGulpfileManager">
    <detection-done>true</detection-done>
    <sorting>DEFINITION_ORDER</sorting>
  </component>
  <component name="ProjectFrameBounds" extendedState="6">
    <option name="y" value="23" />
    <option name="width" value="1280" />
    <option name="height" value="704" />
  </component>
  <component name="ProjectView">
    <navigator proportions="" version="1">
      <foldersAlwaysOnTop value="true" />
    </navigator>
    <panes>
      <pane id="AndroidView" />
      <pane id="PackagesPane" />
      <pane id="ProjectPane">
        <subPane>
          <expand>
            <path>
              <item name="yao-optimizer" type="b2602c69:ProjectViewProjectNode" />
              <item name="yao-optimizer" type="462c0819:PsiDirectoryNode" />
            </path>
          </expand>
          <select />
        </subPane>
      </pane>
      <pane id="Scope" />
    </panes>
  </component>
  <component name="PropertiesComponent">
    <property name="WebServerToolWindowFactoryState" value="false" />
    <property name="aspect.path.notification.shown" value="true" />
    <property name="com.android.tools.idea.instantapp.provision.ProvisionBeforeRunTaskProvider.myTimeStamp" value="1588156437493" />
    <property name="go.gopath.indexing.explicitly.defined" value="true" />
    <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
    <property name="nodejs_npm_path_reset_for_default_project" value="true" />
    <property name="project.structure.last.edited" value="Project" />
    <property name="project.structure.proportion" value="0.0" />
    <property name="project.structure.side.proportion" value="0.0" />
    <property name="settings.editor.selected.configurable" value="http.proxy" />
  </component>
  <component name="RunDashboard">
    <option name="ruleStates">
      <list>
        <RuleState>
          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
        </RuleState>
        <RuleState>
          <option name="name" value="StatusDashboardGroupingRule" />
        </RuleState>
      </list>
    </option>
  </component>
  <component name="SvnConfiguration">
    <configuration />
  </component>
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <changelist id="0aedafd8-e57e-462a-beda-65af0b91f3df" name="Default Changelist" comment="" />
      <created>1588152877746</created>
      <option name="number" value="Default" />
      <option name="presentableId" value="Default" />
      <updated>1588152877746</updated>
      <workItem from="1588152880522" duration="2801000" />
    </task>
    <servers />
  </component>
  <component name="TimeTrackingManager">
    <option name="totallyTimeSpent" value="2801000" />
  </component>
  <component name="ToolWindowManager">
    <frame x="0" y="23" width="1280" height="704" extended-state="6" />
    <editor active="true" />
    <layout>
      <window_info id="Designer" order="0" />
      <window_info id="UI Designer" order="1" />
      <window_info id="Capture Tool" order="2" />
      <window_info id="Favorites" order="3" side_tool="true" />
      <window_info id="Image Layers" order="4" />
      <window_info active="true" content_ui="combo" id="Project" order="5" visible="true" weight="0.24959612" />
      <window_info id="Structure" order="6" side_tool="true" weight="0.25" />
      <window_info anchor="bottom" id="Version Control" order="0" />
      <window_info anchor="bottom" id="Terminal" order="1" />
      <window_info anchor="bottom" id="Event Log" order="2" side_tool="true" />
      <window_info anchor="bottom" id="Database Changes" order="3" show_stripe_button="false" />
      <window_info anchor="bottom" id="Docker" order="4" show_stripe_button="false" />
      <window_info anchor="bottom" id="Message" order="5" />
      <window_info anchor="bottom" id="Find" order="6" />
      <window_info anchor="bottom" id="Run" order="7" />
      <window_info anchor="bottom" id="Debug" order="8" weight="0.4" />
      <window_info anchor="bottom" id="Cvs" order="9" weight="0.25" />
      <window_info anchor="bottom" id="Inspection" order="10" weight="0.4" />
      <window_info anchor="bottom" id="TODO" order="11" />
      <window_info anchor="right" id="Theme Preview" order="0" />
      <window_info anchor="right" id="Capture Analysis" order="1" />
      <window_info anchor="right" id="Palette&#9;" order="2" />
      <window_info anchor="right" id="SciView" order="3" />
      <window_info anchor="right" id="Database" order="4" />
      <window_info anchor="right" id="Maven Projects" order="5" />
      <window_info anchor="right" id="Palette" order="6" />
      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="7" type="SLIDING" weight="0.4" />
      <window_info anchor="right" id="Ant Build" order="8" weight="0.25" />
      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="9" weight="0.25" />
    </layout>
  </component>
  <component name="TypeScriptGeneratedFilesManager">
    <option name="version" value="1" />
  </component>
  <component name="VcsContentAnnotationSettings">
    <option name="myLimit" value="2678400000" />
  </component>
  <component name="editorHistoryManager">
    <entry file="file://$PROJECT_DIR$/test.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="150">
          <caret line="52" column="30" lean-forward="true" selection-start-line="52" selection-start-column="30" selection-end-line="52" selection-end-column="30" />
          <folding>
            <element signature="e#0#19#0" expanded="true" />
          </folding>
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/serve.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="431">
          <caret line="81" column="16" selection-start-line="81" selection-start-column="16" selection-end-line="81" selection-end-column="16" />
          <folding>
            <element signature="e#0#23#0" expanded="true" />
          </folding>
        </state>
      </provider>
    </entry>
  </component>
  <component name="masterDetails">
    <states>
      <state key="ArtifactsStructureConfigurable.UI">
        <settings>
          <artifact-editor />
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
      <state key="FacetStructureConfigurable.UI">
        <settings>
          <last-edited>No facets are configured</last-edited>
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
      <state key="GlobalLibrariesConfigurable.UI">
        <settings>
          <last-edited>Python 3.7 interpreter library</last-edited>
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
      <state key="JdkListConfigurable.UI">
        <settings>
          <last-edited>1.8</last-edited>
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
      <state key="ModuleStructureConfigurable.UI">
        <settings>
          <last-edited>yao-optimizer</last-edited>
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
                <option value="0.6" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
      <state key="ProjectJDKs.UI">
        <settings>
          <last-edited>1.8</last-edited>
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
      <state key="ProjectLibrariesConfigurable.UI">
        <settings>
          <splitter-proportions>
            <option name="proportions">
              <list>
                <option value="0.2" />
              </list>
            </option>
          </splitter-proportions>
        </settings>
      </state>
    </states>
  </component>
</project>
21
README.md
@@ -1,2 +1,21 @@
# yao-optimizer

## Predict stock with LSTM

This project provides training and prediction pipelines for stock data using LSTMs. Its characteristics are as follows:

- Concise and modular
- Supports three mainstream deep learning frameworks: PyTorch, Keras, and TensorFlow
- Parameters, models, and frameworks can be highly customized and modified
- Supports incremental training
- Supports predicting multiple indicators at the same time
- Supports predicting any number of days into the future

A Chinese introduction is available at <https://blog.csdn.net/songyunli1111/article/details/78513811>.

The simultaneous prediction results for the stock high and low prices with PyTorch are shown below:

![predict_high](./figure/predict_high_with_pytorch.png)

![predict_low](./figure/predict_low_with_pytorch.png)
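A minimal usage sketch (assuming the defaults in `main.py`; the framework is chosen via the `frame` variable at the top of that file):

```python
# minimal run sketch: train on data/stock_data.csv, then predict and plot the test split
from main import Config, main

con = Config()  # set frame = "pytorch" / "keras" / "tensorflow" at the top of main.py first
main(con)       # trains (do_train=True), then predicts and plots (do_predict=True)
```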
6110
data/stock_data.csv
Normal file
BIN
figure/continue_predict_high_with_pytorch.png
Normal file
BIN
figure/continue_predict_low_with_pytorch.png
Normal file
BIN
figure/predict_high_with_keras.png
Normal file
BIN
figure/predict_high_with_pytorch.png
Normal file
BIN
figure/predict_high_with_tensorflow.png
Normal file
BIN
figure/predict_low_with_keras.png
Normal file
BIN
figure/predict_low_with_pytorch.png
Normal file
BIN
figure/predict_low_with_tensorflow.png
Normal file
193
main.py
Normal file
@@ -0,0 +1,193 @@
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

frame = "pytorch"
if frame == "pytorch":
    from model.model_pytorch import train, predict
elif frame == "keras":
    from model.model_keras import train, predict
elif frame == "tensorflow":
    from model.model_tensorflow import train, predict
else:
    raise Exception("Wrong frame selection")


class Config:
    feature_columns = list(range(2, 9))
    label_columns = [4, 5]
    feature_and_label_columns = feature_columns + label_columns
    # positions of the label columns within feature_columns
    label_in_feature_columns = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)

    predict_day = 1  # how many days ahead to predict

    input_size = len(feature_columns)
    output_size = len(label_columns)

    hidden_size = 128
    lstm_layers = 2
    dropout_rate = 0.2
    time_step = 20

    do_train = True
    do_predict = True
    add_train = False  # incremental training on top of a saved model
    shuffle_train_data = True

    train_data_rate = 0.95
    valid_data_rate = 0.15

    batch_size = 64
    learning_rate = 0.001
    epoch = 20
    patience = 5  # early-stopping patience
    random_seed = 42

    do_continue_train = False  # carry the LSTM hidden state across batches
    continue_flag = ""
    if do_continue_train:
        shuffle_train_data = False
        batch_size = 1
        continue_flag = "continue_"

    train_data_path = "./data/stock_data.csv"
    model_save_path = "./checkpoint/"
    figure_save_path = "./figure/"
    do_figure_save = False
    if not os.path.exists(model_save_path):
        os.mkdir(model_save_path)
    if not os.path.exists(figure_save_path):
        os.mkdir(figure_save_path)

    used_frame = frame
    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]


class Data:
    def __init__(self, config):
        self.config = config
        self.data, self.data_column_name = self.read_data()

        self.data_num = self.data.shape[0]
        self.train_num = int(self.data_num * self.config.train_data_rate)

        self.mean = np.mean(self.data, axis=0)
        self.std = np.std(self.data, axis=0)
        self.norm_data = (self.data - self.mean) / self.std  # z-score normalization

        self.start_num_in_test = 0

    def read_data(self):
        init_data = pd.read_csv(self.config.train_data_path,
                                usecols=self.config.feature_and_label_columns)
        return init_data.values, init_data.columns.tolist()

    def get_train_and_valid_data(self):
        feature_data = self.norm_data[:self.train_num]
        # labels are the label columns shifted predict_day steps into the future
        label_data = self.norm_data[self.config.predict_day: self.config.predict_day + self.train_num,
                                    self.config.label_in_feature_columns]
        if not self.config.do_continue_train:
            train_x = [feature_data[i:i + self.config.time_step] for i in range(self.train_num - self.config.time_step)]
            train_y = [label_data[i:i + self.config.time_step] for i in range(self.train_num - self.config.time_step)]
        else:
            # in continue-train mode, build non-overlapping windows so consecutive
            # samples are time-continuous and the hidden state can be carried over
            train_x = [
                feature_data[start_index + i * self.config.time_step: start_index + (i + 1) * self.config.time_step]
                for start_index in range(self.config.time_step)
                for i in range((self.train_num - start_index) // self.config.time_step)]
            train_y = [
                label_data[start_index + i * self.config.time_step: start_index + (i + 1) * self.config.time_step]
                for start_index in range(self.config.time_step)
                for i in range((self.train_num - start_index) // self.config.time_step)]

        train_x, train_y = np.array(train_x), np.array(train_y)

        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
                                                              random_state=self.config.random_seed,
                                                              shuffle=self.config.shuffle_train_data)
        return train_x, valid_x, train_y, valid_y

    def get_test_data(self, return_label_data=False):
        feature_data = self.norm_data[self.train_num:]
        self.start_num_in_test = feature_data.shape[0] % self.config.time_step  # leading days that do not fill a window
        time_step_size = feature_data.shape[0] // self.config.time_step

        test_x = [feature_data[self.start_num_in_test + i * self.config.time_step: self.start_num_in_test + (
                i + 1) * self.config.time_step]
                  for i in range(time_step_size)]
        if return_label_data:
            label_data = self.norm_data[self.train_num + self.start_num_in_test:, self.config.label_in_feature_columns]
            return np.array(test_x), label_data
        return np.array(test_x)


def draw(config: Config, origin_data: Data, predict_norm_data: np.ndarray):
    label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
                                            config.label_in_feature_columns]
    assert label_norm_data.shape[0] == predict_norm_data.shape[
        0], "The element number in origin and predicted data is different"

    label_name = [origin_data.data_column_name[i] for i in config.label_in_feature_columns]
    label_column_num = len(config.label_columns)

    loss = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
    print("The mean squared error of stock {} is ".format(label_name), loss)

    label_X = range(origin_data.data_num - origin_data.train_num - origin_data.start_num_in_test)
    predict_X = [x + config.predict_day for x in label_X]

    # de-normalize back to real prices
    label_data = label_norm_data * origin_data.std[config.label_in_feature_columns] + \
                 origin_data.mean[config.label_in_feature_columns]

    predict_data = predict_norm_data * origin_data.std[config.label_in_feature_columns] + \
                   origin_data.mean[config.label_in_feature_columns]

    for i in range(label_column_num):
        plt.figure(i + 1)
        plt.plot(label_X, label_data[:, i], label='label')
        plt.plot(predict_X, predict_data[:, i], label='predict')
        plt.legend(loc='upper right')
        plt.xlabel("Day")
        plt.ylabel("Price")
        plt.title("Predict stock {} price with {}".format(label_name[i], config.used_frame))
        print("The predicted stock {} for the next {} day(s) is: ".format(label_name[i], config.predict_day),
              np.squeeze(predict_data[-config.predict_day:, i]))
        if config.do_figure_save:
            plt.savefig(config.figure_save_path + "{}predict_{}_with_{}.png".format(config.continue_flag, label_name[i],
                                                                                    config.used_frame))

    plt.show()


def main(config):
    np.random.seed(config.random_seed)
    data_gainer = Data(config)

    if config.do_train:
        train_X, valid_X, train_Y, valid_Y = data_gainer.get_train_and_valid_data()
        train(config, train_X, train_Y, valid_X, valid_Y)

    if config.do_predict:
        test_X, test_Y = data_gainer.get_test_data(return_label_data=True)
        pred_result = predict(config, test_X)
        draw(config, data_gainer, pred_result)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to predict")
    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
    args = parser.parse_args()

    con = Config()
    # copy any parsed CLI options onto the config object
    for key in dir(args):
        if not key.startswith("_"):
            setattr(con, key, getattr(args, key))

    main(con)
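# Worked example of the label shift above: with predict_day = 1 and time_step = 20,
# sample i pairs features norm_data[i:i+20] with labels norm_data[i+1:i+21] of the
# label columns, so each output step is trained to predict the next day's values.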
0
model/__init__.py
Normal file
35
model/model_keras.py
Normal file
@@ -0,0 +1,35 @@
from keras.layers import Input, Dense, LSTM
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping


def get_keras_model(config):
    input1 = Input(shape=(config.time_step, config.input_size))
    lstm = input1
    for i in range(config.lstm_layers):
        lstm = LSTM(units=config.hidden_size, dropout=config.dropout_rate, return_sequences=True)(lstm)
    output = Dense(config.output_size)(lstm)
    model = Model(input1, output)
    model.compile(loss='mse', optimizer='adam')
    return model


def train(config, train_X, train_Y, valid_X, valid_Y):
    model = get_keras_model(config)
    model.summary()
    if config.add_train:
        model.load_weights(config.model_save_path + config.model_name)

    check_point = ModelCheckpoint(filepath=config.model_save_path + config.model_name, monitor='val_loss',
                                  save_best_only=True, mode='auto')
    early_stop = EarlyStopping(monitor='val_loss', patience=config.patience, mode='auto')
    model.fit(train_X, train_Y, batch_size=config.batch_size, epochs=config.epoch, verbose=2,
              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])


def predict(config, test_X):
    model = get_keras_model(config)
    model.load_weights(config.model_save_path + config.model_name)
    result = model.predict(test_X, batch_size=1)
    result = result.reshape((-1, config.output_size))
    return result
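# Note: the last LSTM layer keeps return_sequences=True, so the Dense head is applied
# at every time step and train_Y/valid_Y must have shape (samples, time_step, output_size),
# matching the windows built by Data.get_train_and_valid_data() in main.py.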
98
model/model_pytorch.py
Normal file
@@ -0,0 +1,98 @@
import torch
from torch.nn import Module, LSTM, Linear
from torch.utils.data import DataLoader, TensorDataset
import numpy as np


class Net(Module):
    def __init__(self, config):
        super(Net, self).__init__()
        self.lstm = LSTM(input_size=config.input_size, hidden_size=config.hidden_size,
                         num_layers=config.lstm_layers, batch_first=True, dropout=config.dropout_rate)
        self.linear = Linear(in_features=config.hidden_size, out_features=config.output_size)

    def forward(self, x, hidden=None):
        lstm_out, hidden = self.lstm(x, hidden)
        linear_out = self.linear(lstm_out)
        return linear_out, hidden


def train(config, train_X, train_Y, valid_X, valid_Y):
    train_X, train_Y = torch.from_numpy(train_X).float(), torch.from_numpy(train_Y).float()
    train_loader = DataLoader(TensorDataset(train_X, train_Y), batch_size=config.batch_size)

    valid_X, valid_Y = torch.from_numpy(valid_X).float(), torch.from_numpy(valid_Y).float()
    valid_loader = DataLoader(TensorDataset(valid_X, valid_Y), batch_size=config.batch_size)

    model = Net(config)
    if config.add_train:  # incremental training starts from the saved weights
        model.load_state_dict(torch.load(config.model_save_path + config.model_name))
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = torch.nn.MSELoss()

    valid_loss_min = float("inf")
    bad_epoch = 0
    for epoch in range(config.epoch):
        print("Epoch {}/{}".format(epoch, config.epoch))
        model.train()
        train_loss_array = []
        hidden_train = None
        for i, _data in enumerate(train_loader):
            _train_X, _train_Y = _data
            optimizer.zero_grad()
            pred_Y, hidden_train = model(_train_X, hidden_train)

            if not config.do_continue_train:
                hidden_train = None  # reset the state between independent batches
            else:
                # carry the state across batches, but cut the gradient history
                h_0, c_0 = hidden_train
                h_0.detach_(), c_0.detach_()
                hidden_train = (h_0, c_0)
            loss = criterion(pred_Y, _train_Y)
            loss.backward()
            optimizer.step()
            train_loss_array.append(loss.item())

        model.eval()
        valid_loss_array = []
        hidden_valid = None
        for _valid_X, _valid_Y in valid_loader:
            pred_Y, hidden_valid = model(_valid_X, hidden_valid)
            if not config.do_continue_train: hidden_valid = None
            loss = criterion(pred_Y, _valid_Y)
            valid_loss_array.append(loss.item())

        valid_loss_cur = np.mean(valid_loss_array)
        print("The train loss is {:.4f}. ".format(np.mean(train_loss_array)),
              "The valid loss is {:.4f}.".format(valid_loss_cur))

        if valid_loss_cur < valid_loss_min:
            valid_loss_min = valid_loss_cur
            bad_epoch = 0
            torch.save(model.state_dict(), config.model_save_path + config.model_name)
        else:
            bad_epoch += 1
            if bad_epoch >= config.patience:  # early stopping
                print(" The training stops early in epoch {}".format(epoch))
                break


def predict(config, test_X):
    test_X = torch.from_numpy(test_X).float()
    test_set = TensorDataset(test_X)
    test_loader = DataLoader(test_set, batch_size=1)

    model = Net(config)
    model.load_state_dict(torch.load(config.model_save_path + config.model_name))

    result = torch.Tensor()

    model.eval()
    hidden_predict = None
    for _data in test_loader:
        data_X = _data[0]
        pred_X, hidden_predict = model(data_X, hidden_predict)
        cur_pred = torch.squeeze(pred_X, dim=0)
        result = torch.cat((result, cur_pred), dim=0)

    return result.detach().numpy()
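# Shape sketch (mirrors main.py's pipeline): train_X is (N, time_step, input_size) and
# train_Y is (N, time_step, output_size); predict() concatenates the per-window outputs,
# returning an array of shape (num_windows * time_step, output_size).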
97
model/model_tensorflow.py
Normal file
@@ -0,0 +1,97 @@
import tensorflow as tf
import numpy as np


class Model:
    def __init__(self, config):
        self.config = config

        self.placeholders()
        self.net()
        self.operate()

    def placeholders(self):
        self.X = tf.placeholder(tf.float32, [None, self.config.time_step, self.config.input_size])
        self.Y = tf.placeholder(tf.float32, [None, self.config.time_step, self.config.output_size])

    def net(self):
        def dropout_cell():
            basicLstm = tf.nn.rnn_cell.LSTMCell(self.config.hidden_size)
            dropoutLstm = tf.nn.rnn_cell.DropoutWrapper(basicLstm, output_keep_prob=1 - self.config.dropout_rate)
            return dropoutLstm

        cell = tf.nn.rnn_cell.MultiRNNCell([dropout_cell() for _ in range(self.config.lstm_layers)])

        output_rnn, _ = tf.nn.dynamic_rnn(cell=cell, inputs=self.X, dtype=tf.float32)

        # shape of output_rnn is: [batch_size, time_step, hidden_size]
        self.pred = tf.layers.dense(inputs=output_rnn, units=self.config.output_size)

    def operate(self):
        self.loss = tf.reduce_mean(tf.square(tf.reshape(self.pred, [-1]) - tf.reshape(self.Y, [-1])))
        self.optim = tf.train.AdamOptimizer(self.config.learning_rate).minimize(self.loss)
        self.saver = tf.train.Saver(tf.global_variables())


def train(config, train_X, train_Y, valid_X, valid_Y):
    with tf.variable_scope("stock_predict"):
        model = Model(config)

    train_len = len(train_X)
    valid_len = len(valid_X)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        valid_loss_min = float("inf")
        bad_epoch = 0
        for epoch in range(config.epoch):
            print("Epoch {}/{}".format(epoch, config.epoch))
            # training
            train_loss_array = []
            for step in range(train_len // config.batch_size):
                feed_dict = {model.X: train_X[step * config.batch_size: (step + 1) * config.batch_size],
                             model.Y: train_Y[step * config.batch_size: (step + 1) * config.batch_size]}
                train_loss, _ = sess.run([model.loss, model.optim], feed_dict=feed_dict)
                train_loss_array.append(train_loss)

            # validation and early stopping
            valid_loss_array = []
            for step in range(valid_len // config.batch_size):
                feed_dict = {model.X: valid_X[step * config.batch_size: (step + 1) * config.batch_size],
                             model.Y: valid_Y[step * config.batch_size: (step + 1) * config.batch_size]}
                valid_loss = sess.run(model.loss, feed_dict=feed_dict)
                valid_loss_array.append(valid_loss)

            valid_loss_cur = np.mean(valid_loss_array)
            print("The train loss is {:.4f}. ".format(np.mean(train_loss_array)),
                  "The valid loss is {:.4f}.".format(valid_loss_cur))

            if valid_loss_cur < valid_loss_min:
                valid_loss_min = valid_loss_cur
                bad_epoch = 0
                path = model.saver.save(sess, config.model_save_path + config.model_name)
                print(path)
            else:
                bad_epoch += 1
                if bad_epoch >= config.patience:
                    print(" The training stops early in epoch {}".format(epoch))
                    break


def predict(config, test_X):
    config.dropout_rate = 0  # disable dropout at inference (output_keep_prob becomes 1); the original value 1 would give keep_prob 0

    with tf.variable_scope("stock_predict", reuse=tf.AUTO_REUSE):
        model = Model(config)

    test_len = len(test_X)
    with tf.Session() as sess:
        module_file = tf.train.latest_checkpoint(config.model_save_path)
        model.saver.restore(sess, module_file)

        result = np.zeros((test_len * config.time_step, config.output_size))
        for step in range(test_len):
            feed_dict = {model.X: test_X[step: (step + 1)]}
            test_pred = sess.run(model.pred, feed_dict=feed_dict)
            result[step * config.time_step: (step + 1) * config.time_step] = test_pred[0, :, :]
        return result
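# Note: predict() rebuilds the graph inside the same "stock_predict" variable scope with
# reuse=tf.AUTO_REUSE so that saver.restore() can match the variables created by train().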
8
requirements.txt
Normal file
@@ -0,0 +1,8 @@
scikit-learn
pandas
keras==2.2.4
tensorflow==1.15.2
matplotlib>=3.0.2
numpy>=1.14.6
scipy>=1.1.0
torch>=1.1.0
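# note: the deprecated "sklearn" PyPI name was replaced by scikit-learn, and argparse
# was dropped because it ships with the Python standard library; tensorflow==1.15.2 and
# keras==2.2.4 pin the TF1-era APIs (tf.placeholder, tf.nn.rnn_cell, tf.layers) used here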
87
serve.py
Normal file
@@ -0,0 +1,87 @@
import tensorflow as tf
import numpy as np
import tushare as ts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

timesteps = seq_length = 7
data_dim = 5
output_dim = 1

stock_data = ts.get_k_data('600000', start='2015-01-01', end='2017-12-01')
xy = stock_data[['open', 'close', 'high', 'low', 'volume']]

# xy_new = pd.DataFrame()
# scaler = MinMaxScaler()

# scaler.fit(xy)
# t = scaler.transform(xy)

# for col in xy.columns:
#     xy_new.ix[:, col] = t[col]

x = xy
y = xy[['close']]
dataX = []
dataY = []
for i in range(0, len(y) - seq_length):
    _x = x[i:i + seq_length]
    _y = y.iloc[i + seq_length]  # positional lookup; .loc would break on a non-zero-based index
    # print(_x, "->", _y)
    dataX.append(_x)
    dataY.append(_y)

x_real = np.vstack(dataX).reshape(-1, seq_length, data_dim)
y_real = np.vstack(dataY).reshape(-1, output_dim)
print(x_real.shape)
print(y_real.shape)
dataX = x_real
dataY = y_real

train_size = int(len(dataY) * 0.7)
test_size = len(dataY) - train_size
trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])

X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
Y = tf.placeholder(tf.float32, [None, 1])


def add_layer(inputs, in_size, out_size, activation_function=None):
    inputs = tf.reshape(inputs, [-1, in_size])
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


outsize_first = 5
l1 = add_layer(X, data_dim, outsize_first, activation_function=tf.nn.relu)
l1_output = tf.reshape(l1, [-1, seq_length, outsize_first])

cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=output_dim, state_is_tuple=True)
outputs, _states = tf.nn.dynamic_rnn(cell, l1_output, dtype=tf.float32)
Y_pred = outputs[:, -1]  # keep only the last time step: one prediction per 7-day window

loss = tf.reduce_sum(tf.square(Y_pred - Y))

optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(100):
    _, l = sess.run(
        [train, loss],
        feed_dict={X: trainX, Y: trainY}
    )
    # print(i, l)

testPredict = sess.run(Y_pred, feed_dict={X: testX})

print(testY)
print(testPredict)
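# Note: this script feeds raw, unnormalized prices/volumes to the LSTM; the commented-out
# MinMaxScaler block above sketches the normalization that was evidently intended.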
111
test.py
Normal file
@@ -0,0 +1,111 @@
import pandas as pd
import numpy as np
import tensorflow as tf
import tushare as ts

rnn_unit = 10
input_size = 7
output_size = 1
lr = 0.0006

stock_data = ts.get_k_data('600000', start='2015-01-01', end='2017-12-01')
data = stock_data.iloc[:, 2:10].values
# note: the code below assumes data has at least 8 columns (indices 0-6 as features,
# index 7 as the close-price label)

# these dicts are referenced by lstm() but were missing from this file;
# minimal definitions are assumed here, sized by input_size and rnn_unit
weights = {
    'in': tf.Variable(tf.random_normal([input_size, rnn_unit])),
    'out': tf.Variable(tf.random_normal([rnn_unit, output_size]))
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[rnn_unit])),
    'out': tf.Variable(tf.constant(0.1, shape=[output_size]))
}


# ---------- build the training set ----------
def get_train_data(batch_size=60, time_step=20, train_begin=0, train_end=5800):
    batch_index = []
    data_train = data[train_begin:train_end]
    normalized_train_data = (data_train - np.mean(data_train, axis=0)) / np.std(data_train, axis=0)  # z-score normalization
    train_x, train_y = [], []  # initial definitions of the training x and y
    for i in range(len(normalized_train_data) - time_step):
        if i % batch_size == 0:
            batch_index.append(i)
        x = normalized_train_data[i:i + time_step, :7]
        y = normalized_train_data[i:i + time_step, 7, np.newaxis]
        train_x.append(x.tolist())
        train_y.append(y.tolist())
    batch_index.append((len(normalized_train_data) - time_step))
    return batch_index, train_x, train_y


# ---------- build the test set ----------
def get_test_data(time_step=20, test_begin=5800):
    data_test = data[test_begin:]
    mean = np.mean(data_test, axis=0)
    std = np.std(data_test, axis=0)
    normalized_test_data = (data_test - mean) / std  # z-score normalization
    size = (len(normalized_test_data) + time_step - 1) // time_step  # number of samples
    test_x, test_y = [], []
    for i in range(size - 1):
        x = normalized_test_data[i * time_step:(i + 1) * time_step, :7]
        y = normalized_test_data[i * time_step:(i + 1) * time_step, 7]
        test_x.append(x.tolist())
        test_y.extend(y)
    test_x.append((normalized_test_data[(i + 1) * time_step:, :7]).tolist())
    test_y.extend((normalized_test_data[(i + 1) * time_step:, 7]).tolist())
    return mean, std, test_x, test_y


# ---------- define the network ----------
def lstm(X):
    batch_size = tf.shape(X)[0]
    time_step = tf.shape(X)[1]
    w_in = weights['in']
    b_in = biases['in']
    input = tf.reshape(X, [-1, input_size])  # flatten to 2-D for the input projection; the result feeds the hidden layer
    input_rnn = tf.matmul(input, w_in) + b_in
    input_rnn = tf.reshape(input_rnn, [-1, time_step, rnn_unit])  # back to 3-D as input for the LSTM cell
    cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_unit)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)
    output_rnn, final_states = tf.nn.dynamic_rnn(cell, input_rnn, initial_state=init_state,
                                                 dtype=tf.float32)  # output_rnn holds each step's output; final_states is the last cell state
    output = tf.reshape(output_rnn, [-1, rnn_unit])  # input to the output layer
    w_out = weights['out']
    b_out = biases['out']
    pred = tf.matmul(output, w_out) + b_out
    return pred, final_states


# ---------- train the model ----------
def train_lstm(batch_size=80, time_step=15, train_begin=0, train_end=5800):
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    Y = tf.placeholder(tf.float32, shape=[None, time_step, output_size])
    batch_index, train_x, train_y = get_train_data(batch_size, time_step, train_begin, train_end)
    pred, _ = lstm(X)
    # loss function
    loss = tf.reduce_mean(tf.square(tf.reshape(pred, [-1]) - tf.reshape(Y, [-1])))
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)
    module_file = tf.train.latest_checkpoint('.')  # latest_checkpoint needs a directory; '.' matches where stock2.model is saved
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if module_file is not None:  # resume from a previous checkpoint when one exists
            saver.restore(sess, module_file)
        # train for 2000 passes over the batches
        for i in range(2000):
            for step in range(len(batch_index) - 1):
                _, loss_ = sess.run([train_op, loss], feed_dict={X: train_x[batch_index[step]:batch_index[step + 1]],
                                                                 Y: train_y[batch_index[step]:batch_index[step + 1]]})
                print(i, loss_)
            if i % 200 == 0:
                print("model saved:", saver.save(sess, 'stock2.model', global_step=i))


# ---------- run prediction ----------
def prediction(time_step=20):
    X = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
    mean, std, test_x, test_y = get_test_data(time_step)
    pred, _ = lstm(X)
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as sess:
        # restore the trained parameters
        module_file = tf.train.latest_checkpoint('.')
        saver.restore(sess, module_file)
        test_predict = []
        for step in range(len(test_x) - 1):
            prob = sess.run(pred, feed_dict={X: [test_x[step]]})
            predict = prob.reshape((-1))
            test_predict.extend(predict)
        test_y = np.array(test_y) * std[7] + mean[7]
        test_predict = np.array(test_predict) * std[7] + mean[7]
        acc = np.average(np.abs(test_predict - test_y[:len(test_predict)]) / test_y[:len(test_predict)])
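# Note: acc above is the mean relative error of the de-normalized close-price predictions;
# prediction() currently computes it without printing or returning it.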
9
yao-optimizer.iml
Normal file
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>