mirror of https://github.com/newnius/YAO-optimizer.git synced 2025-06-06 06:41:55 +00:00

update, add random forest

Author: Newnius
Date:   2020-06-24 22:55:42 +08:00
Parent: 0c6f6eec11
Commit: bf46dae01e

4 changed files with 633 additions and 283 deletions

.idea/workspace.xml

@@ -3,7 +3,6 @@
 <component name="ChangeListManager">
 <list default="true" id="0aedafd8-e57e-462a-beda-65af0b91f3df" name="Default Changelist" comment="">
 <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
 </list>
 <ignored path="$PROJECT_DIR$/out/" />
 <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@@ -16,10 +15,11 @@
 <session id="570274097">
 <usages-collector id="statistics.lifecycle.project">
 <counts>
-<entry key="project.closed" value="2" />
+<entry key="project.closed" value="3" />
 <entry key="project.open.time.1" value="3" />
+<entry key="project.open.time.2" value="1" />
 <entry key="project.open.time.3" value="1" />
-<entry key="project.opened" value="4" />
+<entry key="project.opened" value="5" />
 </counts>
 </usages-collector>
 <usages-collector id="statistics.file.extensions.open">
@@ -30,7 +30,7 @@
 <entry key="iml" value="1" />
 <entry key="md" value="2" />
 <entry key="png" value="7" />
-<entry key="py" value="7" />
+<entry key="py" value="11" />
 <entry key="sh" value="3" />
 <entry key="txt" value="2" />
 </counts>
@@ -43,7 +43,7 @@
 <entry key="Image" value="7" />
 <entry key="Markdown" value="2" />
 <entry key="PLAIN_TEXT" value="8" />
-<entry key="Python" value="7" />
+<entry key="Python" value="11" />
 </counts>
 </usages-collector>
 <usages-collector id="statistics.file.extensions.edit">
@@ -51,8 +51,8 @@
 <entry key="Dockerfile" value="81" />
 <entry key="csv" value="9" />
 <entry key="gitignore" value="12" />
-<entry key="md" value="232" />
-<entry key="py" value="4400" />
+<entry key="md" value="234" />
+<entry key="py" value="5489" />
 <entry key="sh" value="5" />
 </counts>
 </usages-collector>
@@ -60,100 +60,29 @@
 <counts>
 <entry key="Bash" value="5" />
 <entry key="Dockerfile" value="81" />
-<entry key="Markdown" value="232" />
+<entry key="Markdown" value="234" />
 <entry key="PLAIN_TEXT" value="21" />
-<entry key="Python" value="4400" />
+<entry key="Python" value="5489" />
 </counts>
 </usages-collector>
 </session>
 </component>
 <component name="FileEditorManager">
 <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
+<file pinned="false" current-in-tab="true">
+<entry file="file://$PROJECT_DIR$/serve_rf.py">
+<provider selected="true" editor-type-id="text-editor">
+<state relative-caret-position="165">
+<caret line="11" column="16" lean-forward="true" selection-start-line="11" selection-start-column="16" selection-end-line="11" selection-end-column="16" />
+</state>
+</provider>
+</entry>
+</file>
 <file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/main.py">
+<entry file="file://$PROJECT_DIR$/rf.py">
 <provider selected="true" editor-type-id="text-editor">
 <state relative-caret-position="90">
-<caret line="6" selection-start-line="6" selection-end-line="6" />
-<folding>
-<element signature="e#0#19#0" expanded="true" />
-</folding>
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/.gitignore">
-<provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="30">
-<caret line="2" selection-start-line="2" selection-end-line="2" />
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/serve.py">
-<provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="414">
-<caret line="211" column="31" selection-start-line="211" selection-start-column="31" selection-end-line="211" selection-end-column="31" />
-<folding>
-<element signature="e#18#46#0" expanded="true" />
-<marker date="1588426314177" expanded="true" signature="1271:1274" ph="..." />
-<marker date="1588426314177" expanded="true" signature="3519:3521" ph="..." />
-</folding>
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/Dockerfile">
-<provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="255">
-<caret line="17" column="7" selection-start-line="17" selection-start-column="7" selection-end-line="17" selection-end-column="7" />
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/train.py">
-<provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="960">
-<caret line="64" column="22" selection-start-line="64" selection-start-column="12" selection-end-line="64" selection-end-column="22" />
-<folding>
-<element signature="e#0#28#0" expanded="true" />
-</folding>
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="true">
-<entry file="file://$PROJECT_DIR$/README.md">
-<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
-<state split_layout="SPLIT">
-<first_editor relative-caret-position="336">
-<caret line="31" column="26" selection-start-line="31" selection-start-column="26" selection-end-line="31" selection-end-column="26" />
-</first_editor>
-<second_editor />
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/bootstrap.sh">
-<provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="30">
-<caret line="2" column="22" selection-start-line="2" selection-start-column="22" selection-end-line="2" selection-end-column="22" />
-</state>
-</provider>
-</entry>
-</file>
-<file pinned="false" current-in-tab="false">
-<entry file="file://$PROJECT_DIR$/model_tensorflow.py">
-<provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="480">
-<caret line="32" column="58" selection-start-line="32" selection-start-column="58" selection-end-line="32" selection-end-column="58" />
-<folding>
-<element signature="e#0#23#0" expanded="true" />
-</folding>
+<caret line="7" column="36" lean-forward="true" selection-start-line="7" selection-start-column="36" selection-end-line="7" selection-end-column="36" />
 </state>
 </provider>
 </entry>
@@ -185,6 +114,10 @@
 <find>12</find>
 <find>32</find>
 <find>forecast_lstm</find>
+<find>csv</find>
+<find>joblib</find>
+<find>traceback</find>
+<find>models[job]</find>
 </findStrings>
 </component>
 <component name="Git.Settings">
@@ -204,8 +137,10 @@
 <option value="$PROJECT_DIR$/data/data2.csv" />
 <option value="$PROJECT_DIR$/data/data3.csv" />
 <option value="$PROJECT_DIR$/train.py" />
-<option value="$PROJECT_DIR$/serve.py" />
 <option value="$PROJECT_DIR$/README.md" />
+<option value="$PROJECT_DIR$/rf.py" />
+<option value="$PROJECT_DIR$/serve.py" />
+<option value="$PROJECT_DIR$/serve_rf.py" />
 </list>
 </option>
 </component>
@@ -225,6 +160,7 @@
 </navigator>
 <panes>
 <pane id="Scope" />
+<pane id="PackagesPane" />
 <pane id="AndroidView" />
 <pane id="ProjectPane">
 <subPane>
@@ -242,13 +178,12 @@
 <select />
 </subPane>
 </pane>
-<pane id="PackagesPane" />
 </panes>
 </component>
 <component name="PropertiesComponent">
 <property name="WebServerToolWindowFactoryState" value="false" />
 <property name="aspect.path.notification.shown" value="true" />
-<property name="com.android.tools.idea.instantapp.provision.ProvisionBeforeRunTaskProvider.myTimeStamp" value="1588422524280" />
+<property name="com.android.tools.idea.instantapp.provision.ProvisionBeforeRunTaskProvider.myTimeStamp" value="1593010427833" />
 <property name="go.gopath.indexing.explicitly.defined" value="true" />
 <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
 <property name="nodejs_npm_path_reset_for_default_project" value="true" />
@@ -259,13 +194,13 @@
 <property name="settings.editor.selected.configurable" value="http.proxy" />
 </component>
 <component name="RecentsManager">
+<key name="CopyFile.RECENT_KEYS">
+<recent name="$PROJECT_DIR$" />
+</key>
 <key name="MoveFile.RECENT_KEYS">
 <recent name="$PROJECT_DIR$" />
 <recent name="$PROJECT_DIR$/model" />
 </key>
-<key name="CopyFile.RECENT_KEYS">
-<recent name="$PROJECT_DIR$" />
-</key>
 </component>
 <component name="RunDashboard">
 <option name="ruleStates">
@@ -292,12 +227,13 @@
 <workItem from="1588152880522" duration="16973000" />
 <workItem from="1588319878551" duration="41219000" />
 <workItem from="1588426002721" duration="336000" />
-<workItem from="1588427782140" duration="237000" />
+<workItem from="1588427782140" duration="326000" />
+<workItem from="1592809729651" duration="9303000" />
 </task>
 <servers />
 </component>
 <component name="TimeTrackingManager">
-<option name="totallyTimeSpent" value="58765000" />
+<option name="totallyTimeSpent" value="68157000" />
 </component>
 <component name="ToolWindowManager">
 <frame x="0" y="0" width="1280" height="800" extended-state="0" />
@@ -308,10 +244,10 @@
 <window_info id="Capture Tool" order="2" />
 <window_info id="Favorites" order="3" side_tool="true" />
 <window_info id="Image Layers" order="4" />
-<window_info content_ui="combo" id="Project" order="5" sideWeight="0.49898168" visible="true" weight="0.26171243" />
+<window_info active="true" content_ui="combo" id="Project" order="5" sideWeight="0.49898168" visible="true" weight="0.2657512" />
 <window_info id="Structure" order="6" sideWeight="0.50101835" side_tool="true" weight="0.24959612" />
 <window_info anchor="bottom" id="Version Control" order="0" />
-<window_info active="true" anchor="bottom" id="Terminal" order="1" visible="true" weight="0.32739726" />
+<window_info anchor="bottom" id="Terminal" order="1" visible="true" weight="0.32739726" />
 <window_info anchor="bottom" id="Event Log" order="2" side_tool="true" />
 <window_info anchor="bottom" id="Database Changes" order="3" show_stripe_button="false" />
 <window_info anchor="bottom" id="Docker" order="4" show_stripe_button="false" />
@@ -382,7 +318,7 @@
 </entry>
 <entry file="file://$PROJECT_DIR$/main.py">
 <provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="90">
+<state relative-caret-position="30">
 <caret line="6" selection-start-line="6" selection-end-line="6" />
 <folding>
 <element signature="e#0#19#0" expanded="true" />
@@ -406,7 +342,7 @@
 </entry>
 <entry file="file://$PROJECT_DIR$/train.py">
 <provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="960">
+<state relative-caret-position="810">
 <caret line="64" column="22" selection-start-line="64" selection-start-column="12" selection-end-line="64" selection-end-column="22" />
 <folding>
 <element signature="e#0#28#0" expanded="true" />
@@ -423,7 +359,7 @@
 </entry>
 <entry file="file://$PROJECT_DIR$/model_tensorflow.py">
 <provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="480">
+<state relative-caret-position="465">
 <caret line="32" column="58" selection-start-line="32" selection-start-column="58" selection-end-line="32" selection-end-column="58" />
 <folding>
 <element signature="e#0#23#0" expanded="true" />
@@ -431,25 +367,38 @@
 </state>
 </provider>
 </entry>
+<entry file="file://$PROJECT_DIR$/README.md">
+<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
+<state split_layout="SPLIT">
+<first_editor relative-caret-position="480">
+<caret line="32" selection-start-line="32" selection-end-line="32" />
+</first_editor>
+<second_editor />
+</state>
+</provider>
+</entry>
 <entry file="file://$PROJECT_DIR$/serve.py">
 <provider selected="true" editor-type-id="text-editor">
-<state relative-caret-position="414">
-<caret line="211" column="31" selection-start-line="211" selection-start-column="31" selection-end-line="211" selection-end-column="31" />
+<state relative-caret-position="210">
+<caret line="14" column="27" lean-forward="true" selection-start-line="14" selection-start-column="27" selection-end-line="14" selection-end-column="27" />
 <folding>
-<element signature="e#18#46#0" expanded="true" />
 <marker date="1588426314177" expanded="true" signature="1271:1274" ph="..." />
 <marker date="1588426314177" expanded="true" signature="3519:3521" ph="..." />
 </folding>
 </state>
 </provider>
 </entry>
-<entry file="file://$PROJECT_DIR$/README.md">
-<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
-<state split_layout="SPLIT">
-<first_editor relative-caret-position="336">
-<caret line="31" column="26" selection-start-line="31" selection-start-column="26" selection-end-line="31" selection-end-column="26" />
-</first_editor>
-<second_editor />
+<entry file="file://$PROJECT_DIR$/rf.py">
+<provider selected="true" editor-type-id="text-editor">
+<state relative-caret-position="90">
+<caret line="7" column="36" lean-forward="true" selection-start-line="7" selection-start-column="36" selection-end-line="7" selection-end-column="36" />
+</state>
+</provider>
+</entry>
+<entry file="file://$PROJECT_DIR$/serve_rf.py">
+<provider selected="true" editor-type-id="text-editor">
+<state relative-caret-position="165">
+<caret line="11" column="16" lean-forward="true" selection-start-line="11" selection-start-column="16" selection-end-line="11" selection-end-column="16" />
 </state>
 </provider>
 </entry>

rf.py (new file, 149 lines)

@@ -0,0 +1,149 @@
# _*_coding:utf-8_*_
import numpy as np
import pandas as pd


def load_data(trainfile, testfile):
    traindata = pd.read_csv(trainfile)
    testdata = pd.read_csv(testfile)
    feature_data = traindata.iloc[:, 1:-1]
    label_data = traindata.iloc[:, -1]
    test_feature = testdata.iloc[:, 1:]
    return feature_data, label_data, test_feature


def random_forest_train(feature_data, label_data, test_feature, submitfile):
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
    params = {
        'n_estimators': 70,
        'max_depth': 13,
        'min_samples_split': 10,
        'min_samples_leaf': 5,  # 10
        'max_features': 7
    }
    print(X_test)
    model = RandomForestRegressor(**params)
    model.fit(X_train, y_train)
    # predict on the held-out test split
    y_pred = model.predict(X_test)
    # compute the RMSE
    MSE = mean_squared_error(y_test, y_pred)
    RMSE = np.sqrt(MSE)
    print(RMSE)
    submit = pd.read_csv(submitfile)
    print(submit)
    print(model.predict(test_feature))
    submit['CPU'] = model.predict(test_feature)
    submit.to_csv('my_random_forest_prediction1.csv', index=False)
    print(submit)


def random_forest_parameter_tuning1(feature_data, label_data, test_feature):
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import GridSearchCV
    X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
    param_test1 = {
        'n_estimators': range(10, 71, 10)
    }
    model = GridSearchCV(estimator=RandomForestRegressor(
        min_samples_split=100, min_samples_leaf=20, max_depth=8, max_features='sqrt',
        random_state=10), param_grid=param_test1, cv=5
    )
    model.fit(X_train, y_train)
    # predict on the held-out test split
    y_pred = model.predict(X_test)
    # compute the RMSE
    MSE = mean_squared_error(y_test, y_pred)
    RMSE = np.sqrt(MSE)
    print(RMSE)
    return model.best_score_, model.best_params_


def random_forest_parameter_tuning2(feature_data, label_data, test_feature):
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import GridSearchCV
    X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
    param_test2 = {
        'max_depth': range(3, 14, 2),
        'min_samples_split': range(50, 201, 20)
    }
    model = GridSearchCV(estimator=RandomForestRegressor(
        n_estimators=70, min_samples_leaf=20, max_features='sqrt', oob_score=True,
        random_state=10), param_grid=param_test2, cv=5
    )
    model.fit(X_train, y_train)
    # predict on the held-out test split
    y_pred = model.predict(X_test)
    # compute the RMSE
    MSE = mean_squared_error(y_test, y_pred)
    RMSE = np.sqrt(MSE)
    print(RMSE)
    return model.best_score_, model.best_params_


def random_forest_parameter_tuning3(feature_data, label_data, test_feature):
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import GridSearchCV
    X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
    param_test3 = {
        'min_samples_split': range(10, 90, 20),
        'min_samples_leaf': range(10, 60, 10),
    }
    model = GridSearchCV(estimator=RandomForestRegressor(
        n_estimators=70, max_depth=13, max_features='sqrt', oob_score=True,
        random_state=10), param_grid=param_test3, cv=5
    )
    model.fit(X_train, y_train)
    # predict on the held-out test split
    y_pred = model.predict(X_test)
    # compute the RMSE
    MSE = mean_squared_error(y_test, y_pred)
    RMSE = np.sqrt(MSE)
    print(RMSE)
    return model.best_score_, model.best_params_


def random_forest_parameter_tuning4(feature_data, label_data, test_feature):
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import GridSearchCV
    X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
    param_test4 = {
        'max_features': range(3, 9, 2)
    }
    model = GridSearchCV(estimator=RandomForestRegressor(
        n_estimators=70, max_depth=13, min_samples_split=10, min_samples_leaf=10, oob_score=True,
        random_state=10), param_grid=param_test4, cv=5
    )
    model.fit(X_train, y_train)
    # predict on the held-out test split
    y_pred = model.predict(X_test)
    # compute the RMSE
    MSE = mean_squared_error(y_test, y_pred)
    RMSE = np.sqrt(MSE)
    print(RMSE)
    return model.best_score_, model.best_params_


if __name__ == '__main__':
    trainfile = 'data/train.csv'
    testfile = 'data/test.csv'
    submitfile = 'data/sample_submit.csv'
    feature_data, label_data, test_feature = load_data(trainfile, testfile)
    random_forest_train(feature_data, label_data, test_feature, submitfile)
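
The four parameter-tuning helpers above are meant to be run in stages: each one grid-searches one or two hyperparameters while holding the values found by earlier stages fixed, which is where hard-coded constants like n_estimators=70 and max_depth=13 come from. A minimal driver sketch of that workflow (not part of rf.py), assuming rf.py is importable and the same data/ layout as its __main__ block:

# Hypothetical staged-tuning driver. Each stage prints the RMSE on its
# held-out split and returns the best parameters found; those values were
# apparently copied by hand into the constants of the next stage.
from rf import (load_data, random_forest_parameter_tuning1, random_forest_parameter_tuning2,
                random_forest_parameter_tuning3, random_forest_parameter_tuning4)

feature_data, label_data, test_feature = load_data('data/train.csv', 'data/test.csv')
for stage in (random_forest_parameter_tuning1, random_forest_parameter_tuning2,
              random_forest_parameter_tuning3, random_forest_parameter_tuning4):
    best_score, best_params = stage(feature_data, label_data, test_feature)
    print(stage.__name__, best_score, best_params)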

serve.py (280 changed lines)

@@ -6,187 +6,106 @@ import cgi
 import json
 from urllib import parse
 import pandas as pd
+import numpy as np
 import csv
-from pandas import DataFrame
-from pandas import Series
-from pandas import concat
-from pandas import read_csv
-from sklearn.metrics import mean_squared_error
-from sklearn.preprocessing import MinMaxScaler
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.layers import LSTM
-from math import sqrt
-import numpy
 import random
 import traceback
-from keras.models import load_model
-from sklearn.externals import joblib
+import pickle
+import os
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error
 
-PORT_NUMBER = 8080
+PORT_NUMBER = int(os.getenv('Port', 8080))
 
 lock = Lock()
 models = {}
 
-# frame a sequence as a supervised learning problem
-def timeseries_to_supervised(data, lag=1):
-    df = DataFrame(data)
-    columns = [df.shift(i) for i in range(1, lag + 1)]
-    columns.append(df)
-    df = concat(columns, axis=1)
-    df = df.drop(0)
-    return df
-
-# create a differenced series
-def difference(dataset, interval=1):
-    diff = list()
-    for i in range(interval, len(dataset)):
-        value = dataset[i] - dataset[i - interval]
-        diff.append(value)
-    return Series(diff)
-
-# invert differenced value
-def inverse_difference(history, yhat, interval=1):
-    return yhat + history[-interval]
-
-# inverse scaling for a forecasted value
-def invert_scale(scaler, X, yhat):
-    new_row = [x for x in X] + [yhat]
-    array = numpy.array(new_row)
-    array = array.reshape(1, len(array))
-    inverted = scaler.inverse_transform(array)
-    return inverted[0, -1]
-
-# fit an LSTM network to training data
-def fit_lstm(train, batch_size2, nb_epoch, neurons):
-    X, y = train[:, 0:-1], train[:, -1]
-    X = X.reshape(X.shape[0], 1, X.shape[1])
-    model = Sequential()
-    model.add(LSTM(neurons, batch_input_shape=(batch_size2, X.shape[1], X.shape[2]), stateful=True))
-    model.add(Dense(1))
-    model.compile(loss='mean_squared_error', optimizer='adam')
-    for i in range(nb_epoch):
-        model.fit(X, y, epochs=1, batch_size=batch_size2, verbose=0, shuffle=False)
-        # loss = model.evaluate(X, y)
-        # print("Epoch {}/{}, loss = {}".format(i, nb_epoch, loss))
-        print("Epoch {}/{}".format(i, nb_epoch))
-        model.reset_states()
-    return model
+def load_data(trainfile, testfile):
+    traindata = pd.read_csv(trainfile)
+    testdata = pd.read_csv(testfile)
+    feature_data = traindata.iloc[:, 1:-1]
+    label_data = traindata.iloc[:, -1]
+    test_feature = testdata.iloc[:, 1:]
+    return feature_data, label_data, test_feature
 
 def train_models(job):
-    lock.acquire()
-    if job not in models:
-        models[job] = {
-            'lock': Lock()
-        }
-    lock.release()
+    if job not in models or 'features' not in models[job]:
+        return
     models[job]['lock'].acquire()
-    # load dataset
-    series = read_csv('./data/' + job + '.csv', header=0, index_col=0, squeeze=True)
-    # transform data to be stationary
-    raw_values = series.values
-    diff_values = difference(raw_values, 1)
-    # transform data to be supervised learning
-    lag = 4
-    supervised = timeseries_to_supervised(diff_values, lag)
-    supervised_values = supervised.values
-    batch_size = 32
-    if supervised_values.shape[0] < 100:
-        batch_size = 16
-    if supervised_values.shape[0] < 60:
-        batch_size = 8
-    # split data into train and test-sets
-    train = supervised_values
-    # transform the scale of the data
-    # scale data to [-1, 1]
-    # fit scaler
-    scaler = MinMaxScaler(feature_range=(-1, 1))
-    scaler = scaler.fit(train)
-    # transform train
-    train = train.reshape(train.shape[0], train.shape[1])
-    train_scaled = scaler.transform(train)
-    # fit the model
-    t1 = train.shape[0] % batch_size
-    train_trimmed = train_scaled[t1:, :]
-    model = fit_lstm(train_trimmed, batch_size, 30, 4)
-    model.save('./data/checkpoint-' + job)
-    scaler_filename = './data/checkpoint-' + job + "-scaler.save"
-    joblib.dump(scaler, scaler_filename)
-    models[job]['batch_size'] = batch_size
+    for label in models[job]['labels']:
+        trainfile = './data/' + job + '_' + label + '.csv'
+        traindata = pd.read_csv(trainfile)
+        feature_data = traindata.iloc[:, 1:-1]
+        label_data = traindata.iloc[:, -1]
+        X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
+        params = {
+            'n_estimators': 70,
+            'max_depth': 13,
+            'min_samples_split': 10,
+            'min_samples_leaf': 5,  # 10
+            'max_features': len(models[job]['features']) - 1  # 7
+        }
+        # print(params)
+        model = RandomForestRegressor(**params)
+        model.fit(X_train, y_train)
+        # save the model to disk
+        modelname = './data/' + job + '_' + label + '.sav'
+        pickle.dump(model, open(modelname, 'wb'))
+        # predict on the held-out test split
+        y_pred = model.predict(X_test)
+        # compute the RMSE
+        MSE = mean_squared_error(y_test, y_pred)
+        RMSE = np.sqrt(MSE)
+        print('RMSE of ' + job + ' is ' + str(RMSE))
     models[job]['lock'].release()
 
-def predict(job, seq):
-    if job not in models or 'batch_size' not in models[job]:
+def predict(job, features):
+    if job not in models or 'features' not in models[job]:
         return -1, False
-    batch_size = int(models[job]['batch_size'])
-    data = {
-        'seq': seq,
-        'value': 0,
-    }
-    model = load_model('./data/checkpoint-' + job)
-    scaler_filename = './data/checkpoint-' + job + "-scaler.save"
-    scaler = joblib.load(scaler_filename)
-    file = './data/' + job + '.' + str(random.randint(1000, 9999)) + '.csv'
-    df = pd.read_csv('./data/' + job + '.csv', usecols=['seq', 'value'])
-    df = df.tail(batch_size * 2 - 1)
-    df = df.append(data, ignore_index=True)
-    df.to_csv(file, index=False)
-    # load dataset
-    df = read_csv(file, header=0, index_col=0, squeeze=True)
-    # transform data to be stationary
-    raw_values = df.values
-    diff_values = difference(raw_values, 1)
-    # transform data to be supervised learning
-    lag = 4
-    supervised = timeseries_to_supervised(diff_values, lag)
-    supervised_values = supervised[-batch_size:]
-    test = supervised_values.values
-    test = test.reshape(test.shape[0], test.shape[1])
-    test_scaled = scaler.transform(test)
-    # forecast the entire training dataset to build up state for forecasting
-    test_reshaped = test_scaled[:, 0:-1]
-    test_reshaped = test_reshaped.reshape(len(test_reshaped), 1, lag)
-    output = model.predict(test_reshaped, batch_size=batch_size)
-    predictions = list()
-    for i in range(len(output)):
-        yhat = output[i, 0]
-        X = test_scaled[i, 0:-1]
-        # invert scaling
-        yhat = invert_scale(scaler, X, yhat)
-        # invert differencing
-        yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
-        # store forecast
-        predictions.append(yhat)
-    # report performance
-    rmse = sqrt(mean_squared_error(raw_values[-batch_size:], predictions))
-    print(predictions, raw_values[-batch_size:])
-    return predictions[-1], True
+    values = [job]
+    for feature in models[job]['features']:
+        values.append(features[feature])
+    datafile = './data/' + job + '.' + str(random.randint(1000, 9999)) + '.csv'
+    t = ['job']
+    t.extend(models[job]['features'])
+    with open(datafile, 'w', newline='') as csvfile:
+        spamwriter = csv.writer(
+            csvfile, delimiter=',',
+            quotechar='|', quoting=csv.QUOTE_MINIMAL
+        )
+        spamwriter.writerow(t)
+    with open(datafile, 'a+', newline='') as csvfile:
+        spamwriter = csv.writer(
+            csvfile, delimiter=',',
+            quotechar='|', quoting=csv.QUOTE_MINIMAL
+        )
+        spamwriter.writerow(values)
+    testdata = pd.read_csv(datafile)
+    test_feature = testdata.iloc[:, 1:]
+    predictions = {}
+    for label in models[job]['labels']:
+        # load the model from disk
+        modelfile = './data/' + job + '_' + label + '.sav'
+        model = pickle.load(open(modelfile, 'rb'))
+        preds = model.predict(test_feature)
+        predictions[label] = preds[0]
+    if os.path.exists(datafile):
+        os.remove(datafile)
+    return predictions, True
 
 class MyHandler(BaseHTTPRequestHandler):
@@ -204,15 +123,15 @@ class MyHandler(BaseHTTPRequestHandler):
         elif req.path == "/predict":
             try:
                 job = query.get('job')[0]
-                seq = query.get('seq')[0]
+                features = json.loads(query.get('features')[0])
                 msg = {'code': 0, 'error': ""}
-                pred, success = predict(job, int(seq))
+                pred, success = predict(job, features)
                 if not success:
                     msg = {'code': 2, 'error': "Job " + job + " not exist"}
                 else:
-                    msg = {'code': 0, 'error': "", "total": int(pred)}
+                    msg = {'code': 0, 'error': "", "labels": json.dumps(pred)}
             except Exception as e:
                 track = traceback.format_exc()
                 print(track)
@@ -226,26 +145,50 @@ class MyHandler(BaseHTTPRequestHandler):
         elif req.path == "/feed":
             try:
                 job = query.get('job')[0]
-                seq = query.get('seq')[0]
-                value = query.get('value')[0]
-                if int(seq) == 1:
-                    with open('./data/' + job + '.csv', 'w', newline='') as csvfile:
-                        spamwriter = csv.writer(
-                            csvfile, delimiter=',',
-                            quotechar='|', quoting=csv.QUOTE_MINIMAL
-                        )
-                        spamwriter.writerow(["seq", "value"])
-                with open('./data/' + job + '.csv', 'a+', newline='') as csvfile:
-                    spamwriter = csv.writer(
-                        csvfile, delimiter=',',
-                        quotechar='|', quoting=csv.QUOTE_MINIMAL
-                    )
-                    spamwriter.writerow([seq, value])
+                features = json.loads(query.get('features')[0])
+                labels = json.loads(query.get('labels')[0])
+                lock.acquire()
+                flag = False
+                if job not in models:
+                    models[job] = {
+                        'lock': Lock(),
+                        'features': list(features.keys()),
+                        'labels': list(labels.keys())
+                    }
+                    flag = True
+                lock.release()
+                models[job]['lock'].acquire()
+                for label in models[job]['labels']:
+                    values = [job]
+                    for feature in models[job]['features']:
+                        values.append(features[feature])
+                    values.append(labels[label])
+                    if flag:
+                        t = ['job']
+                        t.extend(models[job]['features'])
+                        t.append(label)
+                        with open('./data/' + job + '_' + label + '.csv', 'w', newline='') as csvfile:
+                            spamwriter = csv.writer(
+                                csvfile, delimiter=',',
+                                quotechar='|', quoting=csv.QUOTE_MINIMAL
+                            )
+                            spamwriter.writerow(t)
+                    with open('./data/' + job + '_' + label + '.csv', 'a+', newline='') as csvfile:
+                        spamwriter = csv.writer(
+                            csvfile, delimiter=',',
+                            quotechar='|', quoting=csv.QUOTE_MINIMAL
+                        )
+                        spamwriter.writerow(values)
+                models[job]['lock'].release()
                 msg = {'code': 0, 'error': ""}
             except Exception as e:
                 msg = {'code': 1, 'error': str(e)}
+                track = traceback.format_exc()
+                print(track)
             self.send_response(200)
             self.send_header('Content-type', 'application/json')
             self.end_headers()
@@ -289,7 +232,6 @@ class MyHandler(BaseHTTPRequestHandler):
             self.send_header('Content-type', 'application/json')
             self.end_headers()
             self.wfile.write(bytes(json.dumps(msg), "utf-8"))
-
         else:
             self.send_error(404, 'File Not Found: %s' % self.path)
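
With this change, /feed and /predict exchange JSON maps of named features and labels instead of (seq, value) pairs, and /predict returns one prediction per label. A minimal client sketch against the new API, assuming the server runs on localhost:8080, that the /train?job= route is unchanged from the previous version (see serve_lstm.py below), and hypothetical feature/label names:

import json
import time
from urllib import request, parse

base = 'http://localhost:8080'
features = json.dumps({'batch_size': 32, 'workers': 2})  # hypothetical feature names
labels = json.dumps({'CPU': 2.5})                        # hypothetical label name

# feed training samples; each call appends a row to ./data/<job>_<label>.csv
request.urlopen(base + '/feed?' + parse.urlencode({'job': 'job1', 'features': features, 'labels': labels}))

# train one random forest per label in a background thread
request.urlopen(base + '/train?' + parse.urlencode({'job': 'job1'}))
time.sleep(5)  # crude wait for the background training to finish

# predict all labels for a new feature vector
resp = request.urlopen(base + '/predict?' + parse.urlencode({'job': 'job1', 'features': features}))
print(json.loads(resp.read()))  # e.g. {"code": 0, "error": "", "labels": "{\"CPU\": ...}"}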

serve_lstm.py (new file, 310 lines)

@@ -0,0 +1,310 @@
#!/usr/bin/python
from threading import Thread
from threading import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import cgi
import json
from urllib import parse
import pandas as pd
import csv
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
import numpy
import random
import traceback
from keras.models import load_model
from sklearn.externals import joblib

PORT_NUMBER = 8080

lock = Lock()
models = {}


# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    df = DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag + 1)]
    columns.append(df)
    df = concat(columns, axis=1)
    df = df.drop(0)
    return df


# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return Series(diff)


# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]


# inverse scaling for a forecasted value
def invert_scale(scaler, X, yhat):
    new_row = [x for x in X] + [yhat]
    array = numpy.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]


# fit an LSTM network to training data
def fit_lstm(train, batch_size2, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size2, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size2, verbose=0, shuffle=False)
        # loss = model.evaluate(X, y)
        # print("Epoch {}/{}, loss = {}".format(i, nb_epoch, loss))
        print("Epoch {}/{}".format(i, nb_epoch))
        model.reset_states()
    return model


def train_models(job):
    lock.acquire()
    if job not in models:
        models[job] = {
            'lock': Lock()
        }
    lock.release()
    models[job]['lock'].acquire()
    # load dataset
    series = read_csv('./data/' + job + '.csv', header=0, index_col=0, squeeze=True)
    # transform data to be stationary
    raw_values = series.values
    diff_values = difference(raw_values, 1)
    # transform data to be supervised learning
    lag = 4
    supervised = timeseries_to_supervised(diff_values, lag)
    supervised_values = supervised.values
    batch_size = 32
    if supervised_values.shape[0] < 100:
        batch_size = 16
    if supervised_values.shape[0] < 60:
        batch_size = 8
    # split data into train and test-sets
    train = supervised_values
    # transform the scale of the data
    # scale data to [-1, 1]
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # fit the model (trim so the sample count is a multiple of batch_size)
    t1 = train.shape[0] % batch_size
    train_trimmed = train_scaled[t1:, :]
    model = fit_lstm(train_trimmed, batch_size, 30, 4)
    model.save('./data/checkpoint-' + job)
    scaler_filename = './data/checkpoint-' + job + "-scaler.save"
    joblib.dump(scaler, scaler_filename)
    models[job]['batch_size'] = batch_size
    models[job]['lock'].release()


def predict(job, seq):
    if job not in models or 'batch_size' not in models[job]:
        return -1, False
    batch_size = int(models[job]['batch_size'])
    data = {
        'seq': seq,
        'value': 0,
    }
    model = load_model('./data/checkpoint-' + job)
    scaler_filename = './data/checkpoint-' + job + "-scaler.save"
    scaler = joblib.load(scaler_filename)
    file = './data/' + job + '.' + str(random.randint(1000, 9999)) + '.csv'
    df = pd.read_csv('./data/' + job + '.csv', usecols=['seq', 'value'])
    df = df.tail(batch_size * 2 - 1)
    df = df.append(data, ignore_index=True)
    df.to_csv(file, index=False)
    # load dataset
    df = read_csv(file, header=0, index_col=0, squeeze=True)
    # transform data to be stationary
    raw_values = df.values
    diff_values = difference(raw_values, 1)
    # transform data to be supervised learning
    lag = 4
    supervised = timeseries_to_supervised(diff_values, lag)
    supervised_values = supervised[-batch_size:]
    test = supervised_values.values
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    # forecast the entire training dataset to build up state for forecasting
    test_reshaped = test_scaled[:, 0:-1]
    test_reshaped = test_reshaped.reshape(len(test_reshaped), 1, lag)
    output = model.predict(test_reshaped, batch_size=batch_size)
    predictions = list()
    for i in range(len(output)):
        yhat = output[i, 0]
        X = test_scaled[i, 0:-1]
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
        # store forecast
        predictions.append(yhat)
    # report performance
    rmse = sqrt(mean_squared_error(raw_values[-batch_size:], predictions))
    print(predictions, raw_values[-batch_size:])
    return predictions[-1], True


class MyHandler(BaseHTTPRequestHandler):
    # Handler for the GET requests
    def do_GET(self):
        req = parse.urlparse(self.path)
        query = parse.parse_qs(req.query)
        if req.path == "/ping":
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes("pong", "utf-8"))
        elif req.path == "/predict":
            try:
                job = query.get('job')[0]
                seq = query.get('seq')[0]
                msg = {'code': 0, 'error': ""}
                pred, success = predict(job, int(seq))
                if not success:
                    msg = {'code': 2, 'error': "Job " + job + " not exist"}
                else:
                    msg = {'code': 0, 'error': "", "total": int(pred)}
            except Exception as e:
                track = traceback.format_exc()
                print(track)
                msg = {'code': 1, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
        elif req.path == "/feed":
            try:
                job = query.get('job')[0]
                seq = query.get('seq')[0]
                value = query.get('value')[0]
                if int(seq) == 1:
                    with open('./data/' + job + '.csv', 'w', newline='') as csvfile:
                        spamwriter = csv.writer(
                            csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL
                        )
                        spamwriter.writerow(["seq", "value"])
                with open('./data/' + job + '.csv', 'a+', newline='') as csvfile:
                    spamwriter = csv.writer(
                        csvfile, delimiter=',',
                        quotechar='|', quoting=csv.QUOTE_MINIMAL
                    )
                    spamwriter.writerow([seq, value])
                msg = {'code': 0, 'error': ""}
            except Exception as e:
                msg = {'code': 1, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
        elif req.path == "/train":
            try:
                job = query.get('job')[0]
                t = Thread(target=train_models, name='train_models', args=(job,))
                t.start()
                msg = {'code': 0, 'error': ""}
            except Exception as e:
                msg = {'code': 1, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
        else:
            self.send_error(404, 'File Not Found: %s' % self.path)

    # Handler for the POST requests
    def do_POST(self):
        if self.path == "/train2":
            form = cgi.FieldStorage(
                fp=self.rfile,
                headers=self.headers,
                environ={
                    'REQUEST_METHOD': 'POST',
                    'CONTENT_TYPE': self.headers['Content-Type'],
                })
            try:
                job = form.getvalue('job')
                # pass the callable itself; calling train_models() here would
                # run it synchronously and without its required job argument
                t = Thread(target=train_models, name='train_models', args=(job,))
                t.start()
                msg = {"code": 0, "error": ""}
            except Exception as e:
                msg = {"code": 1, "error": str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
        else:
            self.send_error(404, 'File Not Found: %s' % self.path)


if __name__ == '__main__':
    try:
        # Create a web server and define the handler to manage the
        # incoming request
        server = HTTPServer(('', PORT_NUMBER), MyHandler)
        print('Started http server on port ', PORT_NUMBER)
        # Wait forever for incoming http requests
        server.serve_forever()
    except KeyboardInterrupt:
        print('^C received, shutting down the web server')
        server.socket.close()
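
For reference, a small sketch of what the preprocessing helpers above produce: difference() turns the raw series into step-to-step deltas, and timeseries_to_supervised() pairs each delta with its lag preceding deltas (only row 0 is dropped, so early rows keep NaN where history is missing). Assuming the two helpers are importable from this file:

from serve_lstm import difference, timeseries_to_supervised

raw = [10, 12, 15, 14, 18, 21]   # e.g. one job's resource usage per step
deltas = difference(raw, 1)      # Series: [2, 3, -1, 4, 3]
framed = timeseries_to_supervised(deltas, lag=2)
print(framed)
# row 1: [ 2.0,  NaN,  3.0]   <- shift(1), shift(2), current delta
# row 2: [ 3.0,  2.0, -1.0]
# row 3: [-1.0,  3.0,  4.0]
# row 4: [ 4.0, -1.0,  3.0]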