mirror of
https://github.com/newnius/YAO-optimizer.git
synced 2025-06-06 06:41:55 +00:00
update
This commit is contained in:
parent
3ccc32945c
commit
e4a9ceafe5
@ -3,6 +3,8 @@
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="0aedafd8-e57e-462a-beda-65af0b91f3df" name="Default Changelist" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/rf.py" beforeDir="false" afterPath="$PROJECT_DIR$/rf.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/serve.py" beforeDir="false" afterPath="$PROJECT_DIR$/serve.py" afterDir="false" />
|
||||
</list>
|
||||
<ignored path="$PROJECT_DIR$/out/" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
@ -53,7 +55,7 @@
|
||||
<entry key="dummy" value="5" />
|
||||
<entry key="gitignore" value="12" />
|
||||
<entry key="md" value="234" />
|
||||
<entry key="py" value="5489" />
|
||||
<entry key="py" value="6046" />
|
||||
<entry key="sh" value="5" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
@ -63,7 +65,7 @@
|
||||
<entry key="Dockerfile" value="81" />
|
||||
<entry key="Markdown" value="234" />
|
||||
<entry key="PLAIN_TEXT" value="26" />
|
||||
<entry key="Python" value="5489" />
|
||||
<entry key="Python" value="6046" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
</session>
|
||||
@ -73,8 +75,8 @@
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/serve.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="284">
|
||||
<caret line="135" column="46" lean-forward="true" selection-start-line="135" selection-start-column="46" selection-end-line="135" selection-end-column="46" />
|
||||
<state relative-caret-position="-1610">
|
||||
<caret line="67" lean-forward="true" selection-start-line="67" selection-end-line="67" />
|
||||
<folding>
|
||||
<element signature="e#18#46#0" expanded="true" />
|
||||
</folding>
|
||||
@ -85,8 +87,12 @@
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/rf.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="90">
|
||||
<caret line="7" column="36" lean-forward="true" selection-start-line="7" selection-start-column="36" selection-end-line="7" selection-end-column="36" />
|
||||
<state relative-caret-position="277">
|
||||
<caret line="258" column="22" lean-forward="true" selection-start-line="258" selection-start-column="22" selection-end-line="258" selection-end-column="22" />
|
||||
<folding>
|
||||
<element signature="e#21#39#0" expanded="true" />
|
||||
<marker date="1593330072988" expanded="true" signature="1440:1442" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@ -135,6 +141,7 @@
|
||||
<find>joblib</find>
|
||||
<find>traceback</find>
|
||||
<find>models[job]</find>
|
||||
<find>range</find>
|
||||
</findStrings>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
@ -155,9 +162,9 @@
|
||||
<option value="$PROJECT_DIR$/data/data3.csv" />
|
||||
<option value="$PROJECT_DIR$/train.py" />
|
||||
<option value="$PROJECT_DIR$/README.md" />
|
||||
<option value="$PROJECT_DIR$/serve_rf.py" />
|
||||
<option value="$PROJECT_DIR$/rf.py" />
|
||||
<option value="$PROJECT_DIR$/serve.py" />
|
||||
<option value="$PROJECT_DIR$/serve_rf.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
@ -200,7 +207,7 @@
|
||||
<component name="PropertiesComponent">
|
||||
<property name="WebServerToolWindowFactoryState" value="false" />
|
||||
<property name="aspect.path.notification.shown" value="true" />
|
||||
<property name="com.android.tools.idea.instantapp.provision.ProvisionBeforeRunTaskProvider.myTimeStamp" value="1593014440494" />
|
||||
<property name="com.android.tools.idea.instantapp.provision.ProvisionBeforeRunTaskProvider.myTimeStamp" value="1593445354606" />
|
||||
<property name="go.gopath.indexing.explicitly.defined" value="true" />
|
||||
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
|
||||
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
|
||||
@ -245,12 +252,12 @@
|
||||
<workItem from="1588319878551" duration="41219000" />
|
||||
<workItem from="1588426002721" duration="336000" />
|
||||
<workItem from="1588427782140" duration="326000" />
|
||||
<workItem from="1592809729651" duration="9925000" />
|
||||
<workItem from="1592809729651" duration="19564000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TimeTrackingManager">
|
||||
<option name="totallyTimeSpent" value="68779000" />
|
||||
<option name="totallyTimeSpent" value="78418000" />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="0" y="0" width="1280" height="800" extended-state="0" />
|
||||
@ -394,13 +401,6 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/rf.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="90">
|
||||
<caret line="7" column="36" lean-forward="true" selection-start-line="7" selection-start-column="36" selection-end-line="7" selection-end-column="36" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/serve_lstm.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="90">
|
||||
@ -412,10 +412,21 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/rf.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="277">
|
||||
<caret line="258" column="22" lean-forward="true" selection-start-line="258" selection-start-column="22" selection-end-line="258" selection-end-column="22" />
|
||||
<folding>
|
||||
<element signature="e#21#39#0" expanded="true" />
|
||||
<marker date="1593330072988" expanded="true" signature="1440:1442" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/serve.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="284">
|
||||
<caret line="135" column="46" lean-forward="true" selection-start-line="135" selection-start-column="46" selection-end-line="135" selection-end-column="46" />
|
||||
<state relative-caret-position="-1610">
|
||||
<caret line="67" lean-forward="true" selection-start-line="67" selection-end-line="67" />
|
||||
<folding>
|
||||
<element signature="e#18#46#0" expanded="true" />
|
||||
</folding>
|
||||
|
148
rf.py
148
rf.py
@ -1,6 +1,7 @@
|
||||
# _*_coding:utf-8_*_
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
|
||||
def load_data(trainfile, testfile):
|
||||
@ -8,11 +9,12 @@ def load_data(trainfile, testfile):
|
||||
testdata = pd.read_csv(testfile)
|
||||
feature_data = traindata.iloc[:, 1:-1]
|
||||
label_data = traindata.iloc[:, -1]
|
||||
test_feature = testdata.iloc[:, 1:]
|
||||
return feature_data, label_data, test_feature
|
||||
test_feature = testdata.iloc[:, 1:-1]
|
||||
test_label = testdata.iloc[:, -1]
|
||||
return feature_data, label_data, test_feature, test_label
|
||||
|
||||
|
||||
def random_forest_train(feature_data, label_data, test_feature, submitfile):
|
||||
def random_forest_train(feature_data, label_data, test_feature):
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
@ -23,9 +25,9 @@ def random_forest_train(feature_data, label_data, test_feature, submitfile):
|
||||
'max_depth': 13,
|
||||
'min_samples_split': 10,
|
||||
'min_samples_leaf': 5, # 10
|
||||
'max_features': 7
|
||||
'max_features': len(X_train.columns)
|
||||
}
|
||||
print(X_test)
|
||||
# print(X_test)
|
||||
model = RandomForestRegressor(**params)
|
||||
model.fit(X_train, y_train)
|
||||
# 对测试集进行预测
|
||||
@ -33,23 +35,115 @@ def random_forest_train(feature_data, label_data, test_feature, submitfile):
|
||||
# 计算准确率
|
||||
MSE = mean_squared_error(y_test, y_pred)
|
||||
RMSE = np.sqrt(MSE)
|
||||
print(RMSE)
|
||||
|
||||
# print(abs(y_test - y_pred) / y_test)
|
||||
# print(RMSE)
|
||||
'''
|
||||
submit = pd.read_csv(submitfile)
|
||||
print(submit)
|
||||
print(model.predict(test_feature))
|
||||
submit['CPU'] = model.predict(test_feature)
|
||||
submit.to_csv('my_random_forest_prediction1.csv', index=False)
|
||||
print(submit)
|
||||
print(model.predict(test_feature))
|
||||
'''
|
||||
return model.predict(test_feature)
|
||||
|
||||
|
||||
def random_forest_parameter_tuning1(feature_data, label_data, test_feature):
|
||||
def linear_regression_train(feature_data, label_data, test_feature):
    """Fit a linear regression model and return its predictions.

    A random 1% of the supplied rows is set aside before fitting. The model
    is scored on that slice (RMSE), but the score is only computed, never
    reported or returned.

    :param feature_data: training features, passed to ``train_test_split``
    :param label_data: training targets aligned with ``feature_data``
    :param test_feature: features to predict for
    :return: result of ``model.predict(test_feature)``
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    train_x, holdout_x, train_y, holdout_y = train_test_split(
        feature_data, label_data, test_size=0.01
    )

    # Default hyper-parameters are used for the linear model.
    regressor = LinearRegression()
    regressor.fit(train_x, train_y)

    # Predict on the held-out slice and compute its error. The value is kept
    # only as a debugging hook and does not influence the return value.
    holdout_pred = regressor.predict(holdout_x)
    holdout_rmse = np.sqrt(mean_squared_error(holdout_y, holdout_pred))

    return regressor.predict(test_feature)
|
||||
|
||||
|
||||
def adaboost_train(feature_data, label_data, test_feature):
    """Fit an AdaBoost regressor and return its predictions.

    A random 1% of the supplied rows is set aside before fitting. The model
    is scored on that slice (RMSE), but the score is only computed, never
    reported or returned.

    :param feature_data: training features, passed to ``train_test_split``
    :param label_data: training targets aligned with ``feature_data``
    :param test_feature: features to predict for
    :return: result of ``model.predict(test_feature)``
    """
    from sklearn.ensemble import AdaBoostRegressor
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    train_x, holdout_x, train_y, holdout_y = train_test_split(
        feature_data, label_data, test_size=0.01
    )

    # Default hyper-parameters are used for the booster.
    regressor = AdaBoostRegressor()
    regressor.fit(train_x, train_y)

    # Predict on the held-out slice and compute its error. The value is kept
    # only as a debugging hook and does not influence the return value.
    holdout_pred = regressor.predict(holdout_x)
    holdout_rmse = np.sqrt(mean_squared_error(holdout_y, holdout_pred))

    return regressor.predict(test_feature)
|
||||
|
||||
|
||||
def gbdt_train(feature_data, label_data, test_feature):
    """Fit a gradient-boosted tree regressor and return its predictions.

    A random 1% of the supplied rows is set aside before fitting. The model
    is scored on that slice (RMSE), but the score is only computed, never
    reported or returned.

    :param feature_data: training features; must expose ``.columns`` (it is
        used to size ``max_features``), i.e. a pandas DataFrame
    :param label_data: training targets aligned with ``feature_data``
    :param test_feature: features to predict for
    :return: result of ``model.predict(test_feature)``
    """
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error

    X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
    params = {
        # 'loss' is deliberately left at its default. Squared error is the
        # default in every scikit-learn release, whereas the explicit
        # spelling 'ls' was deprecated in 1.0 and is rejected from 1.2 on.
        'n_estimators': 70,
        'max_depth': 13,
        'min_samples_split': 10,
        'min_samples_leaf': 5,
        # Consider every feature at each split.
        'max_features': len(X_train.columns),
    }
    model = GradientBoostingRegressor(**params)
    model.fit(X_train, y_train)

    # Predict on the held-out slice and compute its error. The value is kept
    # only as a debugging hook and does not influence the return value.
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test, y_pred)
    RMSE = np.sqrt(MSE)

    return model.predict(test_feature)
|
||||
|
||||
|
||||
def decision_tree_train(feature_data, label_data, test_feature):
    """Fit a depth-limited decision tree regressor and return its predictions.

    A random 1% of the supplied rows is set aside before fitting. The model
    is scored on that slice (RMSE), but the score is only computed, never
    reported or returned.

    :param feature_data: training features, passed to ``train_test_split``
    :param label_data: training targets aligned with ``feature_data``
    :param test_feature: features to predict for
    :return: result of ``model.predict(test_feature)``
    """
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeRegressor

    train_x, holdout_x, train_y, holdout_y = train_test_split(
        feature_data, label_data, test_size=0.01
    )

    # The tree depth is capped at 13; everything else stays at its default.
    regressor = DecisionTreeRegressor(max_depth=13)
    regressor.fit(train_x, train_y)

    # Predict on the held-out slice and compute its error. The value is kept
    # only as a debugging hook and does not influence the return value.
    holdout_pred = regressor.predict(holdout_x)
    holdout_rmse = np.sqrt(mean_squared_error(holdout_y, holdout_pred))

    return regressor.predict(test_feature)
|
||||
|
||||
|
||||
def random_forest_parameter_tuning1(feature_data, label_data):
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
|
||||
param_test1 = {
|
||||
'n_estimators': range(10, 71, 10)
|
||||
}
|
||||
@ -67,13 +161,13 @@ def random_forest_parameter_tuning1(feature_data, label_data, test_feature):
|
||||
return model.best_score_, model.best_params_
|
||||
|
||||
|
||||
def random_forest_parameter_tuning2(feature_data, label_data, test_feature):
|
||||
def random_forest_parameter_tuning2(feature_data, label_data):
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
|
||||
param_test2 = {
|
||||
'max_depth': range(3, 14, 2),
|
||||
'min_samples_split': range(50, 201, 20)
|
||||
@ -98,7 +192,7 @@ def random_forest_parameter_tuning3(feature_data, label_data, test_feature):
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
|
||||
param_test3 = {
|
||||
'min_samples_split': range(10, 90, 20),
|
||||
'min_samples_leaf': range(10, 60, 10),
|
||||
@ -123,7 +217,7 @@ def random_forest_parameter_tuning4(feature_data, label_data, test_feature):
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.23)
|
||||
X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
|
||||
param_test4 = {
|
||||
'max_features': range(3, 9, 2)
|
||||
}
|
||||
@ -142,8 +236,28 @@ def random_forest_parameter_tuning4(feature_data, label_data, test_feature):
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: train the model selected by the ``algorithm``
    # environment variable on data/train.csv, predict data/test.csv, then
    # print RMSE, R^2 and a short per-row comparison.
    from sklearn.metrics import mean_squared_error

    algorithm = os.getenv('algorithm', 'rf')
    trainfile = 'data/train.csv'
    testfile = 'data/test.csv'
    submitfile = 'data/sample_submit.csv'
    feature_data, label_data, test_feature, test_label = load_data(trainfile, testfile)

    # Map each supported ``algorithm`` value to its trainer. Any other value
    # falls back to the random forest, as before. 'gbdt' now dispatches to
    # gbdt_train; it previously ran adaboost_train by mistake.
    trainers = {
        'lr': linear_regression_train,
        'ada': adaboost_train,
        'gbdt': gbdt_train,
        'tree': decision_tree_train,
    }
    train = trainers.get(algorithm, random_forest_train)
    y_pred = train(feature_data, label_data, test_feature)

    MSE = mean_squared_error(test_label, y_pred)
    RMSE = np.sqrt(MSE)
    var = np.var(test_label)
    # R^2 computed as 1 - MSE / variance of the true labels.
    r2 = 1 - MSE / var
    print(RMSE, r2)

    # Show actual, predicted and relative error for at most the first 20
    # rows. Positional access (.iloc) keeps this working for short test sets
    # and for label series whose index does not start at 0.
    for i in range(min(20, len(test_label))):
        actual = test_label.iloc[i]
        print("{},{},{}".format(actual, y_pred[i], (y_pred[i] - actual) / actual))
|
||||
|
5
serve.py
5
serve.py
@ -62,7 +62,7 @@ def train_models(job):
|
||||
# 计算准确率
|
||||
MSE = mean_squared_error(y_test, y_pred)
|
||||
RMSE = np.sqrt(MSE)
|
||||
print('RMSE of ' + job + ' is ' + str(RMSE))
|
||||
print('RMSE of {}:{} is {}'.format(job, label, str(RMSE)))
|
||||
|
||||
models[job]['lock'].release()
|
||||
|
||||
@ -181,7 +181,8 @@ class MyHandler(BaseHTTPRequestHandler):
|
||||
csvfile, delimiter=',',
|
||||
quotechar='|', quoting=csv.QUOTE_MINIMAL
|
||||
)
|
||||
spamwriter.writerow(values)
|
||||
for i in range(5):
|
||||
spamwriter.writerow(values)
|
||||
|
||||
models[job]['lock'].release()
|
||||
msg = {'code': 0, 'error': ""}
|
||||
|
Loading…
Reference in New Issue
Block a user