diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 1e04210..8f5cea4 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,11 @@ - + + + + + @@ -214,7 +205,7 @@ - + @@ -259,26 +250,25 @@ - + - - - + - + @@ -319,34 +309,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -364,13 +326,6 @@ - - - - - - - @@ -381,13 +336,6 @@ - - - - - - - @@ -398,13 +346,17 @@ - - - - - - - + + + + + + + + + + + @@ -419,20 +371,54 @@ - + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -442,8 +428,19 @@ - - + + + + + + + + + + + + + diff --git a/compare.sh b/compare.sh index 3cd65bd..52d9752 100755 --- a/compare.sh +++ b/compare.sh @@ -10,19 +10,33 @@ fi awk 'FNR>1' ${FILE} | shuf > ./data/train.csv head -n 1 ${FILE} > ./tmp.csv head -n 1 ${FILE} > ./data/test.csv -cat ./data/train.csv >> ./tmp.csv -tail -n 50 tmp.csv >> ./data/test.csv +head -n -50 ./data/train.csv >> ./tmp.csv +tail -n 50 ./data/train.csv >> ./data/test.csv +cnt=`wc ${FILE} | awk '{print $1}'` +step=50 +maxn=$((cnt / step * step + 1)) -for step in {51..501..50} -do +step=51 +while [[ ${step} -le ${maxn} ]]; do echo "step=${step}" tail -n ${step} tmp.csv > ./data/train.csv - algorithm=lr python3 rf.py - algorithm=rf python3 rf.py - algorithm=tree python3 rf.py - algorithm=ada python3 rf.py - algorithm=gbdt python3 rf.py + echo 'lr:' + display_diff=0 algorithm=lr python3 rf.py + + echo 'rf:' + display_diff=0 algorithm=rf python3 rf.py + + echo 'dt:' + display_diff=0 algorithm=dt python3 rf.py + + echo 'ada:' + display_diff=0 algorithm=ada python3 rf.py + + echo 'gbdt:' + display_diff=0 algorithm=gbdt python3 rf.py + echo -e "\n" + step=$(($step + 50)) done rm ./data/train.csv diff --git a/rf.py b/rf.py index a61c63d..ead9666 100644 --- a/rf.py +++ b/rf.py @@ -246,7 +246,7 @@ if __name__ == '__main__': y_pred = adaboost_train(feature_data, label_data, test_feature) elif algorithm == 'gbdt': y_pred = adaboost_train(feature_data, label_data, test_feature) - elif algorithm == 'tree': + elif algorithm == 'dt': y_pred = decision_tree_train(feature_data, label_data, test_feature) else: y_pred = random_forest_train(feature_data, label_data, test_feature) @@ -259,5 +259,7 @@ if __name__ == '__main__': r2 = 1 - MSE / var # print(abs(test_label - y_pred) / test_label) print(RMSE, r2) - for i in range(20): - print("{},{},{}".format(test_label[i], y_pred[i], (y_pred[i] - test_label[i]) / test_label[i])) + display_diff = os.getenv('display_diff', '0') + if display_diff == '1': + for i in range(20): + print("{},{},{}".format(test_label[i], y_pred[i], (y_pred[i] - test_label[i]) / test_label[i])) diff --git a/serve.py b/serve.py index df8701f..54e8b72 100644 --- a/serve.py +++ b/serve.py @@ -185,8 +185,7 @@ class MyHandler(BaseHTTPRequestHandler): csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL ) - for i in range(5): - spamwriter.writerow(values) + spamwriter.writerow(values) models[job]['lock'].release() msg = {'code': 0, 'error': ""}