#!/usr/bin/env python3

from threading import Thread
from threading import Lock
from http.server import BaseHTTPRequestHandler, HTTPServer
import cgi  # used for multipart form parsing in /train2; deprecated since Python 3.11
import json
from urllib import parse
import csv
import random
import traceback
import pickle
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
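
# A small HTTP service that learns per-job performance models:
#   GET  /ping     - liveness check
#   GET  /feed     - append one (features, labels) sample for a job
#   GET  /train    - fit one RandomForestRegressor per label in a background thread
#   GET  /predict  - predict every label for a job from a feature dict
#   POST /train2   - form-based variant of /train
#
# Example round trip (hypothetical job and field names), with the JSON values
# URL-encoded:
#   curl -G 'http://localhost:8080/feed' --data-urlencode 'job=job1' \
#        --data-urlencode 'features={"size": 10}' --data-urlencode 'labels={"time": 3}'
#   curl 'http://localhost:8080/train?job=job1'
#   curl -G 'http://localhost:8080/predict' --data-urlencode 'job=job1' \
#        --data-urlencode 'features={"size": 12}'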

PORT_NUMBER = int(os.getenv('Port', 8080))

lock = Lock()  # guards creation of per-job entries in `models`
models = {}    # job -> {'lock': Lock, 'features': [...], 'labels': [...]}


def load_data(trainfile, testfile):
    # CSV layout: column 0 is the job name, the last column is the label,
    # everything in between is a feature.
    traindata = pd.read_csv(trainfile)
    testdata = pd.read_csv(testfile)
    feature_data = traindata.iloc[:, 1:-1]
    label_data = traindata.iloc[:, -1]
    test_feature = testdata.iloc[:, 1:]
    return feature_data, label_data, test_feature
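
# load_data is not called by the handlers below; a hypothetical use would be:
#   features, labels, test_features = load_data('./data/job1_time.csv', './data/job1.csv')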


def train_models(job):
    # Fit one RandomForestRegressor per label for this job from the CSVs
    # accumulated by /feed, and pickle each fitted model to disk.
    if job not in models or 'features' not in models[job]:
        return

    models[job]['lock'].acquire()
    try:
        for label in models[job]['labels']:
            trainfile = './data/' + job + '_' + label + '.csv'
            traindata = pd.read_csv(trainfile)
            feature_data = traindata.iloc[:, 1:-1]
            label_data = traindata.iloc[:, -1]

            x_train, x_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01)
            params = {
                'n_estimators': 70,
                'max_depth': 13,
                'min_samples_split': 10,
                'min_samples_leaf': 5,  # 10
                'max_features': len(models[job]['features']) - 1  # 7
            }
            # print(params)
            model = RandomForestRegressor(**params)
            model.fit(x_train, y_train)

            # save the model to disk
            modelname = './data/' + job + '_' + label + '.sav'
            pickle.dump(model, open(modelname, 'wb'))

            # evaluate on the held-out split
            y_pred = model.predict(x_test)
            # compute the RMSE
            MSE = mean_squared_error(y_test, y_pred)
            RMSE = np.sqrt(MSE)
            print('RMSE of {}:{} is {}'.format(job, label, str(RMSE)))
    except Exception as e:
        print(traceback.format_exc())
        print(str(e))
    finally:
        # always release, or a failed run would deadlock later /feed and /train calls
        models[job]['lock'].release()
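
# train_models runs in a background thread via GET /train, e.g. (hypothetical job):
#   curl 'http://localhost:8080/train?job=job1'
# The pickled models land next to the training data as './data/<job>_<label>.sav'.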


def predict(job, features):
    # Predict every label for `job` by writing the feature vector to a scratch
    # CSV (so it goes through the same parsing path as the training data), then
    # loading each pickled per-label model.
    if job not in models or 'features' not in models[job]:
        return -1, False

    values = [job]
    for feature in models[job]['features']:
        if feature in features:
            values.append(features[feature])
        else:
            values.append(0)

    testfile = './data/' + job + '.' + str(random.randint(1000, 9999)) + '.csv'
    t = ['job']
    t.extend(models[job]['features'])
    # write the header and the single feature row in one pass
    with open(testfile, 'w', newline='') as csvfile:
        spamwriter = csv.writer(
            csvfile, delimiter=',',
            quotechar='|', quoting=csv.QUOTE_MINIMAL
        )
        spamwriter.writerow(t)
        spamwriter.writerow(values)

    testdata = pd.read_csv(testfile)
    test_feature = testdata.iloc[:, 1:]

    predictions = {}
    for label in models[job]['labels']:
        # load the model from disk
        modelfile = './data/' + job + '_' + label + '.sav'
        if not os.path.exists(modelfile):
            if os.path.exists(testfile):
                os.remove(testfile)
            return -1, False
        model = pickle.load(open(modelfile, 'rb'))
        preds = model.predict(test_feature)
        # cast the numpy scalar to a plain float so json.dumps can serialize it
        predictions[label] = float(preds[0])

    if os.path.exists(testfile):
        os.remove(testfile)
    return predictions, True
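
# Example (hypothetical feature names):
#   preds, ok = predict('job1', {'size': 12})
#   # ok is False until /feed and /train have produced models for 'job1'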


class MyHandler(BaseHTTPRequestHandler):
    # Handler for the GET requests
    def do_GET(self):
        req = parse.urlparse(self.path)
        query = parse.parse_qs(req.query)

        if req.path == "/ping":
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes("pong", "utf-8"))

        elif req.path == "/predict":
            try:
                job = query.get('job')[0]
                features = json.loads(query.get('features')[0])
                pred, success = predict(job, features)
                if not success:
                    msg = {'code': 2, 'error': "Job " + job + " does not exist"}
                else:
                    msg = {'code': 0, 'error': "", "labels": pred}
            except Exception as e:
                track = traceback.format_exc()
                print(track)
                msg = {'code': 1, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
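
        # Example (hypothetical names), with the features value URL-encoded JSON:
        #   GET /predict?job=job1&features={"size": 12}
        # replies {"code": 0, "error": "", "labels": {...}} on success.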

        elif req.path == "/feed":
            try:
                job = query.get('job')[0]
                features = json.loads(query.get('features')[0])
                labels = json.loads(query.get('labels')[0])

                # register the job (and its feature/label schema) on first sight
                lock.acquire()
                flag = False
                if job not in models:
                    models[job] = {
                        'lock': Lock(),
                        'features': list(features.keys()),
                        'labels': list(labels.keys())
                    }
                    flag = True
                lock.release()

                models[job]['lock'].acquire()
                try:
                    # append one row per label file; missing values default to 0
                    for label in models[job]['labels']:
                        values = [job]
                        for feature in models[job]['features']:
                            if feature in features:
                                values.append(features[feature])
                            else:
                                values.append(0)
                        if label in labels:
                            values.append(labels[label])
                        else:
                            values.append(0)

                        if flag:
                            # first sample for this job: write the CSV header
                            t = ['job']
                            t.extend(models[job]['features'])
                            t.append(label)
                            with open('./data/' + job + '_' + label + '.csv', 'w', newline='') as csvfile:
                                spamwriter = csv.writer(
                                    csvfile, delimiter=',',
                                    quotechar='|', quoting=csv.QUOTE_MINIMAL
                                )
                                spamwriter.writerow(t)

                        with open('./data/' + job + '_' + label + '.csv', 'a+', newline='') as csvfile:
                            spamwriter = csv.writer(
                                csvfile, delimiter=',',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL
                            )
                            spamwriter.writerow(values)
                finally:
                    # release even if a write fails, so the job cannot deadlock
                    models[job]['lock'].release()

                msg = {'code': 0, 'error': ""}
            except Exception as e:
                msg = {'code': 1, 'error': str(e)}
                track = traceback.format_exc()
                print(track)
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
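
        # Example (hypothetical names), with features/labels as URL-encoded JSON:
        #   GET /feed?job=job1&features={"size": 10}&labels={"time": 3}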

        elif req.path == "/train":
            try:
                job = query.get('job')[0]
                t = Thread(target=train_models, name='train_models', args=(job,))
                t.start()
                msg = {'code': 0, 'error': ""}
            except Exception as e:
                msg = {'code': 1, 'error': str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
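
        # Example (hypothetical job): GET /train?job=job1 returns immediately;
        # training progress is only visible in the server log (RMSE prints).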

        else:
            self.send_error(404, 'File Not Found: %s' % self.path)

    # Handler for the POST requests
    def do_POST(self):
        if self.path == "/train2":
            form = cgi.FieldStorage(
                fp=self.rfile,
                headers=self.headers,
                environ={
                    'REQUEST_METHOD': 'POST',
                    'CONTENT_TYPE': self.headers['Content-Type'],
                })
            try:
                # getvalue() already returns the field's string; indexing it
                # with [0] would only keep the first character
                job = form.getvalue('job')
                seq = form.getvalue('seq')  # parsed but unused: train_models only takes the job
                # pass the function itself, not its result, and match its signature
                t = Thread(target=train_models, name='train_models', args=(job,))
                t.start()
                msg = {"code": 0, "error": ""}
            except Exception as e:
                msg = {"code": 1, "error": str(e)}
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(bytes(json.dumps(msg), "utf-8"))
        else:
            self.send_error(404, 'File Not Found: %s' % self.path)
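
    # Example (hypothetical values), posting the form fields /train2 reads:
    #   curl -F job=job1 -F seq=1 http://localhost:8080/train2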


if __name__ == '__main__':
    try:
        # Create a web server and define the handler to manage the
        # incoming requests
        server = HTTPServer(('', PORT_NUMBER), MyHandler)
        print('Started http server on port', PORT_NUMBER)

        # Wait forever for incoming http requests
        server.serve_forever()

    except KeyboardInterrupt:
        print('^C received, shutting down the web server')
        server.socket.close()