From b16c14e08db7dbac41daae823e5716635db55b65 Mon Sep 17 00:00:00 2001 From: Newnius Date: Fri, 10 Jul 2020 15:21:27 +0800 Subject: [PATCH] robust, allow unseen features & labels --- .idea/workspace.xml | 52 +++++++++++++++++++++++++-------------------- serve.py | 24 ++++++++++++++------- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 1f76ebc..1c0af67 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,10 +1,7 @@ - - - - + - + - - + + @@ -83,6 +80,15 @@ + + + + + + + + + @@ -206,7 +212,7 @@ - + @@ -251,12 +257,12 @@ - + - @@ -267,10 +273,10 @@ - + - + @@ -321,13 +327,6 @@ - - - - - - - @@ -438,14 +437,21 @@ - - + + + + + + + + + diff --git a/serve.py b/serve.py index b2e55e4..cfb94c2 100644 --- a/serve.py +++ b/serve.py @@ -41,7 +41,7 @@ def train_models(job): feature_data = traindata.iloc[:, 1:-1] label_data = traindata.iloc[:, -1] - X_train, X_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01) + x_train, x_test, y_train, y_test = train_test_split(feature_data, label_data, test_size=0.01) params = { 'n_estimators': 70, 'max_depth': 13, @@ -51,14 +51,14 @@ def train_models(job): } # print(params) model = RandomForestRegressor(**params) - model.fit(X_train, y_train) + model.fit(x_train, y_train) # save the model to disk modelname = './data/' + job + '_' + label + '.sav' pickle.dump(model, open(modelname, 'wb')) # 对测试集进行预测 - y_pred = model.predict(X_test) + y_pred = model.predict(x_test) # 计算准确率 MSE = mean_squared_error(y_test, y_pred) RMSE = np.sqrt(MSE) @@ -76,7 +76,10 @@ def predict(job, features): values = [job] for feature in models[job]['features']: - values.append(features[feature]) + if feature in features: + values.append(features[feature]) + else: + values.append(0) testfile = './data/' + job + '.' + str(random.randint(1000, 9999)) + '.csv' t = ['job'] @@ -131,8 +134,6 @@ class MyHandler(BaseHTTPRequestHandler): try: job = query.get('job')[0] features = json.loads(query.get('features')[0]) - msg = {'code': 0, 'error': ""} - pred, success = predict(job, features) if not success: @@ -170,8 +171,15 @@ class MyHandler(BaseHTTPRequestHandler): for label in models[job]['labels']: values = [job] for feature in models[job]['features']: - values.append(features[feature]) - values.append(labels[label]) + if feature in features: + values.append(features[feature]) + else: + values.append(0) + if label in labels: + values.append(labels[label]) + else: + values.append(0) + if flag: t = ['job'] t.extend(models[job]['features'])