# Ensemble learning

A single model rarely achieves the best results, so we use ensemble learning to reduce the error further.
Ensemble learning combines (averages or stacks) several different models, each tuned to its best parameters.
Thirteen candidate models are evaluated here. First, import the required packages.

```python
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import ElasticNet, SGDRegressor, BayesianRidge
from sklearn.kernel_ridge import KernelRidge
from xgboost import XGBRegressor
```

### 1. Basic Modeling and Evaluation

The competition is scored by RMSE, so we first define a cross-validated RMSE metric.

```python
# Define the cross-validation strategy and the evaluation function
def rmse_cv(model, X, y, cv=5):
    """Return the cross-validated RMSE of ``model`` on ``(X, y)``, one value per fold.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn compatible regressor; ``cross_val_score``
        fits a fresh copy for every fold.
    X, y :
        Training features and target.
    cv : int or CV splitter, default 5
        Passed straight through to ``cross_val_score``.

    Returns
    -------
    numpy.ndarray
        Root-mean-squared error of each validation fold.
    """
    # scikit-learn reports MSE negated (higher is better), so flip the sign
    # before taking the square root.
    neg_mse = cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=cv)
    return np.sqrt(-neg_mse)
```

Collect all the models to be evaluated in a list.

```python
models = [LinearRegression(),Ridge(),Lasso(alpha=0.01,max_iter=10000),RandomForestRegressor(),GradientBoostingRegressor(),SVR(),LinearSVR(),
ElasticNet(alpha=0.001,max_iter=10000),SGDRegressor(max_iter=1000,tol=1e-3),BayesianRidge(),KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5),
ExtraTreesRegressor(),XGBRegressor()]
```

First, each of the 13 models is evaluated with 5-fold cross-validation; the output shows the mean and the standard deviation of the per-fold RMSE.

```python
names = ["LR", "Ridge", "Lasso", "RF", "GBR", "SVR", "LinSVR", "Ela","SGD","Bay","Ker","Extra","Xgb"]
# Score every candidate with rmse_cv and print "<name>: <mean RMSE>, <std of RMSE>".
for name, model in zip(names, models):
    score = rmse_cv(model, X_scaled, y_log)
    print("{}: {:.6f}, {:.4f}".format(name, score.mean(), score.std()))
```
```text
LR: 621771864740.772827, 411989810656.7503
Ridge: 0.118922, 0.0076
Lasso: 0.118914, 0.0065
RF: 0.148033, 0.0049
GBR: 0.123131, 0.0076
SVR: 0.179019, 0.0129
LinSVR: 1.239646, 0.4882
Ela: 0.116366, 0.0070
SGD: 2.814280, 0.5916
Bay: 0.117589, 0.0066
Ker: 0.114100, 0.0081
Extra: 0.141861, 0.0112
Xgb: 0.124880, 0.0058
```

### 2. Adjusting the parameters of each model

Define a helper for parameter tuning. Because the evaluation metric is RMSE, the printed scores are converted to RMSE as well. The helper is constructed with a model and then called with a parameter grid, which makes it convenient to test many models; it uses grid search with cross-validation.

```python
class grid():
def __init__(self, model):
    """Store the estimator whose hyper-parameters ``grid_get`` will search."""
    self.model = model

def grid_get(self, X, y, param_grid):
    """Grid-search ``self.model`` over ``param_grid`` and print the results as RMSE.

    Runs a 5-fold ``GridSearchCV`` scored with negated MSE, then prints
    the best parameter set with its RMSE, followed by a table with one
    row per parameter combination.

    Parameters
    ----------
    X, y :
        Training features and target.
    param_grid : dict or list of dict
        Parameter grid in the format ``GridSearchCV`` expects.
    """
    grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring="neg_mean_squared_error")
    grid_search.fit(X, y)
    print(grid_search.best_params_, np.sqrt(-grid_search.best_score_))
    # Convert on a DataFrame copy so the fitted search object's cv_results_
    # keeps its original (negated MSE) values.
    results = pd.DataFrame(grid_search.cv_results_)
    results['mean_test_score'] = np.sqrt(-results['mean_test_score'])
    # NOTE: only the mean is converted to RMSE; std_test_score is still the
    # spread of the negated-MSE scores across folds.
    print(results[['params', 'mean_test_score', 'std_test_score']])
```

Result of Lasso()

```python
grid(Lasso()).grid_get(X_scaled,y_log,{'alpha': [0.0004,0.0005,0.0007,0.0006,0.0009,0.0008],'max_iter':[10000]})
```
```text
{'alpha': 0.0009, 'max_iter': 10000} 0.11557402177546283
params  mean_test_score  std_test_score
0  {'alpha': 0.0004, 'max_iter': 10000}         0.116897        0.001659
1  {'alpha': 0.0005, 'max_iter': 10000}         0.116580        0.001644
2  {'alpha': 0.0007, 'max_iter': 10000}         0.116041        0.001612
3  {'alpha': 0.0006, 'max_iter': 10000}         0.116301        0.001630
4  {'alpha': 0.0009, 'max_iter': 10000}         0.115574        0.001574
5  {'alpha': 0.0008, 'max_iter': 10000}         0.115794        0.001591
```

Result of Ridge()

```python
grid(Ridge()).grid_get(X_scaled,y_log,{'alpha':[35,40,45,50,55,60,65,70,80,90]})
```
```text
{'alpha': 90} 0.11753822142197719
params  mean_test_score  std_test_score
0  {'alpha': 35}         0.118097        0.001621
1  {'alpha': 40}         0.118003        0.001607
2  {'alpha': 45}         0.117921        0.001595
3  {'alpha': 50}         0.117849        0.001583
4  {'alpha': 55}         0.117787        0.001573
5  {'alpha': 60}         0.117733        0.001564
6  {'alpha': 65}         0.117686        0.001555
7  {'alpha': 70}         0.117646        0.001547
8  {'alpha': 80}         0.117582        0.001533
9  {'alpha': 90}         0.117538        0.001522
```

After many rounds of testing, the following six models and their corresponding optimal parameters are finally selected.

```python
lasso = Lasso(alpha=0.0005,max_iter=10000)
# Remaining base learners, each created with its hand-picked hyper-parameters.
ridge = Ridge(alpha=60)
svr = SVR(
    kernel='rbf',
    gamma=0.0004,
    C=13,
    epsilon=0.009,
)
ker = KernelRidge(
    kernel='polynomial',
    degree=3,
    coef0=0.8,
    alpha=0.2,
)
ela = ElasticNet(alpha=0.005, l1_ratio=0.08, max_iter=10000)
bay = BayesianRidge()  # library defaults
```

### 3. Ensembling by weighted averaging

Combine the models' predictions as a weighted average, with one weight per model.

```python
# A weighted-average ensemble, written as a scikit-learn style estimator (fit / predict)
class AverageWeight(BaseEstimator, RegressorMixin):
    """Regressor that blends several base regressors with fixed weights.

    Parameters
    ----------
    mod : list of estimators
        Unfitted base regressors; ``fit`` trains clones and leaves the
        originals untouched.
    weight : sequence of float
        One weight per entry of ``mod``, in the same order. The weights
        are applied as given; they are not normalised.
    """

    def __init__(self, mod, weight):
        self.mod = mod  # base estimators to blend
        self.weight = weight  # per-estimator weights

    def fit(self, X, y):
        """Fit a fresh clone of every base estimator on ``(X, y)``.

        Raises
        ------
        ValueError
            If ``weight`` and ``mod`` differ in length.
        """
        if len(self.weight) != len(self.mod):
            raise ValueError(
                "weight must have one entry per model: got {} weights for {} models".format(
                    len(self.weight), len(self.mod)
                )
            )
        self.models_ = [clone(x) for x in self.mod]
        for model in self.models_:
            model.fit(X, y)
        return self

    def predict(self, X):
        """Return the weighted sum of the base predictions, one value per row of ``X``."""
        # pred has shape (number of models, number of samples).
        pred = np.array([model.predict(X) for model in self.models_])
        # Contract over the model axis: sum_m weight[m] * pred[m, sample].
        return np.dot(self.weight, pred)
```

Define six initial weights

```python
w1 = 0.02
# Remaining blend weights; together with w1 = 0.02 the six weights sum to 1.
w2, w3, w4, w5, w6 = 0.2, 0.25, 0.3, 0.03, 0.2
```
```python
weight_avg = AverageWeight(mod = [lasso,ridge,svr,ker,ela,bay],weight=[w1,w2,w3,w4,w5,w6])
# Cross-validate the blend once and reuse the scores; calling rmse_cv a second
# time would refit every base model for every fold again.
cv_scores = rmse_cv(weight_avg, X_scaled, y_log)
print(cv_scores)
print(cv_scores.mean())  # mean RMSE over the folds
```
```text
[0.11579461, 0.12567714, 0.12194691, 0.10298115, 0.11307847]
0.11589565350500453
```

### 4. Stacking

```python
class stacking(BaseEstimator, RegressorMixin, TransformerMixin):
def __init__(self, mod, meta_model):
    """Store the base models, the meta-model and the fold splitter.

    Parameters
    ----------
    mod : list of estimators
        Base (first-level) regressors.
    meta_model : estimator
        Second-level regressor trained on the base models' out-of-fold
        predictions.
    """
    self.mod = mod
    self.meta_model = meta_model
    # Shuffled 5-fold split with a fixed seed, so every call to
    # self.kf.split() produces the same folds.
    self.kf = KFold(n_splits=5, random_state=42, shuffle=True)

def fit(self, X, y):
    """Train the base models fold by fold, then fit the meta-model.

    Each base model is cloned and fitted once per fold of ``self.kf``.
    Its predictions on the held-out part of that fold fill one column
    of the out-of-fold matrix, and the meta-model is then fitted on
    that matrix against ``y``.

    Parameters
    ----------
    X, y :
        Training data. Both are indexed with integer index arrays
        (``X[train_index]``), so they need to support that form of
        indexing, as NumPy arrays do.

    Returns
    -------
    self
    """
    # saved_model[i] collects the per-fold fitted clones of base model i.
    self.saved_model = [list() for i in self.mod]
    # One row per training sample, one column per base model.
    oof_train = np.zeros((X.shape[0], len(self.mod)))

    for i, model in enumerate(self.mod):
        for train_index, val_index in self.kf.split(X, y):
            renew_model = clone(model)  # fresh, unfitted copy for this fold
            renew_model.fit(X[train_index], y[train_index])
            self.saved_model[i].append(renew_model)
            # Predict only the rows this clone has not seen during fitting.
            oof_train[val_index, i] = renew_model.predict(X[val_index])

    self.meta_model.fit(oof_train, y)
    return self

def predict(self, X):
    """Predict with the stacked ensemble.

    For every base model, the predictions of its per-fold fitted clones
    (``self.saved_model``) are averaged; the averages form one feature
    column per base model, which the meta-model turns into the final
    prediction.
    """
    # The inner stack gets a list, not a generator: NumPy's stacking
    # functions require a real sequence of arrays.
    whole_test = np.column_stack([
        np.column_stack([model.predict(X) for model in single_model]).mean(axis=1)
        for single_model in self.saved_model
    ])
    return self.meta_model.predict(whole_test)

def get_oof(self, X, y, test_X):
    """Build out-of-fold train features and fold-averaged test features.

    Parameters
    ----------
    X, y :
        Training data, indexed with integer index arrays.
    test_X :
        Data to predict with every per-fold clone.

    Returns
    -------
    oof : numpy.ndarray, shape (rows of X, number of base models)
        Column ``i`` holds base model ``i``'s predictions for each
        training row, made by the clone that did not train on that row.
    test_mean : numpy.ndarray, shape (rows of test_X, number of base models)
        Column ``i`` holds base model ``i``'s predictions on ``test_X``
        averaged over the folds.
    """
    # Take the fold count from the splitter instead of hard-coding it.
    n_folds = self.kf.get_n_splits()
    oof = np.zeros((X.shape[0], len(self.mod)))
    test_mean = np.zeros((test_X.shape[0], len(self.mod)))
    for i, model in enumerate(self.mod):
        # Per-fold test predictions for the current base model.
        test_single = np.zeros((test_X.shape[0], n_folds))
        for j, (train_index, val_index) in enumerate(self.kf.split(X, y)):
            clone_model = clone(model)  # fresh, unfitted copy for this fold
            clone_model.fit(X[train_index], y[train_index])
            oof[val_index, i] = clone_model.predict(X[val_index])
            test_single[:, j] = clone_model.predict(test_X)
        test_mean[:, i] = test_single.mean(axis=1)
    return oof, test_mean
```
```python
# After this preprocessing, the data can be fed to the stacking model.
# sklearn.preprocessing.Imputer was removed from scikit-learn; SimpleImputer
# is its replacement and also defaults to mean imputation.
from sklearn.impute import SimpleImputer

# fit_transform returns plain NumPy arrays, which is what stacking.fit needs
# for its X[train_index] / y[train_index] indexing.
a = SimpleImputer().fit_transform(X_scaled)  # features, plays the role of X
b = SimpleImputer().fit_transform(y_log.values.reshape(-1, 1)).ravel()  # target, plays the role of y
```
```python
stack_model = stacking(mod=[lasso,ridge,svr,ker,ela,bay],meta_model=ker)
# Train the stacked ensemble on the imputed training arrays.
stack_model.fit(a,b)
# Predict the test set and exponentiate the result.
# NOTE(review): presumably this undoes a log transform of the target (y_log);
# if the target was built with log1p, np.expm1 would be the matching inverse -- confirm.
pred = np.exp(stack_model.predict(test_X_scaled))
# NOTE(review): `result` is not defined anywhere in the visible text and `pred`
# is never used; a line building `result` from `pred` appears to be missing -- confirm.
result.to_csv("submission2.csv",index=False)
```

After uploading, the submission scored 0.12310.

Posted by redesigner on Thu, 12 Sep 2019 02:32:29 -0700