library(tidyverse)
library(xgboost)
library(data.table)
library(randomForest)
library(caret)
housing <- fread("housing.csv", data.table = FALSE)
housing <- housing %>%
mutate(total_bedrooms = ifelse(is.na(total_bedrooms),
median(total_bedrooms, na.rm = TRUE), total_bedrooms))
housing$mean_bedrooms = housing$total_bedrooms/housing$households
housing$mean_rooms = housing$total_rooms/housing$households
housing <- housing %>%
select(-c(total_rooms,total_bedrooms))
#One hot encoding
categories = unique(housing$ocean_proximity)
cat_housing = data.frame(ocean_proximity = housing$ocean_proximity)
for(cat in categories){
cat_housing[,cat] = rep(0, times= nrow(cat_housing))
}
for(i in 1:length(cat_housing$ocean_proximity)){
cat = as.character(cat_housing$ocean_proximity[i])
cat_housing[,cat][i] = 1
}
cat_housing <- cat_housing %>% select(-ocean_proximity)
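The loop above works, but base R can build the same indicator columns in one call with model.matrix(). A minimal alternative sketch (the resulting column names are prefixed with the variable name, unlike the loop version, so it is not used below):
# one-hot encode ocean_proximity in a single call; "- 1" drops the intercept
# so every category gets its own 0/1 column
cat_housing_alt = as.data.frame(model.matrix(~ ocean_proximity - 1, data = housing))
head(cat_housing_alt)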
drops = c('ocean_proximity','median_house_value')
housing_num = housing[ , !(names(housing) %in% drops)]
scaled_housing_num = scale(housing_num)
cleaned_housing = cbind(cat_housing, scaled_housing_num, median_house_value=housing$median_house_value)
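Worth knowing: scale() stores the column means and standard deviations it used as attributes of its result, so the identical transformation can be applied to new data later. A quick sketch (new_num is a hypothetical new data frame with the same numeric columns):
# centering/scaling values used by scale() are kept as attributes of the result
train_centers = attr(scaled_housing_num, "scaled:center")
train_scales = attr(scaled_housing_num, "scaled:scale")
# scale(new_num, center = train_centers, scale = train_scales) # reuse on new data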
set.seed(19) # set a random seed so the same sample can be reproduced in future runs
sample = sample.int(n = nrow(cleaned_housing), size = floor(.8*nrow(cleaned_housing)), replace = FALSE)
train = cleaned_housing[sample, ] #just the samples
test = cleaned_housing[-sample, ] #everything but the samples
train_y = train[,'median_house_value']
train_x = train[, names(train) !='median_house_value']
test_y = test[,'median_house_value']
test_x = test[, names(test) !='median_house_value']
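Since caret is already loaded, createDataPartition() is an alternative way to make this split; unlike a plain random sample it stratifies on the outcome, so train and test get similar distributions of median_house_value. A sketch only, not used below, so the results that follow are unaffected:
# stratified 80/20 split on the outcome
part_idx = createDataPartition(cleaned_housing$median_house_value, p = 0.8, list = FALSE)
train_alt = cleaned_housing[part_idx, ]
test_alt = cleaned_housing[-part_idx, ]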
head(train)
## NEAR BAY <1H OCEAN INLAND NEAR OCEAN ISLAND longitude latitude
## 7677 0 1 0 0 0 0.7385481 -0.8014511
## 9910 1 0 0 0 0 -1.3627414 1.2538381
## 4483 0 1 0 0 0 0.6886362 -0.7359066
## 14674 0 0 0 1 0 1.2276843 -1.3164439
## 19129 0 1 0 0 0 -1.5524064 1.2257476
## 4803 0 1 0 0 0 0.6037860 -0.7499518
## housing_median_age population households median_income mean_bedrooms
## 7677 0.58483810 -0.63090564 -0.73376366 0.36804980 -0.26473554
## 9910 0.42592579 -0.23265833 -0.24204153 -0.21879474 -0.25116427
## 4483 0.26701348 -0.59823347 -0.88023409 -0.99718357 -0.24164088
## 14674 -0.92482882 -0.07459565 -0.48528706 -0.52408655 -0.09622247
## 19129 0.02864502 -0.68477058 -0.71022377 -0.02303953 -0.09299386
## 4803 0.50538194 0.21238967 -0.04587579 -1.17141044 -0.12643615
## mean_rooms median_house_value
## 7677 0.06837588 170800
## 9910 -0.40973944 137500
## 4483 -0.81808817 137500
## 14674 0.10592753 112500
## 19129 0.13683122 306000
## 4803 -0.66896726 125900
rf_model = randomForest(train_x, y = train_y , ntree = 500, importance = TRUE)
names(rf_model) #these are all the different things you can call from the model.
## [1] "call" "type" "predicted" "mse"
## [5] "rsq" "oob.times" "importance" "importanceSD"
## [9] "localImportance" "proximity" "ntree" "mtry"
## [13] "forest" "coefs" "y" "test"
## [17] "inbag"
importance_dat = rf_model$importance
importance_dat
## %IncMSE IncNodePurity
## NEAR BAY 443660928.4 1.461671e+12
## <1H OCEAN 1546015541.3 4.616786e+12
## INLAND 3895405979.2 3.107474e+13
## NEAR OCEAN 456668450.0 2.109282e+12
## ISLAND 597689.1 5.115434e+10
## longitude 6662896402.7 2.531535e+13
## latitude 5355943908.8 2.198212e+13
## housing_median_age 1032310411.4 9.719367e+12
## population 1072996419.2 7.567615e+12
## households 1157015112.3 8.025847e+12
## median_income 8358786022.8 7.218902e+13
## mean_bedrooms 415700691.6 7.498511e+12
## mean_rooms 1776288466.1 2.118150e+13
sorted_predictors = sort(importance_dat[,1], decreasing=TRUE)
sorted_predictors
## median_income longitude latitude INLAND
## 8358786022.8 6662896402.7 5355943908.8 3895405979.2
## mean_rooms <1H OCEAN households population
## 1776288466.1 1546015541.3 1157015112.3 1072996419.2
## housing_median_age NEAR OCEAN NEAR BAY mean_bedrooms
## 1032310411.4 456668450.0 443660928.4 415700691.6
## ISLAND
## 597689.1
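randomForest also has a built-in plot for exactly this table; the call below should draw the %IncMSE and IncNodePurity panels directly from the fitted model:
# plot both importance measures for the fitted forest
varImpPlot(rf_model, main = "Random forest variable importance")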
oob_prediction = predict(rf_model) # with no new data supplied, predict() returns the out-of-bag predictions
#you may have noticed this is also available via $mse on the model object,
#but this way we learn stuff!
train_mse = mean(as.numeric((oob_prediction - train_y)^2))
oob_rmse = sqrt(train_mse)
oob_rmse
## [1] 49203.08
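As the comment above hints, the same number is available straight from the model object: rf_model$mse holds the OOB MSE after each successive tree, so its last entry should match the value we just computed:
# OOB RMSE taken directly from the model object
sqrt(rf_model$mse[rf_model$ntree])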
y_pred_rf = predict(rf_model , test_x)
test_mse = mean(((y_pred_rf - test_y)^2))
test_rmse = sqrt(test_mse)
test_rmse # ~47800 with this seed
## [1] 47806.95
rf_benchmark <- 47806
A more precise name for it would be regularized gradient boosting; the "eXtreme" refers to computational efficiency.
XGBoost is also an ensemble of trees, but unlike a random forest the trees are built additively. The algorithm grows trees iteratively so as to minimise the error, ending up with an optimal set of predictive trees. The trees are grown sequentially: each tree is fit using information from the trees before it, on a modified version of the original dataset that depends on the previously built trees. In XGBoost the trees also carry regularisation parameters that help prevent overfitting.
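In the standard XGBoost notation (not part of the original tutorial), the model after k rounds is a sum of trees, prediction = f_1(x) + f_2(x) + ... + f_k(x), and each new tree is chosen to minimise a regularised objective of roughly the form obj = sum_i l(y_i, yhat_i) + sum_k Omega(f_k), with Omega(f) = gamma*T + (lambda/2)*sum_j w_j^2, where T is the number of leaves and w_j the leaf weights; gamma and lambda are the regularisation terms that penalise overly complex trees.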
#put into the xgb matrix format
dtrain = xgb.DMatrix(data = as.matrix(train_x), label = train_y )
dtest = xgb.DMatrix(data = as.matrix(test_x), label = test_y)
# these are the datasets on which the RMSE is evaluated at each iteration
watchlist = list(train=dtrain, test=dtest)
# first try - a set of parameters that tends to work reasonably well for most problems
bst = xgb.train(data = dtrain,
max.depth = 8, # maximum tree depth
eta = 0.3, # learning rate
nthread = 2,
nround = 1000, # number of boosting rounds (trees)
watchlist = watchlist,
objective = "reg:linear", # newer xgboost versions call this "reg:squarederror"
early_stopping_rounds = 50, # stop if the test RMSE has not improved for 50 rounds
print_every_n = 500)
## [1] train-rmse:171584.296875 test-rmse:171376.609375
## Multiple eval metrics are present. Will use test_rmse for early stopping.
## Will train until test_rmse hasn't improved in 50 rounds.
##
## Stopping. Best iteration:
## [223] train-rmse:8550.336914 test-rmse:46795.085938
####
# Proper use - validation set
####
sample = sample.int(n = nrow(train), size = floor(.8*nrow(train)), replace = F)
train_t = train[sample, ] #just the samples
valid = train[-sample, ] #everything but the samples
train_y = train_t[,'median_house_value']
#with tidyverse loaded, dplyr's pull() returns the column as a plain vector:
#train_y = pull(train_t, median_house_value)
train_x = train_t[, names(train_t) !='median_house_value']
valid_y = valid[,'median_house_value']
valid_x = valid[, names(valid) !='median_house_value']
train_y[1:10]
## [1] 302000 340800 128500 113800 146900 177500 215400 73400 240500 500001
gb_train = xgb.DMatrix(data = as.matrix(train_x), label = train_y )
gb_valid = xgb.DMatrix(data = as.matrix(valid_x), label = valid_y )
#in some environments (e.g. Jupyter) the label may need to be wrapped in as.matrix(); subtle and annoying differences
# train xgb, evaluating against the validation set
watchlist = list(train = gb_train, valid = gb_valid)
bst_slow = xgb.train(data= gb_train,
max.depth = 10,
eta = 0.01,
nthread = 2,
nround = 1000,
watchlist = watchlist,
objective = "reg:linear",
early_stopping_rounds = 50,
print_every_n = 500)
## [1] train-rmse:234876.875000 valid-rmse:234400.703125
## Multiple eval metrics are present. Will use valid_rmse for early stopping.
## Will train until valid_rmse hasn't improved in 50 rounds.
##
## [501] train-rmse:21971.261719 valid-rmse:48034.664062
## [1000] train-rmse:14432.571289 valid-rmse:47050.019531
# recall we ran the following to get the test data in the right format:
# dtest = xgb.DMatrix(data = as.matrix(test_x), label = test_y)
# here we rebuild it with the label left off, as a reminder that it is external data; xgb ignores the label during prediction anyway
dtest = xgb.DMatrix(data = as.matrix(test_x))
#test the model on truly external data
y_hat_valid = predict(bst_slow, dtest)
test_mse = mean(((y_hat_valid - test_y)^2))
test_rmse = sqrt(test_mse)
test_rmse
## [1] 45921.76
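xgboost has its own importance table, analogous to the random forest importance above. A minimal sketch for the validation-trained model (feature names are taken from the training columns):
# gain-based feature importance for the boosted model
importance_xgb = xgb.importance(feature_names = colnames(train_x), model = bst_slow)
importance_xgb
# xgb.plot.importance(importance_xgb) # optional bar chart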
# Hyperparameter grid search
max.depths = c(7, 9)
etas = c(0.01, 0.001)
best_params = 0 # placeholders; overwritten on the first pass through the loop
best_score = 0
count = 1
for( depth in max.depths ){
for( num in etas){
bst_grid = xgb.train(data = gb_train,
max.depth = depth,
eta=num,
nthread = 2,
nround = 1000,
watchlist = watchlist,
objective = "reg:linear",
early_stopping_rounds = 50,
verbose=0)
if(count == 1){
best_params = bst_grid$params
best_score = bst_grid$best_score
count = count + 1
}
else if( bst_grid$best_score < best_score){
best_params = bst_grid$params
best_score = bst_grid$best_score
}
}
}
best_params
## $max_depth
## [1] 9
##
## $eta
## [1] 0.01
##
## $nthread
## [1] 2
##
## $objective
## [1] "reg:linear"
##
## $silent
## [1] 1
best_score
## valid-rmse
## 47111.51
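The hand-rolled loop scores each combination on a single validation split. xgboost also ships xgb.cv(), which runs k-fold cross-validation for one parameter setting; a sketch of how one cell of the grid could be evaluated that way (5 folds, same early stopping; an illustration only, not run above):
cv_res = xgb.cv(data = gb_train,
nrounds = 1000,
nfold = 5,
max.depth = 9,
eta = 0.01,
nthread = 2,
objective = "reg:linear",
early_stopping_rounds = 50,
verbose = 0)
# cv_res$evaluation_log holds the mean train/test RMSE per round; cv_res$best_iteration is the early-stopped round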
# the grid search favoured max_depth = 9 with eta = 0.01; the refit below keeps eta = 0.01 but uses max.depth = 7
bst_tuned = xgb.train( data = gb_train,
max.depth = 7,
eta = 0.01,
nthread = 2,
nround = 1000,
watchlist = watchlist,
objective = "reg:linear",
early_stopping_rounds = 50,
print_every_n = 500)
## [1] train-rmse:234899.500000 valid-rmse:234407.437500
## Multiple eval metrics are present. Will use valid_rmse for early stopping.
## Will train until valid_rmse hasn't improved in 50 rounds.
##
## [501] train-rmse:38416.281250 valid-rmse:50135.722656
## [1000] train-rmse:30785.687500 valid-rmse:47815.984375
y_hat_xgb_grid = predict(bst_tuned, dtest)
test_mse = mean(((y_hat_xgb_grid - test_y)^2))
test_rmse = sqrt(test_mse)
test_rmse # test-rmse: ~46500
## [1] 46544.54
test_rmse/rf_benchmark
## [1] 0.973613
# look up the caret model we are running to see its tuning parameters
modelLookup("xgbLinear")
## model parameter label forReg forClass probModel
## 1 xgbLinear nrounds # Boosting Iterations TRUE TRUE TRUE
## 2 xgbLinear lambda L2 Regularization TRUE TRUE TRUE
## 3 xgbLinear alpha L1 Regularization TRUE TRUE TRUE
## 4 xgbLinear eta Learning Rate TRUE TRUE TRUE
# set up all combinations of the tuning parameters
xgb_grid_1 = expand.grid(nrounds = c(1000,2000,3000,4000) ,
eta = c(0.01, 0.001, 0.0001),
lambda = 1,
alpha = 0)
xgb_grid_1
## nrounds eta lambda alpha
## 1 1000 1e-02 1 0
## 2 2000 1e-02 1 0
## 3 3000 1e-02 1 0
## 4 4000 1e-02 1 0
## 5 1000 1e-03 1 0
## 6 2000 1e-03 1 0
## 7 3000 1e-03 1 0
## 8 4000 1e-03 1 0
## 9 1000 1e-04 1 0
## 10 2000 1e-04 1 0
## 11 3000 1e-04 1 0
## 12 4000 1e-04 1 0
#here we go one better than a single validation set: we use cross-validation
#to make fuller use of the training data (12 parameter combinations x 5 folds = 60 model fits)
xgb_trcontrol_1 = trainControl(method = "cv",
number = 5,
verboseIter = TRUE,
returnData = FALSE,
returnResamp = "all",
allowParallel = TRUE)
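Note that allowParallel = TRUE only does something if a parallel backend has been registered; a minimal sketch using the doParallel package (an extra dependency, not loaded above), left commented out:
# library(doParallel)
# cl = makePSOCKcluster(4) # 4 worker processes; adjust to your machine
# registerDoParallel(cl) # now caret can fit the CV folds in parallel
# stopCluster(cl) # after train() finishes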
######
#below: a grid-search, cross-validated xgboost model in caret
######
xgb_train_1 = train(x = as.matrix(train_x),
y = train_y,
trControl = xgb_trcontrol_1,
tuneGrid = xgb_grid_1,
method = "xgbLinear",
max.depth = 5)
## + Fold1: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## - Fold1: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## + Fold1: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## - Fold1: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## + Fold1: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## - Fold1: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## + Fold1: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## - Fold1: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## + Fold1: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## - Fold1: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## + Fold1: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## - Fold1: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## + Fold1: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## - Fold1: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## + Fold1: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## - Fold1: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## + Fold1: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## - Fold1: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## + Fold1: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## - Fold1: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## + Fold1: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## - Fold1: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## + Fold1: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## - Fold1: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## + Fold2: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## - Fold2: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## + Fold2: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## - Fold2: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## + Fold2: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## - Fold2: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## + Fold2: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## - Fold2: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## + Fold2: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## - Fold2: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## + Fold2: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## - Fold2: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## + Fold2: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## - Fold2: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## + Fold2: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## - Fold2: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## + Fold2: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## - Fold2: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## + Fold2: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## - Fold2: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## + Fold2: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## - Fold2: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## + Fold2: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## - Fold2: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## + Fold3: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## - Fold3: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## + Fold3: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## - Fold3: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## + Fold3: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## - Fold3: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## + Fold3: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## - Fold3: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## + Fold3: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## - Fold3: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## + Fold3: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## - Fold3: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## + Fold3: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## - Fold3: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## + Fold3: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## - Fold3: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## + Fold3: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## - Fold3: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## + Fold3: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## - Fold3: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## + Fold3: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## - Fold3: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## + Fold3: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## - Fold3: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## + Fold4: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## - Fold4: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## + Fold4: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## - Fold4: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## + Fold4: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## - Fold4: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## + Fold4: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## - Fold4: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## + Fold4: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## - Fold4: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## + Fold4: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## - Fold4: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## + Fold4: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## - Fold4: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## + Fold4: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## - Fold4: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## + Fold4: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## - Fold4: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## + Fold4: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## - Fold4: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## + Fold4: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## - Fold4: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## + Fold4: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## - Fold4: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## + Fold5: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## - Fold5: nrounds=1000, eta=1e-02, lambda=1, alpha=0
## + Fold5: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## - Fold5: nrounds=2000, eta=1e-02, lambda=1, alpha=0
## + Fold5: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## - Fold5: nrounds=3000, eta=1e-02, lambda=1, alpha=0
## + Fold5: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## - Fold5: nrounds=4000, eta=1e-02, lambda=1, alpha=0
## + Fold5: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## - Fold5: nrounds=1000, eta=1e-03, lambda=1, alpha=0
## + Fold5: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## - Fold5: nrounds=2000, eta=1e-03, lambda=1, alpha=0
## + Fold5: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## - Fold5: nrounds=3000, eta=1e-03, lambda=1, alpha=0
## + Fold5: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## - Fold5: nrounds=4000, eta=1e-03, lambda=1, alpha=0
## + Fold5: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## - Fold5: nrounds=1000, eta=1e-04, lambda=1, alpha=0
## + Fold5: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## - Fold5: nrounds=2000, eta=1e-04, lambda=1, alpha=0
## + Fold5: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## - Fold5: nrounds=3000, eta=1e-04, lambda=1, alpha=0
## + Fold5: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## - Fold5: nrounds=4000, eta=1e-04, lambda=1, alpha=0
## Aggregating results
## Selecting tuning parameters
## Fitting nrounds = 1000, lambda = 1, alpha = 0, eta = 1e-04 on full training set
names(xgb_train_1)
## [1] "method" "modelInfo" "modelType" "results" "pred"
## [6] "bestTune" "call" "dots" "metric" "control"
## [11] "finalModel" "preProcess" "trainingData" "resample" "resampledCM"
## [16] "perfNames" "maximize" "yLimits" "times" "levels"
xgb_train_1$bestTune
## nrounds lambda alpha eta
## 1 1000 1 0 1e-04
xgb_train_1$method
## [1] "xgbLinear"
summary(xgb_train_1)
## Length Class Mode
## handle 1 xgb.Booster.handle externalptr
## raw 2039214 -none- raw
## niter 1 -none- numeric
## call 6 -none- call
## params 5 -none- list
## callbacks 1 -none- list
## feature_names 13 -none- character
## nfeatures 1 -none- numeric
## xNames 13 -none- character
## problemType 1 -none- character
## tuneValue 4 data.frame list
## obsLevels 1 -none- logical
## param 1 -none- list
#alternatively, you can 'narrow in' on the best parameters: repeat the above with a range of values around
#the best ones found and see whether higher-resolution tweaks provide any further improvement.
xgb_cv_yhat = predict(xgb_train_1 , as.matrix(test_x))
test_mse = mean(((xgb_cv_yhat - test_y)^2))
test_rmse = sqrt(test_mse)
test_rmse # ~46670... pretty close to the 'by hand' grid search!
## [1] 46672.62
We can ensemble (blend) the results at the end if prediction accuracy matters more than model interpretability.
#y_pred_rf #random forest
#y_hat_valid #xgBoost with validation
#y_hat_xgb_grid #xgBoost grid search
#xgb_cv_yhat #xgBoost caret cross validation
length(y_hat_xgb_grid)
## [1] 4128
blend_pred = (y_hat_valid * .25) + (y_pred_rf * .25) + (xgb_cv_yhat * .25) + (y_hat_xgb_grid * .25)
length(blend_pred)
## [1] 4128
length(blend_pred) == length(y_hat_xgb_grid)
## [1] TRUE
blend_test_mse = mean(((blend_pred - test_y)^2))
blend_test_rmse = sqrt(blend_test_mse)
blend_test_rmse
## [1] 44684.51
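The equal 25% weights here are just a guess. With a held-out validation set you could search for better weights; a minimal sketch for two models, where pred1, pred2 and valid_y are hypothetical validation-set predictions and labels (tuning the weights on the test set itself would leak information):
# ws = seq(0, 1, by = 0.05) # weight on the first model; the second gets 1 - w
# rmses = sapply(ws, function(w) sqrt(mean((w * pred1 + (1 - w) * pred2 - valid_y)^2)))
# ws[which.min(rmses)] # weight with the lowest validation RMSE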