#install.packages("tidyverse")
#install.packages("mlr3verse")
#install.packages("kknn")
#install.packages("e1071")
#install.packages("ranger")
#install.packages("rpart.plot")
#install.packages("glmnet")
#install.packages("xgboost")
library(tidyverse)
library(mlr3verse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: mlr3
## Loading required package: mlr3filters
## Loading required package: mlr3learners
## Loading required package: mlr3pipelines
## Loading required package: mlr3tuning
## Loading required package: mlr3viz
## Loading required package: paradox
# Load the training data from GitHub; character columns are read as factors.
data_house <- read.csv(
  "https://raw.githubusercontent.com/Rifqiaulya/poladatawaktu/main/house_price1.csv",
  stringsAsFactors = TRUE
)
# Compact overview of every column (name, type, first values).
data_house %>% glimpse()
## Rows: 1,460
## Columns: 81
## $ Id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ MSSubClass <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60, 20, ...
## $ MSZoning <fct> RL, RL, RL, RL, RL, RL, RL, RL, RM, RL, RL, RL, RL, R...
## $ LotFrontage <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, NA, 9...
## $ LotArea <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10382, ...
## $ Street <fct> Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave,...
## $ Alley <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape <fct> Reg, Reg, IR1, IR1, IR1, IR1, Reg, IR1, Reg, Reg, Reg...
## $ LandContour <fct> Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl...
## $ Utilities <fct> AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, AllPu...
## $ LotConfig <fct> Inside, FR2, Inside, Corner, FR2, Inside, Inside, Cor...
## $ LandSlope <fct> Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl...
## $ Neighborhood <fct> CollgCr, Veenker, CollgCr, Crawfor, NoRidge, Mitchel,...
## $ Condition1 <fct> Norm, Feedr, Norm, Norm, Norm, Norm, Norm, PosN, Arte...
## $ Condition2 <fct> Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm,...
## $ BldgType <fct> 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam,...
## $ HouseStyle <fct> 2Story, 1Story, 2Story, 2Story, 2Story, 1.5Fin, 1Stor...
## $ OverallQual <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, 6, 4,...
## $ OverallCond <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, 7, 5,...
## $ YearBuilt <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, 1931,...
## $ YearRemodAdd <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, 1950,...
## $ RoofStyle <fct> Gable, Gable, Gable, Gable, Gable, Gable, Gable, Gabl...
## $ RoofMatl <fct> CompShg, CompShg, CompShg, CompShg, CompShg, CompShg,...
## $ Exterior1st <fct> VinylSd, MetalSd, VinylSd, Wd Sdng, VinylSd, VinylSd,...
## $ Exterior2nd <fct> VinylSd, MetalSd, VinylSd, Wd Shng, VinylSd, VinylSd,...
## $ MasVnrType <fct> BrkFace, None, BrkFace, None, BrkFace, None, Stone, S...
## $ MasVnrArea <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, 0, 30...
## $ ExterQual <fct> Gd, TA, Gd, TA, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, G...
## $ ExterCond <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, T...
## $ Foundation <fct> PConc, CBlock, PConc, BrkTil, PConc, Wood, PConc, CBl...
## $ BsmtQual <fct> Gd, Gd, Gd, TA, Gd, Gd, Ex, Gd, TA, TA, TA, Ex, TA, G...
## $ BsmtCond <fct> TA, TA, TA, Gd, TA, TA, TA, TA, TA, TA, TA, TA, TA, T...
## $ BsmtExposure <fct> No, Gd, Mn, No, Av, No, Av, Mn, No, No, No, No, No, A...
## $ BsmtFinType1 <fct> GLQ, ALQ, GLQ, ALQ, GLQ, GLQ, GLQ, ALQ, Unf, GLQ, Rec...
## $ BsmtFinSF1 <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851, 906,...
## $ BsmtFinType2 <fct> Unf, Unf, Unf, Unf, Unf, Unf, Unf, BLQ, Unf, Unf, Unf...
## $ BsmtFinSF2 <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ BsmtUnfSF <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140, 134,...
## $ TotalBsmtSF <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952, 991,...
## $ Heating <fct> GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA,...
## $ HeatingQC <fct> Ex, Ex, Ex, Gd, Ex, Ex, Ex, Ex, Gd, Ex, Ex, Ex, TA, E...
## $ CentralAir <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,...
## $ Electrical <fct> SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrk...
## $ X1stFlrSF <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022, 107...
## $ X2ndFlrSF <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, 1142,...
## $ LowQualFinSF <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ GrLivArea <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, 1774,...
## $ BsmtFullBath <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,...
## $ BsmtHalfBath <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ FullBath <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 2,...
## $ HalfBath <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,...
## $ BedroomAbvGr <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 2, 2,...
## $ KitchenAbvGr <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2,...
## $ KitchenQual <fct> Gd, TA, Gd, Gd, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, G...
## $ TotRmsAbvGrd <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5, 5, 6...
## $ Functional <fct> Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Min1, Typ, Ty...
## $ Fireplaces <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, 1, 0,...
## $ FireplaceQu <fct> NA, TA, TA, Gd, TA, NA, Gd, TA, TA, TA, NA, Gd, NA, G...
## $ GarageType <fct> Attchd, Attchd, Attchd, Detchd, Attchd, Attchd, Attch...
## $ GarageYrBlt <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, 1931,...
## $ GarageFinish <fct> RFn, RFn, RFn, Unf, RFn, Unf, RFn, RFn, Unf, RFn, Unf...
## $ GarageCars <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, 2, 2,...
## $ GarageArea <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205, 384...
## $ GarageQual <fct> TA, TA, TA, TA, TA, TA, TA, TA, Fa, Gd, TA, TA, TA, T...
## $ GarageCond <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, T...
## $ PavedDrive <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,...
## $ WoodDeckSF <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, 140, ...
## $ OpenPorchSF <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, 33, 2...
## $ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, 176, ...
## $ X3SsnPorch <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ ScreenPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, ...
## $ PoolArea <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ PoolQC <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence <fct> NA, NA, NA, NA, NA, MnPrv, NA, NA, NA, NA, NA, NA, NA...
## $ MiscFeature <fct> NA, NA, NA, NA, NA, Shed, NA, Shed, NA, NA, NA, NA, N...
## $ MiscVal <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0, 0, 7...
## $ MoSold <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, 7, 3,...
## $ YrSold <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, 2008,...
## $ SaleType <fct> WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, New, WD, ...
## $ SaleCondition <fct> Normal, Normal, Normal, Abnorml, Normal, Normal, Norm...
## $ SalePrice <int> 208500, 181500, 223500, 140000, 250000, 143000, 30700...
# Keep only the response (SalePrice) and four predictors, then drop every row
# that has a missing value in any of the retained columns.
data_house <- data_house %>%
  select(
    SalePrice,
    LotArea,
    LotFrontage,
    MSSubClass,
    YrSold
  ) %>%
  na.omit()
TaskClassif$new digunakan jika peubah respon kita berupa peubah biner atau multiclass (kategorik), sedangkan TaskRegr$new digunakan jika responnya berupa peubah numerik.
# Wrap the cleaned data in an mlr3 regression task with SalePrice as target.
task_house <- TaskRegr$new(
  id = "house",
  backend = data_house,
  target = "SalePrice"
)
id merupakan nama dari task (bisa diisi dengan nama apa pun). backend adalah data yang ingin dimodelkan, dengan catatan peubah responnya harus berupa peubah numerik untuk TaskRegr (dan berupa factor untuk TaskClassif). target adalah nama kolom yang dijadikan peubah respon. Sebagai contoh, model LASSO (learner regr.cv_glmnet) dari package glmnet memiliki argumen-argumen yang bisa dilihat dengan menggunakan sintaks berikut.
# Tabulate the hyperparameters exposed by the cross-validated glmnet learner.
lrn("regr.cv_glmnet")$param_set %>%
  as.data.table()
## id class lower upper levels nlevels
## 1: family ParamFct NA NA gaussian,poisson 2
## 2: offset ParamUty NA NA Inf
## 3: alpha ParamDbl 0 1 Inf
## 4: nfolds ParamInt 3 Inf Inf
## 5: type.measure ParamFct NA NA deviance,class,auc,mse,mae 5
## 6: s ParamDbl 0 Inf Inf
## 7: lambda.min.ratio ParamDbl 0 1 Inf
## 8: lambda ParamUty NA NA Inf
## 9: standardize ParamLgl NA NA TRUE,FALSE 2
## 10: intercept ParamLgl NA NA TRUE,FALSE 2
## 11: thresh ParamDbl 0 Inf Inf
## 12: dfmax ParamInt 0 Inf Inf
## 13: pmax ParamInt 0 Inf Inf
## 14: exclude ParamInt 1 Inf Inf
## 15: penalty.factor ParamUty NA NA Inf
## 16: lower.limits ParamUty NA NA Inf
## 17: upper.limits ParamUty NA NA Inf
## 18: maxit ParamInt 1 Inf Inf
## 19: type.gaussian ParamFct NA NA covariance,naive 2
## 20: type.logistic ParamFct NA NA Newton,modified.Newton 2
## 21: type.multinomial ParamFct NA NA ungrouped,grouped 2
## 22: keep ParamLgl NA NA TRUE,FALSE 2
## 23: parallel ParamLgl NA NA TRUE,FALSE 2
## 24: trace.it ParamInt 0 1 2
## 25: foldid ParamUty NA NA Inf
## 26: alignment ParamFct NA NA lambda,fraction 2
## 27: grouped ParamLgl NA NA TRUE,FALSE 2
## 28: gamma ParamUty NA NA Inf
## 29: relax ParamLgl NA NA TRUE,FALSE 2
## 30: fdev ParamDbl 0 1 Inf
## 31: devmax ParamDbl 0 1 Inf
## 32: eps ParamDbl 0 1 Inf
## 33: epsnr ParamDbl 0 1 Inf
## 34: big ParamDbl -Inf Inf Inf
## 35: mnlam ParamInt 1 Inf Inf
## 36: pmin ParamDbl 0 1 Inf
## 37: exmx ParamDbl -Inf Inf Inf
## 38: prec ParamDbl -Inf Inf Inf
## 39: mxit ParamInt 1 Inf Inf
## 40: mxitnr ParamInt 1 Inf Inf
## 41: newoffset ParamUty NA NA Inf
## 42: predict.gamma ParamDbl -Inf Inf Inf
## id class lower upper levels nlevels
## is_bounded special_vals default storage_type tags
## 1: TRUE <list[0]> gaussian character train
## 2: FALSE <list[0]> list train
## 3: TRUE <list[0]> 1 numeric train
## 4: FALSE <list[0]> 10 integer train
## 5: TRUE <list[0]> deviance character train
## 6: FALSE <list[2]> lambda.1se numeric predict
## 7: TRUE <list[0]> <NoDefault[3]> numeric train
## 8: FALSE <list[0]> <NoDefault[3]> list train
## 9: TRUE <list[0]> TRUE logical train
## 10: TRUE <list[0]> TRUE logical train
## 11: FALSE <list[0]> 1e-07 numeric train
## 12: FALSE <list[0]> <NoDefault[3]> integer train
## 13: FALSE <list[0]> <NoDefault[3]> integer train
## 14: FALSE <list[0]> <NoDefault[3]> integer train
## 15: FALSE <list[0]> <NoDefault[3]> list train
## 16: FALSE <list[0]> <NoDefault[3]> list train
## 17: FALSE <list[0]> <NoDefault[3]> list train
## 18: FALSE <list[0]> 100000 integer train
## 19: TRUE <list[0]> <NoDefault[3]> character train
## 20: TRUE <list[0]> <NoDefault[3]> character train
## 21: TRUE <list[0]> <NoDefault[3]> character train
## 22: TRUE <list[0]> FALSE logical train
## 23: TRUE <list[0]> FALSE logical train
## 24: TRUE <list[0]> 0 integer train
## 25: FALSE <list[0]> list train
## 26: TRUE <list[0]> lambda character train
## 27: TRUE <list[0]> TRUE logical train
## 28: FALSE <list[0]> <NoDefault[3]> list train
## 29: TRUE <list[0]> FALSE logical train
## 30: TRUE <list[0]> 1e-05 numeric train
## 31: TRUE <list[0]> 0.999 numeric train
## 32: TRUE <list[0]> 1e-06 numeric train
## 33: TRUE <list[0]> 1e-08 numeric train
## 34: FALSE <list[0]> 9.9e+35 numeric train
## 35: FALSE <list[0]> 5 integer train
## 36: TRUE <list[0]> 1e-09 numeric train
## 37: FALSE <list[0]> 250 numeric train
## 38: FALSE <list[0]> 1e-10 numeric train
## 39: FALSE <list[0]> 100 integer train
## 40: FALSE <list[0]> 25 integer train
## 41: FALSE <list[0]> <NoDefault[3]> list predict
## 42: FALSE <list[0]> 1 numeric predict
## is_bounded special_vals default storage_type tags
# Candidate learners that are trained and compared further down.
regresi_linear <- lrn("regr.lm")
# alpha = 1 selects the pure L1 (LASSO) penalty in glmnet.
regresi_lasso <- lrn("regr.cv_glmnet", alpha = 1)
pohon_regresi <- lrn("regr.rpart")
# Without explicit settings the xgboost learner runs a single boosting round
# (a placeholder value in mlr3learners that is meant to be overridden) and uses
# the deprecated "reg:linear" objective, which leaves the model badly
# under-fitted and triggers a deprecation warning on every fit. Request a
# realistic number of rounds and the current squared-error objective instead.
# NOTE(review): the benchmark output recorded later in this file was produced
# with the old settings; re-run the benchmark to refresh those numbers.
gradient_boosting <- lrn(
  "regr.xgboost",
  nrounds = 100L,
  objective = "reg:squarederror"
)
# importance = "impurity" makes ranger store impurity-based variable
# importance, which is read from the fitted model later on.
random_forest <- lrn("regr.ranger", importance = "impurity")
# Overview of the resampling strategies registered in mlr3.
mlr_resamplings %>% as.data.table()
## key params iters
## 1: bootstrap repeats,ratio 30
## 2: custom 0
## 3: cv folds 10
## 4: holdout ratio 1
## 5: insample 1
## 6: loo NA
## 7: repeated_cv repeats,folds 100
## 8: subsampling repeats,ratio 30
# Hold-out resampling: 80% of the rows for training, the rest for testing.
resample_holdout <- rsmp("holdout", ratio = 0.8)

# Linear regression fitted on the complete task, followed by the lm summary.
regresi_linear$train(task_house)
regresi_linear$model %>% summary()
##
## Call:
## stats::lm(formula = task$formula(), data = task$data())
##
## Residuals:
## Min 1Q Median 3Q Max
## -373288 -46598 -18389 31847 515175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.661e+06 3.339e+06 0.797 0.426
## LotArea 2.098e+00 3.089e-01 6.791 1.75e-11 ***
## LotFrontage 9.852e+02 1.070e+02 9.204 < 2e-16 ***
## MSSubClass 9.960e+01 5.569e+01 1.788 0.074 .
## YrSold -1.283e+03 1.663e+03 -0.772 0.441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76630 on 1196 degrees of freedom
## Multiple R-squared: 0.1583, Adjusted R-squared: 0.1555
## F-statistic: 56.24 on 4 and 1196 DF, p-value: < 2.2e-16
# Regression tree: fit on the complete task and draw the fitted tree.
pohon_regresi$train(task_house)
rpart.plot::rpart.plot(
  pohon_regresi$model,
  type = 5,
  roundint = FALSE,
  tweak = 1.5
)

# Random forest: fit on the complete task and show the variable importance.
random_forest$train(task_house)
random_forest$model$variable.importance
## LotArea LotFrontage MSSubClass YrSold
## 3.418793e+12 2.315635e+12 1.252661e+12 4.847773e+11
# Tidy up: turn the named importance vector into a two-column data frame.
# unname() keeps the predictor names out of the row names, so the frame is
# created with plain integer row names.
importance <- data.frame(
  Predictors = names(random_forest$model$variable.importance),
  impurity = unname(random_forest$model$variable.importance)
)
# Show the predictors ordered from most to least important.
importance %>%
  arrange(desc(impurity))
## Predictors impurity
## 1 LotArea 3.418793e+12
## 2 LotFrontage 2.315635e+12
## 3 MSSubClass 1.252661e+12
## 4 YrSold 4.847773e+11
# The hold-out partition is drawn at random. Fix the RNG seed first so the
# train/test split, and every metric computed from it, is reproducible.
set.seed(123)
# Train the linear model on the training part and predict the test part;
# store_models = TRUE keeps the fitted model inside the result object.
train_test <- resample(
  task = task_house,
  learner = regresi_linear,
  resampling = resample_holdout,
  store_models = TRUE
)
## INFO [18:00:19.070] Applying learner 'regr.lm' on task 'house' (iter 1/1)
Jika ingin melakukan training untuk model lainnya, cukup ganti learner = regresi_linear dengan regresi_lasso, pohon_regresi, random_forest, atau gradient_boosting. Hasil prediksi pada data testing dapat dilihat sebagai berikut.
# Test-set predictions of the hold-out run as a data.table; show the first rows.
prediksi_test <- train_test$prediction() %>%
  as.data.table()
head(prediksi_test)
## row_id truth response
## 1: 15 159000 177096.1
## 2: 17 325300 220854.0
## 3: 20 129900 150515.0
## 4: 28 165500 173621.5
## 5: 29 277500 172660.1
## 6: 35 160000 188076.4
Untuk menghitung performa model dengan menggunakan ukuran kebaikan prediksi (misalnya RMSE), bisa menggunakan sintaks di bawah ini. Fungsi msr merupakan fungsi yang dapat mengakses ukuran-ukuran kebaikan model yang ada di dalam package mlr3.
# Root mean squared error of the hold-out predictions.
train_test$aggregate(measures = msr("regr.rmse"))
## regr.rmse
## 75207.14
Jika ingin menggunakan beberapa kriteria kebaikan model sekaligus:
# Several performance measures at once, rounded to three decimals.
train_test$aggregate(
  measures = list(
    msr("regr.rmse"),
    msr("regr.mae"),
    msr("regr.mape"),
    msr("regr.srho")
  )
) %>%
  round(digits = 3)
## regr.rmse regr.mae regr.mape regr.srho
## 75207.138 55674.671 0.319 0.493
# Learners entering the benchmark comparison.
model_house <- list(
  regresi_linear,
  regresi_lasso,
  pohon_regresi,
  random_forest,
  gradient_boosting
)
# 10-fold cross-validation used to evaluate every learner.
resample_cv <- rsmp("cv", folds = 10)
Fungsi benchmark_grid digunakan untuk memasukkan informasi-informasi yang dibutuhkan untuk komparasi, seperti data yang digunakan (tasks), model yang ingin dikomparasi (learners), dan metode pembagian data yang digunakan (resamplings).
# Fold assignment is random and several of the learners are stochastic, so
# fix the RNG seed before building the design. When the benchmark is run right
# after this block, the whole comparison becomes reproducible.
set.seed(123)
# Cross every task with every learner and the chosen resampling strategy.
design <- benchmark_grid(
  tasks = task_house,
  learners = model_house,
  resamplings = resample_cv
)
Kemudian fungsi benchmark digunakan untuk menjalankan (running) komparasi model berdasarkan desain yang sudah dirancang. Karena terdapat 5 model dan masing-masing model menjalankan 10-fold cross-validation, maka iterasi yang dilakukan ada sebanyak 50 kali.
# Run every learner/fold combination of the design and keep the fitted models.
bmr <- benchmark(design = design, store_models = TRUE)
## INFO [18:00:20.166] Benchmark with 50 resampling iterations
## INFO [18:00:20.187] Applying learner 'regr.cv_glmnet' on task 'house' (iter 7/10)
## INFO [18:00:20.471] Applying learner 'regr.cv_glmnet' on task 'house' (iter 6/10)
## INFO [18:00:20.576] Applying learner 'regr.rpart' on task 'house' (iter 4/10)
## INFO [18:00:20.623] Applying learner 'regr.lm' on task 'house' (iter 7/10)
## INFO [18:00:20.658] Applying learner 'regr.ranger' on task 'house' (iter 2/10)
## INFO [18:00:20.887] Applying learner 'regr.ranger' on task 'house' (iter 5/10)
## INFO [18:00:21.337] Applying learner 'regr.cv_glmnet' on task 'house' (iter 3/10)
## INFO [18:00:21.515] Applying learner 'regr.rpart' on task 'house' (iter 1/10)
## INFO [18:00:21.552] Applying learner 'regr.cv_glmnet' on task 'house' (iter 9/10)
## INFO [18:00:21.770] Applying learner 'regr.lm' on task 'house' (iter 10/10)
## INFO [18:00:21.805] Applying learner 'regr.ranger' on task 'house' (iter 9/10)
## INFO [18:00:22.027] Applying learner 'regr.xgboost' on task 'house' (iter 2/10)
## [18:00:22] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:22.096] Applying learner 'regr.rpart' on task 'house' (iter 3/10)
## INFO [18:00:22.123] Applying learner 'regr.rpart' on task 'house' (iter 9/10)
## INFO [18:00:22.139] Applying learner 'regr.cv_glmnet' on task 'house' (iter 10/10)
## INFO [18:00:22.318] Applying learner 'regr.lm' on task 'house' (iter 5/10)
## INFO [18:00:22.352] Applying learner 'regr.xgboost' on task 'house' (iter 5/10)
## [18:00:22] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:22.397] Applying learner 'regr.xgboost' on task 'house' (iter 10/10)
## [18:00:22] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:22.786] Applying learner 'regr.lm' on task 'house' (iter 4/10)
## INFO [18:00:22.819] Applying learner 'regr.lm' on task 'house' (iter 9/10)
## INFO [18:00:22.855] Applying learner 'regr.rpart' on task 'house' (iter 8/10)
## INFO [18:00:22.893] Applying learner 'regr.lm' on task 'house' (iter 8/10)
## INFO [18:00:22.927] Applying learner 'regr.cv_glmnet' on task 'house' (iter 1/10)
## INFO [18:00:23.103] Applying learner 'regr.rpart' on task 'house' (iter 7/10)
## INFO [18:00:23.124] Applying learner 'regr.rpart' on task 'house' (iter 10/10)
## INFO [18:00:23.164] Applying learner 'regr.ranger' on task 'house' (iter 1/10)
## INFO [18:00:23.425] Applying learner 'regr.xgboost' on task 'house' (iter 1/10)
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:23.445] Applying learner 'regr.xgboost' on task 'house' (iter 9/10)
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:23.475] Applying learner 'regr.xgboost' on task 'house' (iter 8/10)
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:23.493] Applying learner 'regr.ranger' on task 'house' (iter 7/10)
## INFO [18:00:23.655] Applying learner 'regr.lm' on task 'house' (iter 3/10)
## INFO [18:00:23.670] Applying learner 'regr.lm' on task 'house' (iter 6/10)
## INFO [18:00:23.687] Applying learner 'regr.xgboost' on task 'house' (iter 6/10)
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:23.706] Applying learner 'regr.rpart' on task 'house' (iter 6/10)
## INFO [18:00:23.727] Applying learner 'regr.ranger' on task 'house' (iter 3/10)
## INFO [18:00:24.169] Applying learner 'regr.cv_glmnet' on task 'house' (iter 5/10)
## INFO [18:00:24.338] Applying learner 'regr.rpart' on task 'house' (iter 2/10)
## INFO [18:00:24.360] Applying learner 'regr.lm' on task 'house' (iter 1/10)
## INFO [18:00:24.384] Applying learner 'regr.ranger' on task 'house' (iter 10/10)
## INFO [18:00:24.555] Applying learner 'regr.xgboost' on task 'house' (iter 4/10)
## [18:00:24] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:24.571] Applying learner 'regr.lm' on task 'house' (iter 2/10)
## INFO [18:00:24.587] Applying learner 'regr.xgboost' on task 'house' (iter 7/10)
## [18:00:24] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:24.609] Applying learner 'regr.ranger' on task 'house' (iter 8/10)
## INFO [18:00:24.761] Applying learner 'regr.ranger' on task 'house' (iter 4/10)
## INFO [18:00:24.921] Applying learner 'regr.cv_glmnet' on task 'house' (iter 4/10)
## INFO [18:00:25.344] Applying learner 'regr.cv_glmnet' on task 'house' (iter 2/10)
## INFO [18:00:25.486] Applying learner 'regr.cv_glmnet' on task 'house' (iter 8/10)
## INFO [18:00:25.676] Applying learner 'regr.xgboost' on task 'house' (iter 3/10)
## [18:00:25] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO [18:00:25.721] Applying learner 'regr.rpart' on task 'house' (iter 5/10)
## INFO [18:00:25.765] Applying learner 'regr.ranger' on task 'house' (iter 6/10)
## INFO [18:00:25.977] Finished benchmark
Berdasarkan nilai RMSE, MAE, dan MAPE (semakin kecil semakin baik) pada hasil di bawah ini, model yang memiliki performa prediksi terbaik adalah model Random Forest.
# Aggregate the cross-validated scores of every learner for three measures.
result <- bmr$aggregate(
  measures = list(
    msr("regr.rmse"),
    msr("regr.mae"),
    msr("regr.mape")
  )
)
result
## nr resample_result task_id learner_id resampling_id iters regr.rmse
## 1: 1 <ResampleResult[21]> house regr.lm cv 10 78637.18
## 2: 2 <ResampleResult[21]> house regr.cv_glmnet cv 10 81680.89
## 3: 3 <ResampleResult[21]> house regr.rpart cv 10 68659.46
## 4: 4 <ResampleResult[21]> house regr.ranger cv 10 62786.19
## 5: 5 <ResampleResult[21]> house regr.xgboost cv 10 147491.93
## regr.mae regr.mape
## 1: 56013.97 0.3455374
## 2: 59912.05 0.3799168
## 3: 47402.72 0.2831441
## 4: 42345.02 0.2473125
## 5: 127576.96 0.6752066
# Load the new, unlabelled houses that are to be scored.
data_house_baru <- read.csv(
  "https://raw.githubusercontent.com/Rifqiaulya/poladatawaktu/main/house_price2.csv"
)
# Compact overview of every column (name, type, first values).
data_house_baru %>% glimpse()
## Rows: 1,459
## Columns: 80
## $ Id <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469,...
## $ MSSubClass <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 160, 16...
## $ MSZoning <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL",...
## $ LotFrontage <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, 21, 2...
## $ LotArea <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 8402, 1...
## $ Street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave...
## $ Alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1...
## $ LandContour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl", "Lvl...
## $ Utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "Al...
## $ LotConfig <chr> "Inside", "Corner", "Inside", "Inside", "Inside", "Co...
## $ LandSlope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl...
## $ Neighborhood <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr", "G...
## $ Condition1 <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm", "Nor...
## $ Condition2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm...
## $ BldgType <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam", "1F...
## $ HouseStyle <chr> "1Story", "1Story", "2Story", "2Story", "1Story", "2S...
## $ OverallQual <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, 8, 9,...
## $ OverallCond <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5,...
## $ YearBuilt <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, 1990,...
## $ YearRemodAdd <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, 1990,...
## $ RoofStyle <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gable", "...
## $ RoofMatl <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg"...
## $ Exterior1st <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdBoard"...
## $ Exterior2nd <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdBoard"...
## $ MasVnrType <chr> "None", "BrkFace", "None", "BrkFace", "None", "None",...
## $ MasVnrArea <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0, 0, 1...
## $ ExterQual <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA",...
## $ ExterCond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", "TA",...
## $ Foundation <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "PConc...
## $ BsmtQual <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", "Gd",...
## $ BsmtCond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ BsmtExposure <chr> "No", "No", "No", "No", "No", "No", "No", "No", "Gd",...
## $ BsmtFinType1 <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ", "Unf...
## $ BsmtFinSF1 <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 1051, 1...
## $ BsmtFinType2 <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf...
## $ BsmtFinSF2 <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, 0, 0,...
## $ BsmtUnfSF <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0, 354,...
## $ TotalBsmtSF <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300, 882,...
## $ Heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA...
## $ HeatingQC <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", "Gd",...
## $ CentralAir <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ Electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr",...
## $ X1stFlrSF <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341, 882,...
## $ X2ndFlrSF <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 567, 60...
## $ LowQualFinSF <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ GrLivArea <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1341, ...
## $ BsmtFullBath <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,...
## $ BsmtHalfBath <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ FullBath <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,...
## $ HalfBath <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,...
## $ BedroomAbvGr <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, 3, 3,...
## $ KitchenAbvGr <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ KitchenQual <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", "Gd",...
## $ TotRmsAbvGrd <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10, 7, 7...
## $ Functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ...
## $ Fireplaces <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,...
## $ FireplaceQu <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA, "Fa...
## $ GarageType <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "At...
## $ GarageYrBlt <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, 1990,...
## $ GarageFinish <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin", "Fin...
## $ GarageCars <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, 3, 3,...
## $ GarageArea <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525, 511...
## $ GarageQual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ GarageCond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ PavedDrive <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ WoodDeckSF <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 203, 27...
## $ OpenPorchSF <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0, 30,...
## $ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ X3SsnPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ ScreenPorch <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ PoolArea <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ PoolQC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence <chr> "MnPrv", NA, "MnPrv", NA, NA, NA, "GdPrv", NA, NA, "M...
## $ MiscFeature <chr> NA, "Gar2", NA, NA, NA, NA, "Shed", NA, NA, NA, NA, N...
## $ MiscVal <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ MoSold <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, 6, 6,...
## $ YrSold <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010,...
## $ SaleType <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD",...
## $ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "No...
# Keep exactly the predictor columns used for training. The target is removed
# by name rather than by position, so this keeps working if the column order
# of data_house changes. all_of() marks the character vector as an external
# selection, which avoids tidyselect's ambiguity/deprecation warning and
# errors clearly if a predictor is missing from the new data.
# Rows with a missing value in any predictor are dropped.
data_house_baru <- data_house_baru %>%
  select(all_of(setdiff(names(data_house), "SalePrice"))) %>%
  na.omit()
Prediksi dengan random forest. Fungsi $train digunakan untuk melakukan training pada data keseluruhan (tanpa ada proses pembagian data). Terakhir, fungsi predict_newdata digunakan untuk melakukan prediksi pada data baru.
# Refit the random forest on the complete task, then score the new houses.
random_forest$train(task_house)
prediksi_random_forest_new <- random_forest$predict_newdata(data_house_baru)
# Show the predictions as a data.table.
prediksi_random_forest_new %>%
  as.data.table()
## row_id truth response
## 1: 1 NA 205178.50
## 2: 2 NA 222272.98
## 3: 3 NA 279907.76
## 4: 4 NA 235509.60
## 5: 5 NA 220573.00
## ---
## 1228: 1228 NA 91558.57
## 1229: 1229 NA 100683.52
## 1230: 1230 NA 283116.58
## 1231: 1231 NA 161688.99
## 1232: 1232 NA 242193.50