Packages

#install.packages("tidyverse")
#install.packages("mlr3verse")
#install.packages("kknn")
#install.packages("e1071")
#install.packages("ranger")
#install.packages("rpart.plot")
#install.packages("glmnet")
#install.packages("xgboost")

Library

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: mlr3
## Loading required package: mlr3filters
## Loading required package: mlr3learners
## Loading required package: mlr3pipelines
## Loading required package: mlr3tuning
## Loading required package: mlr3viz
## Loading required package: paradox

Import Data

# Load the training data straight from GitHub; text columns become factors.
data_house <- read.csv(
  file = "https://raw.githubusercontent.com/Rifqiaulya/poladatawaktu/main/house_price1.csv",
  stringsAsFactors = TRUE
)
# Structural overview: one line per column with its type and first values.
data_house %>% glimpse()
## Rows: 1,460
## Columns: 81
## $ Id            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ MSSubClass    <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60, 20, ...
## $ MSZoning      <fct> RL, RL, RL, RL, RL, RL, RL, RL, RM, RL, RL, RL, RL, R...
## $ LotFrontage   <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, NA, 9...
## $ LotArea       <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10382, ...
## $ Street        <fct> Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave,...
## $ Alley         <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape      <fct> Reg, Reg, IR1, IR1, IR1, IR1, Reg, IR1, Reg, Reg, Reg...
## $ LandContour   <fct> Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl...
## $ Utilities     <fct> AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, AllPu...
## $ LotConfig     <fct> Inside, FR2, Inside, Corner, FR2, Inside, Inside, Cor...
## $ LandSlope     <fct> Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl...
## $ Neighborhood  <fct> CollgCr, Veenker, CollgCr, Crawfor, NoRidge, Mitchel,...
## $ Condition1    <fct> Norm, Feedr, Norm, Norm, Norm, Norm, Norm, PosN, Arte...
## $ Condition2    <fct> Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm,...
## $ BldgType      <fct> 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam,...
## $ HouseStyle    <fct> 2Story, 1Story, 2Story, 2Story, 2Story, 1.5Fin, 1Stor...
## $ OverallQual   <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, 6, 4,...
## $ OverallCond   <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, 7, 5,...
## $ YearBuilt     <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, 1931,...
## $ YearRemodAdd  <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, 1950,...
## $ RoofStyle     <fct> Gable, Gable, Gable, Gable, Gable, Gable, Gable, Gabl...
## $ RoofMatl      <fct> CompShg, CompShg, CompShg, CompShg, CompShg, CompShg,...
## $ Exterior1st   <fct> VinylSd, MetalSd, VinylSd, Wd Sdng, VinylSd, VinylSd,...
## $ Exterior2nd   <fct> VinylSd, MetalSd, VinylSd, Wd Shng, VinylSd, VinylSd,...
## $ MasVnrType    <fct> BrkFace, None, BrkFace, None, BrkFace, None, Stone, S...
## $ MasVnrArea    <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, 0, 30...
## $ ExterQual     <fct> Gd, TA, Gd, TA, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, G...
## $ ExterCond     <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, T...
## $ Foundation    <fct> PConc, CBlock, PConc, BrkTil, PConc, Wood, PConc, CBl...
## $ BsmtQual      <fct> Gd, Gd, Gd, TA, Gd, Gd, Ex, Gd, TA, TA, TA, Ex, TA, G...
## $ BsmtCond      <fct> TA, TA, TA, Gd, TA, TA, TA, TA, TA, TA, TA, TA, TA, T...
## $ BsmtExposure  <fct> No, Gd, Mn, No, Av, No, Av, Mn, No, No, No, No, No, A...
## $ BsmtFinType1  <fct> GLQ, ALQ, GLQ, ALQ, GLQ, GLQ, GLQ, ALQ, Unf, GLQ, Rec...
## $ BsmtFinSF1    <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851, 906,...
## $ BsmtFinType2  <fct> Unf, Unf, Unf, Unf, Unf, Unf, Unf, BLQ, Unf, Unf, Unf...
## $ BsmtFinSF2    <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ BsmtUnfSF     <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140, 134,...
## $ TotalBsmtSF   <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952, 991,...
## $ Heating       <fct> GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA,...
## $ HeatingQC     <fct> Ex, Ex, Ex, Gd, Ex, Ex, Ex, Ex, Gd, Ex, Ex, Ex, TA, E...
## $ CentralAir    <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,...
## $ Electrical    <fct> SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrk...
## $ X1stFlrSF     <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022, 107...
## $ X2ndFlrSF     <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, 1142,...
## $ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ GrLivArea     <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, 1774,...
## $ BsmtFullBath  <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,...
## $ BsmtHalfBath  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ FullBath      <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 2,...
## $ HalfBath      <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,...
## $ BedroomAbvGr  <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 2, 2,...
## $ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2,...
## $ KitchenQual   <fct> Gd, TA, Gd, Gd, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, G...
## $ TotRmsAbvGrd  <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5, 5, 6...
## $ Functional    <fct> Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Min1, Typ, Ty...
## $ Fireplaces    <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, 1, 0,...
## $ FireplaceQu   <fct> NA, TA, TA, Gd, TA, NA, Gd, TA, TA, TA, NA, Gd, NA, G...
## $ GarageType    <fct> Attchd, Attchd, Attchd, Detchd, Attchd, Attchd, Attch...
## $ GarageYrBlt   <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, 1931,...
## $ GarageFinish  <fct> RFn, RFn, RFn, Unf, RFn, Unf, RFn, RFn, Unf, RFn, Unf...
## $ GarageCars    <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, 2, 2,...
## $ GarageArea    <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205, 384...
## $ GarageQual    <fct> TA, TA, TA, TA, TA, TA, TA, TA, Fa, Gd, TA, TA, TA, T...
## $ GarageCond    <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, T...
## $ PavedDrive    <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,...
## $ WoodDeckSF    <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, 140, ...
## $ OpenPorchSF   <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, 33, 2...
## $ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, 176, ...
## $ X3SsnPorch    <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ ScreenPorch   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, ...
## $ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ PoolQC        <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence         <fct> NA, NA, NA, NA, NA, MnPrv, NA, NA, NA, NA, NA, NA, NA...
## $ MiscFeature   <fct> NA, NA, NA, NA, NA, Shed, NA, Shed, NA, NA, NA, NA, N...
## $ MiscVal       <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0, 0, 7...
## $ MoSold        <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, 7, 3,...
## $ YrSold        <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, 2008,...
## $ SaleType      <fct> WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, New, WD, ...
## $ SaleCondition <fct> Normal, Normal, Normal, Abnorml, Normal, Normal, Norm...
## $ SalePrice     <int> 208500, 181500, 223500, 140000, 250000, 143000, 30700...

Filter data

# Keep the response (SalePrice) and the four predictors used in this analysis,
# then drop every row that has a missing value in any of those columns.
data_house <- na.omit(
  select(data_house, SalePrice, LotArea, LotFrontage, MSSubClass, YrSold)
)

Import ke ekosistem mlr3

TaskClassif$new digunakan jika peubah respon berupa peubah kategorik (biner atau multiclass), sedangkan TaskRegr$new digunakan jika peubah responnya berupa peubah numerik.

# Wrap the filtered data in an mlr3 regression task with SalePrice as target.
task_house <- TaskRegr$new(
  id = "house",
  backend = data_house,
  target = "SalePrice"
)

Argumen id merupakan nama dari task (bisa diisi dengan nama apa pun). backend adalah data yang ingin dimodelkan, dengan catatan peubah responnya harus berupa peubah numerik karena task ini adalah task regresi. target adalah nama kolom yang dijadikan peubah respon. Setiap model (learner) memiliki argumen-argumen yang dapat diatur; sebagai contoh, argumen-argumen model regresi glmnet dengan validasi silang (learner regr.cv_glmnet dari package glmnet) bisa dilihat dengan menggunakan

# Tabulate the hyperparameters exposed by the cross-validated glmnet learner.
lrn("regr.cv_glmnet")$param_set %>%
  as.data.table()
##                   id    class lower upper                     levels nlevels
##  1:           family ParamFct    NA    NA           gaussian,poisson       2
##  2:           offset ParamUty    NA    NA                                Inf
##  3:            alpha ParamDbl     0     1                                Inf
##  4:           nfolds ParamInt     3   Inf                                Inf
##  5:     type.measure ParamFct    NA    NA deviance,class,auc,mse,mae       5
##  6:                s ParamDbl     0   Inf                                Inf
##  7: lambda.min.ratio ParamDbl     0     1                                Inf
##  8:           lambda ParamUty    NA    NA                                Inf
##  9:      standardize ParamLgl    NA    NA                 TRUE,FALSE       2
## 10:        intercept ParamLgl    NA    NA                 TRUE,FALSE       2
## 11:           thresh ParamDbl     0   Inf                                Inf
## 12:            dfmax ParamInt     0   Inf                                Inf
## 13:             pmax ParamInt     0   Inf                                Inf
## 14:          exclude ParamInt     1   Inf                                Inf
## 15:   penalty.factor ParamUty    NA    NA                                Inf
## 16:     lower.limits ParamUty    NA    NA                                Inf
## 17:     upper.limits ParamUty    NA    NA                                Inf
## 18:            maxit ParamInt     1   Inf                                Inf
## 19:    type.gaussian ParamFct    NA    NA           covariance,naive       2
## 20:    type.logistic ParamFct    NA    NA     Newton,modified.Newton       2
## 21: type.multinomial ParamFct    NA    NA          ungrouped,grouped       2
## 22:             keep ParamLgl    NA    NA                 TRUE,FALSE       2
## 23:         parallel ParamLgl    NA    NA                 TRUE,FALSE       2
## 24:         trace.it ParamInt     0     1                                  2
## 25:           foldid ParamUty    NA    NA                                Inf
## 26:        alignment ParamFct    NA    NA            lambda,fraction       2
## 27:          grouped ParamLgl    NA    NA                 TRUE,FALSE       2
## 28:            gamma ParamUty    NA    NA                                Inf
## 29:            relax ParamLgl    NA    NA                 TRUE,FALSE       2
## 30:             fdev ParamDbl     0     1                                Inf
## 31:           devmax ParamDbl     0     1                                Inf
## 32:              eps ParamDbl     0     1                                Inf
## 33:            epsnr ParamDbl     0     1                                Inf
## 34:              big ParamDbl  -Inf   Inf                                Inf
## 35:            mnlam ParamInt     1   Inf                                Inf
## 36:             pmin ParamDbl     0     1                                Inf
## 37:             exmx ParamDbl  -Inf   Inf                                Inf
## 38:             prec ParamDbl  -Inf   Inf                                Inf
## 39:             mxit ParamInt     1   Inf                                Inf
## 40:           mxitnr ParamInt     1   Inf                                Inf
## 41:        newoffset ParamUty    NA    NA                                Inf
## 42:    predict.gamma ParamDbl  -Inf   Inf                                Inf
##                   id    class lower upper                     levels nlevels
##     is_bounded special_vals        default storage_type    tags
##  1:       TRUE    <list[0]>       gaussian    character   train
##  2:      FALSE    <list[0]>                        list   train
##  3:       TRUE    <list[0]>              1      numeric   train
##  4:      FALSE    <list[0]>             10      integer   train
##  5:       TRUE    <list[0]>       deviance    character   train
##  6:      FALSE    <list[2]>     lambda.1se      numeric predict
##  7:       TRUE    <list[0]> <NoDefault[3]>      numeric   train
##  8:      FALSE    <list[0]> <NoDefault[3]>         list   train
##  9:       TRUE    <list[0]>           TRUE      logical   train
## 10:       TRUE    <list[0]>           TRUE      logical   train
## 11:      FALSE    <list[0]>          1e-07      numeric   train
## 12:      FALSE    <list[0]> <NoDefault[3]>      integer   train
## 13:      FALSE    <list[0]> <NoDefault[3]>      integer   train
## 14:      FALSE    <list[0]> <NoDefault[3]>      integer   train
## 15:      FALSE    <list[0]> <NoDefault[3]>         list   train
## 16:      FALSE    <list[0]> <NoDefault[3]>         list   train
## 17:      FALSE    <list[0]> <NoDefault[3]>         list   train
## 18:      FALSE    <list[0]>         100000      integer   train
## 19:       TRUE    <list[0]> <NoDefault[3]>    character   train
## 20:       TRUE    <list[0]> <NoDefault[3]>    character   train
## 21:       TRUE    <list[0]> <NoDefault[3]>    character   train
## 22:       TRUE    <list[0]>          FALSE      logical   train
## 23:       TRUE    <list[0]>          FALSE      logical   train
## 24:       TRUE    <list[0]>              0      integer   train
## 25:      FALSE    <list[0]>                        list   train
## 26:       TRUE    <list[0]>         lambda    character   train
## 27:       TRUE    <list[0]>           TRUE      logical   train
## 28:      FALSE    <list[0]> <NoDefault[3]>         list   train
## 29:       TRUE    <list[0]>          FALSE      logical   train
## 30:       TRUE    <list[0]>          1e-05      numeric   train
## 31:       TRUE    <list[0]>          0.999      numeric   train
## 32:       TRUE    <list[0]>          1e-06      numeric   train
## 33:       TRUE    <list[0]>          1e-08      numeric   train
## 34:      FALSE    <list[0]>        9.9e+35      numeric   train
## 35:      FALSE    <list[0]>              5      integer   train
## 36:       TRUE    <list[0]>          1e-09      numeric   train
## 37:      FALSE    <list[0]>            250      numeric   train
## 38:      FALSE    <list[0]>          1e-10      numeric   train
## 39:      FALSE    <list[0]>            100      integer   train
## 40:      FALSE    <list[0]>             25      integer   train
## 41:      FALSE    <list[0]> <NoDefault[3]>         list predict
## 42:      FALSE    <list[0]>              1      numeric predict
##     is_bounded special_vals        default storage_type    tags

Pemodelan

# Candidate learners compared throughout this analysis.
regresi_linear <- lrn("regr.lm")
# alpha = 1 selects the pure LASSO penalty in glmnet.
regresi_lasso <- lrn("regr.cv_glmnet", alpha = 1)
pohon_regresi <- lrn("regr.rpart")
# Set the number of boosting rounds explicitly: xgboost has no default of its
# own and mlr3learners only supplies a placeholder (a single round in older
# releases -- TODO confirm for the installed version), which leaves the model
# badly underfitted. 100 rounds is a starting point and should be tuned.
# The explicit objective avoids xgboost's deprecated "reg:linear" alias.
gradient_boosting <- lrn(
  "regr.xgboost",
  nrounds = 100,
  objective = "reg:squarederror"
)
# importance = "impurity" makes ranger record variable importance scores.
random_forest <- lrn("regr.ranger", importance = "impurity")

Pembagian Data

# Overview of the resampling strategies registered in mlr3.
mlr_resamplings %>%
  as.data.table()
##            key        params iters
## 1:   bootstrap repeats,ratio    30
## 2:      custom                   0
## 3:          cv         folds    10
## 4:     holdout         ratio     1
## 5:    insample                   1
## 6:         loo                  NA
## 7: repeated_cv repeats,folds   100
## 8: subsampling repeats,ratio    30
# Holdout split: 80% of the rows for training, the remainder for testing.
resample_holdout <- rsmp("holdout", ratio = 0.8)

Interpretasi Model

# Linear regression: fit on the whole task and inspect the coefficient table.
regresi_linear$train(task_house)
regresi_linear$model %>% summary()
## 
## Call:
## stats::lm(formula = task$formula(), data = task$data())
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -373288  -46598  -18389   31847  515175 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.661e+06  3.339e+06   0.797    0.426    
## LotArea      2.098e+00  3.089e-01   6.791 1.75e-11 ***
## LotFrontage  9.852e+02  1.070e+02   9.204  < 2e-16 ***
## MSSubClass   9.960e+01  5.569e+01   1.788    0.074 .  
## YrSold      -1.283e+03  1.663e+03  -0.772    0.441    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 76630 on 1196 degrees of freedom
## Multiple R-squared:  0.1583, Adjusted R-squared:  0.1555 
## F-statistic: 56.24 on 4 and 1196 DF,  p-value: < 2.2e-16
# Regression tree: fit on the whole task and draw the fitted tree.
pohon_regresi$train(task_house)
rpart.plot::rpart.plot(
  pohon_regresi$model,
  type = 5,
  roundint = FALSE,
  tweak = 1.5
)

# Random forest: fit on the whole task and show the impurity-based importance
# score of each predictor.
random_forest$train(task_house)
random_forest$model[["variable.importance"]]
##      LotArea  LotFrontage   MSSubClass       YrSold 
## 3.418793e+12 2.315635e+12 1.252661e+12 4.847773e+11
# Tidy up: store the importance scores in a two-column data frame (default
# integer row names) and list the predictors from most to least important.
importance <- data.frame(
  Predictors = names(random_forest$model$variable.importance),
  impurity = unname(random_forest$model$variable.importance)
)

arrange(importance, desc(impurity))
##    Predictors     impurity
## 1     LotArea 3.418793e+12
## 2 LotFrontage 2.315635e+12
## 3  MSSubClass 1.252661e+12
## 4      YrSold 4.847773e+11

Performa Model

# Fix the RNG seed so that the random train/test partition drawn inside
# resample() -- and every score computed from it -- is identical on each run.
set.seed(123)
# Fit the linear model on the training part of the holdout split and predict
# the test part; store_models = TRUE keeps the fitted model in the result.
train_test <- resample(
  task = task_house,
  learner = regresi_linear,
  resampling = resample_holdout,
  store_models = TRUE
)
## INFO  [18:00:19.070] Applying learner 'regr.lm' on task 'house' (iter 1/1)

Jika ingin melakukan training untuk model lainnya, cukup ganti learner = regresi_linear dengan regresi_lasso, pohon_regresi, random_forest, atau gradient_boosting. Hasil testing

# Test-set predictions as a table; preview the first rows.
prediksi_test <- as.data.table(train_test$prediction())
prediksi_test %>% head()
##    row_id  truth response
## 1:     15 159000 177096.1
## 2:     17 325300 220854.0
## 3:     20 129900 150515.0
## 4:     28 165500 173621.5
## 5:     29 277500 172660.1
## 6:     35 160000 188076.4

Untuk menghitung performa model dengan ukuran kebaikan tertentu (di sini RMSE), bisa menggunakan sintaks di bawah ini. Fungsi msr merupakan fungsi yang dapat mengakses ukuran-ukuran kebaikan model yang ada di dalam package mlr3.

# Root mean squared error of the holdout predictions.
train_test$aggregate(measures = msr("regr.rmse"))
## regr.rmse 
##  75207.14

Jika banyak kriteria kebaikan

# Score the holdout result on several measures at once, rounded to 3 decimals.
round(
  train_test$aggregate(
    lapply(c("regr.rmse", "regr.mae", "regr.mape", "regr.srho"), msr)
  ),
  digits = 3
)
## regr.rmse  regr.mae regr.mape regr.srho 
## 75207.138 55674.671     0.319     0.493

Komparasi Model

# Learners entering the comparison.
model_house <- list(
  regresi_linear, regresi_lasso, pohon_regresi, random_forest, gradient_boosting
)
# 10-fold cross-validation used for the comparison.
resample_cv <- rsmp("cv", folds = 10)

Fungsi benchmark_grid digunakan untuk memasukkan informasi-informasi yang dibutuhkan untuk komparasi, seperti data yang digunakan (tasks), model yang ingin dikomparasi (learners), dan metode pembagian data yang digunakan (resamplings).

# Seed the RNG first: benchmark_grid() instantiates the resampling, i.e. draws
# the cross-validation folds, so without a seed the folds (and the scores of
# the benchmark that follows) change on every run.
set.seed(123)
# Full design: every learner is evaluated on the task with the same CV scheme.
design <- benchmark_grid(
  tasks = task_house,
  learners = model_house,
  resamplings = resample_cv
)

Kemudian fungsi benchmark digunakan untuk menjalankan (running) komparasi model berdasarkan desain yang sudah dirancang. Karena terdapat 5 model dan masing-masing model menjalankan 10-fold cross-validation, iterasi yang dilakukan ada sebanyak 50 kali.

# Run every learner/fold combination of the design and keep the fitted models.
bmr <- benchmark(design = design, store_models = TRUE)
## INFO  [18:00:20.166] Benchmark with 50 resampling iterations 
## INFO  [18:00:20.187] Applying learner 'regr.cv_glmnet' on task 'house' (iter 7/10) 
## INFO  [18:00:20.471] Applying learner 'regr.cv_glmnet' on task 'house' (iter 6/10) 
## INFO  [18:00:20.576] Applying learner 'regr.rpart' on task 'house' (iter 4/10) 
## INFO  [18:00:20.623] Applying learner 'regr.lm' on task 'house' (iter 7/10) 
## INFO  [18:00:20.658] Applying learner 'regr.ranger' on task 'house' (iter 2/10) 
## INFO  [18:00:20.887] Applying learner 'regr.ranger' on task 'house' (iter 5/10) 
## INFO  [18:00:21.337] Applying learner 'regr.cv_glmnet' on task 'house' (iter 3/10) 
## INFO  [18:00:21.515] Applying learner 'regr.rpart' on task 'house' (iter 1/10) 
## INFO  [18:00:21.552] Applying learner 'regr.cv_glmnet' on task 'house' (iter 9/10) 
## INFO  [18:00:21.770] Applying learner 'regr.lm' on task 'house' (iter 10/10) 
## INFO  [18:00:21.805] Applying learner 'regr.ranger' on task 'house' (iter 9/10) 
## INFO  [18:00:22.027] Applying learner 'regr.xgboost' on task 'house' (iter 2/10) 
## [18:00:22] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:22.096] Applying learner 'regr.rpart' on task 'house' (iter 3/10) 
## INFO  [18:00:22.123] Applying learner 'regr.rpart' on task 'house' (iter 9/10) 
## INFO  [18:00:22.139] Applying learner 'regr.cv_glmnet' on task 'house' (iter 10/10) 
## INFO  [18:00:22.318] Applying learner 'regr.lm' on task 'house' (iter 5/10) 
## INFO  [18:00:22.352] Applying learner 'regr.xgboost' on task 'house' (iter 5/10) 
## [18:00:22] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:22.397] Applying learner 'regr.xgboost' on task 'house' (iter 10/10) 
## [18:00:22] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:22.786] Applying learner 'regr.lm' on task 'house' (iter 4/10) 
## INFO  [18:00:22.819] Applying learner 'regr.lm' on task 'house' (iter 9/10) 
## INFO  [18:00:22.855] Applying learner 'regr.rpart' on task 'house' (iter 8/10) 
## INFO  [18:00:22.893] Applying learner 'regr.lm' on task 'house' (iter 8/10) 
## INFO  [18:00:22.927] Applying learner 'regr.cv_glmnet' on task 'house' (iter 1/10) 
## INFO  [18:00:23.103] Applying learner 'regr.rpart' on task 'house' (iter 7/10) 
## INFO  [18:00:23.124] Applying learner 'regr.rpart' on task 'house' (iter 10/10) 
## INFO  [18:00:23.164] Applying learner 'regr.ranger' on task 'house' (iter 1/10) 
## INFO  [18:00:23.425] Applying learner 'regr.xgboost' on task 'house' (iter 1/10) 
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:23.445] Applying learner 'regr.xgboost' on task 'house' (iter 9/10) 
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:23.475] Applying learner 'regr.xgboost' on task 'house' (iter 8/10) 
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:23.493] Applying learner 'regr.ranger' on task 'house' (iter 7/10) 
## INFO  [18:00:23.655] Applying learner 'regr.lm' on task 'house' (iter 3/10) 
## INFO  [18:00:23.670] Applying learner 'regr.lm' on task 'house' (iter 6/10) 
## INFO  [18:00:23.687] Applying learner 'regr.xgboost' on task 'house' (iter 6/10) 
## [18:00:23] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:23.706] Applying learner 'regr.rpart' on task 'house' (iter 6/10) 
## INFO  [18:00:23.727] Applying learner 'regr.ranger' on task 'house' (iter 3/10) 
## INFO  [18:00:24.169] Applying learner 'regr.cv_glmnet' on task 'house' (iter 5/10) 
## INFO  [18:00:24.338] Applying learner 'regr.rpart' on task 'house' (iter 2/10) 
## INFO  [18:00:24.360] Applying learner 'regr.lm' on task 'house' (iter 1/10) 
## INFO  [18:00:24.384] Applying learner 'regr.ranger' on task 'house' (iter 10/10) 
## INFO  [18:00:24.555] Applying learner 'regr.xgboost' on task 'house' (iter 4/10) 
## [18:00:24] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:24.571] Applying learner 'regr.lm' on task 'house' (iter 2/10) 
## INFO  [18:00:24.587] Applying learner 'regr.xgboost' on task 'house' (iter 7/10) 
## [18:00:24] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:24.609] Applying learner 'regr.ranger' on task 'house' (iter 8/10) 
## INFO  [18:00:24.761] Applying learner 'regr.ranger' on task 'house' (iter 4/10) 
## INFO  [18:00:24.921] Applying learner 'regr.cv_glmnet' on task 'house' (iter 4/10) 
## INFO  [18:00:25.344] Applying learner 'regr.cv_glmnet' on task 'house' (iter 2/10) 
## INFO  [18:00:25.486] Applying learner 'regr.cv_glmnet' on task 'house' (iter 8/10) 
## INFO  [18:00:25.676] Applying learner 'regr.xgboost' on task 'house' (iter 3/10) 
## [18:00:25] WARNING: amalgamation/../src/objective/regression_obj.cu:170: reg:linear is now deprecated in favor of reg:squarederror.
## INFO  [18:00:25.721] Applying learner 'regr.rpart' on task 'house' (iter 5/10) 
## INFO  [18:00:25.765] Applying learner 'regr.ranger' on task 'house' (iter 6/10) 
## INFO  [18:00:25.977] Finished benchmark

Hasil Komparasi Model

Berdasarkan nilai RMSE, MAE, dan MAPE yang paling kecil, model yang memiliki performa prediksi terbaik adalah model Random Forest.

# Aggregate each learner's cross-validated RMSE, MAE and MAPE into one table.
result <- bmr$aggregate(
  lapply(c("regr.rmse", "regr.mae", "regr.mape"), msr)
)
result
##    nr      resample_result task_id     learner_id resampling_id iters regr.rmse
## 1:  1 <ResampleResult[21]>   house        regr.lm            cv    10  78637.18
## 2:  2 <ResampleResult[21]>   house regr.cv_glmnet            cv    10  81680.89
## 3:  3 <ResampleResult[21]>   house     regr.rpart            cv    10  68659.46
## 4:  4 <ResampleResult[21]>   house    regr.ranger            cv    10  62786.19
## 5:  5 <ResampleResult[21]>   house   regr.xgboost            cv    10 147491.93
##     regr.mae regr.mape
## 1:  56013.97 0.3455374
## 2:  59912.05 0.3799168
## 3:  47402.72 0.2831441
## 4:  42345.02 0.2473125
## 5: 127576.96 0.6752066

Memprediksi respon

# Load the new houses to be scored (no SalePrice column) and inspect them.
data_house_baru <- read.csv(
  file = "https://raw.githubusercontent.com/Rifqiaulya/poladatawaktu/main/house_price2.csv"
)
data_house_baru %>% glimpse()
## Rows: 1,459
## Columns: 80
## $ Id            <int> 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469,...
## $ MSSubClass    <int> 20, 20, 60, 60, 120, 60, 20, 60, 20, 20, 120, 160, 16...
## $ MSZoning      <chr> "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL",...
## $ LotFrontage   <int> 80, 81, 74, 78, 43, 75, NA, 63, 85, 70, 26, 21, 21, 2...
## $ LotArea       <int> 11622, 14267, 13830, 9978, 5005, 10000, 7980, 8402, 1...
## $ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave...
## $ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape      <chr> "Reg", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1", "IR1...
## $ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl", "Lvl...
## $ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "Al...
## $ LotConfig     <chr> "Inside", "Corner", "Inside", "Inside", "Inside", "Co...
## $ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl...
## $ Neighborhood  <chr> "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr", "G...
## $ Condition1    <chr> "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm", "Nor...
## $ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm...
## $ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "1Fam", "1F...
## $ HouseStyle    <chr> "1Story", "1Story", "2Story", "2Story", "1Story", "2S...
## $ OverallQual   <int> 5, 6, 5, 6, 8, 6, 6, 6, 7, 4, 7, 6, 5, 6, 7, 9, 8, 9,...
## $ OverallCond   <int> 6, 6, 5, 6, 5, 5, 7, 5, 5, 5, 5, 5, 5, 6, 6, 5, 5, 5,...
## $ YearBuilt     <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, 1990,...
## $ YearRemodAdd  <int> 1961, 1958, 1998, 1998, 1992, 1994, 2007, 1998, 1990,...
## $ RoofStyle     <chr> "Gable", "Hip", "Gable", "Gable", "Gable", "Gable", "...
## $ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg"...
## $ Exterior1st   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdBoard"...
## $ Exterior2nd   <chr> "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "HdBoard"...
## $ MasVnrType    <chr> "None", "BrkFace", "None", "BrkFace", "None", "None",...
## $ MasVnrArea    <int> 0, 108, 0, 20, 0, 0, 0, 0, 0, 0, 0, 504, 492, 0, 0, 1...
## $ ExterQual     <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA",...
## $ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", "TA",...
## $ Foundation    <chr> "CBlock", "CBlock", "PConc", "PConc", "PConc", "PConc...
## $ BsmtQual      <chr> "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", "Gd",...
## $ BsmtCond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ BsmtExposure  <chr> "No", "No", "No", "No", "No", "No", "No", "No", "Gd",...
## $ BsmtFinType1  <chr> "Rec", "ALQ", "GLQ", "GLQ", "ALQ", "Unf", "ALQ", "Unf...
## $ BsmtFinSF1    <int> 468, 923, 791, 602, 263, 0, 935, 0, 637, 804, 1051, 1...
## $ BsmtFinType2  <chr> "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf...
## $ BsmtFinSF2    <int> 144, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, 0, 0,...
## $ BsmtUnfSF     <int> 270, 406, 137, 324, 1017, 763, 233, 789, 663, 0, 354,...
## $ TotalBsmtSF   <int> 882, 1329, 928, 926, 1280, 763, 1168, 789, 1300, 882,...
## $ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA...
## $ HeatingQC     <chr> "TA", "TA", "Gd", "Ex", "Ex", "Gd", "Ex", "Gd", "Gd",...
## $ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr",...
## $ X1stFlrSF     <int> 896, 1329, 928, 926, 1280, 763, 1187, 789, 1341, 882,...
## $ X2ndFlrSF     <int> 0, 0, 701, 678, 0, 892, 0, 676, 0, 0, 0, 504, 567, 60...
## $ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ GrLivArea     <int> 896, 1329, 1629, 1604, 1280, 1655, 1187, 1465, 1341, ...
## $ BsmtFullBath  <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,...
## $ BsmtHalfBath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ FullBath      <int> 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2,...
## $ HalfBath      <int> 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,...
## $ BedroomAbvGr  <int> 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 2, 3, 3, 3,...
## $ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ KitchenQual   <chr> "TA", "Gd", "TA", "Gd", "Gd", "TA", "TA", "TA", "Gd",...
## $ TotRmsAbvGrd  <int> 5, 6, 6, 7, 5, 7, 6, 7, 5, 4, 5, 5, 6, 6, 4, 10, 7, 7...
## $ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ...
## $ Fireplaces    <int> 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,...
## $ FireplaceQu   <chr> NA, NA, "TA", "Gd", NA, "TA", NA, "Gd", "Po", NA, "Fa...
## $ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "At...
## $ GarageYrBlt   <int> 1961, 1958, 1997, 1998, 1992, 1993, 1992, 1998, 1990,...
## $ GarageFinish  <chr> "Unf", "Unf", "Fin", "Fin", "RFn", "Fin", "Fin", "Fin...
## $ GarageCars    <int> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 3, 3, 3,...
## $ GarageArea    <int> 730, 312, 482, 470, 506, 440, 420, 393, 506, 525, 511...
## $ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ WoodDeckSF    <int> 140, 393, 212, 360, 0, 157, 483, 0, 192, 240, 203, 27...
## $ OpenPorchSF   <int> 0, 36, 34, 36, 82, 84, 21, 75, 0, 0, 68, 0, 0, 0, 30,...
## $ EnclosedPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ X3SsnPorch    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ ScreenPorch   <int> 120, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence         <chr> "MnPrv", NA, "MnPrv", NA, NA, NA, "GdPrv", NA, NA, "M...
## $ MiscFeature   <chr> NA, "Gar2", NA, NA, NA, NA, "Shed", NA, NA, NA, NA, N...
## $ MiscVal       <int> 0, 12500, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ MoSold        <int> 6, 6, 3, 6, 1, 4, 3, 5, 2, 4, 6, 2, 3, 6, 6, 1, 6, 6,...
## $ YrSold        <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010,...
## $ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD",...
## $ SaleCondition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "No...
# Keep exactly the predictors used for training and drop incomplete rows.
# The response is excluded by name rather than by position, so this keeps
# working if the column order of data_house changes; all_of() makes the
# selection by an external character vector explicit and errors on a
# missing column instead of failing silently.
data_house_baru <- data_house_baru %>%
  select(all_of(setdiff(names(data_house), "SalePrice"))) %>%
  na.omit()

Prediksi dengan random forest. Fungsi $train digunakan untuk melakukan training pada data keseluruhan (tanpa ada proses pembagian data). Terakhir, fungsi predict_newdata digunakan untuk melakukan prediksi pada data baru.

# Refit the random forest on all rows of the task, then score the new houses
# and show the predictions as a table.
random_forest$train(task_house)
prediksi_random_forest_new <- random_forest$predict_newdata(data_house_baru)
prediksi_random_forest_new %>% as.data.table()
##       row_id truth  response
##    1:      1    NA 205178.50
##    2:      2    NA 222272.98
##    3:      3    NA 279907.76
##    4:      4    NA 235509.60
##    5:      5    NA 220573.00
##   ---                       
## 1228:   1228    NA  91558.57
## 1229:   1229    NA 100683.52
## 1230:   1230    NA 283116.58
## 1231:   1231    NA 161688.99
## 1232:   1232    NA 242193.50