library(mlr)
library(tidyverse)
library(rpart)
# Load the built-in airquality data set and drop every row whose regression
# target (Ozone) is missing. Missing predictor values are left in place.
data("airquality")
airquality <- data.frame(airquality) %>%
  filter(!is.na(Ozone))

# Show the filtered data.
airquality
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 28 NA 14.9 66 5 6
## 6 23 299 8.6 65 5 7
## 7 19 99 13.8 59 5 8
## 8 8 19 20.1 61 5 9
## 9 7 NA 6.9 74 5 11
## 10 16 256 9.7 69 5 12
## 11 11 290 9.2 66 5 13
## 12 14 274 10.9 68 5 14
## 13 18 65 13.2 58 5 15
## 14 14 334 11.5 64 5 16
## 15 34 307 12.0 66 5 17
## 16 6 78 18.4 57 5 18
## 17 30 322 11.5 68 5 19
## 18 11 44 9.7 62 5 20
## 19 1 8 9.7 59 5 21
## 20 11 320 16.6 73 5 22
## 21 4 25 9.7 61 5 23
## 22 32 92 12.0 61 5 24
## 23 23 13 12.0 67 5 28
## 24 45 252 14.9 81 5 29
## 25 115 223 5.7 79 5 30
## 26 37 279 7.4 76 5 31
## 27 29 127 9.7 82 6 7
## 28 71 291 13.8 90 6 9
## 29 39 323 11.5 87 6 10
## 30 23 148 8.0 82 6 13
## 31 21 191 14.9 77 6 16
## 32 37 284 20.7 72 6 17
## 33 20 37 9.2 65 6 18
## 34 12 120 11.5 73 6 19
## 35 13 137 10.3 76 6 20
## 36 135 269 4.1 84 7 1
## 37 49 248 9.2 85 7 2
## 38 32 236 9.2 81 7 3
## 39 64 175 4.6 83 7 5
## 40 40 314 10.9 83 7 6
## 41 77 276 5.1 88 7 7
## 42 97 267 6.3 92 7 8
## 43 97 272 5.7 92 7 9
## 44 85 175 7.4 89 7 10
## 45 10 264 14.3 73 7 12
## 46 27 175 14.9 81 7 13
## 47 7 48 14.3 80 7 15
## 48 48 260 6.9 81 7 16
## 49 35 274 10.3 82 7 17
## 50 61 285 6.3 84 7 18
## 51 79 187 5.1 87 7 19
## 52 63 220 11.5 85 7 20
## 53 16 7 6.9 74 7 21
## 54 80 294 8.6 86 7 24
## 55 108 223 8.0 85 7 25
## 56 20 81 8.6 82 7 26
## 57 52 82 12.0 86 7 27
## 58 82 213 7.4 88 7 28
## 59 50 275 7.4 86 7 29
## 60 64 253 7.4 83 7 30
## 61 59 254 9.2 81 7 31
## 62 39 83 6.9 81 8 1
## 63 9 24 13.8 81 8 2
## 64 16 77 7.4 82 8 3
## 65 78 NA 6.9 86 8 4
## 66 35 NA 7.4 85 8 5
## 67 66 NA 4.6 87 8 6
## 68 122 255 4.0 89 8 7
## 69 89 229 10.3 90 8 8
## 70 110 207 8.0 90 8 9
## 71 44 192 11.5 86 8 12
## 72 28 273 11.5 82 8 13
## 73 65 157 9.7 80 8 14
## 74 22 71 10.3 77 8 16
## 75 59 51 6.3 79 8 17
## 76 23 115 7.4 76 8 18
## 77 31 244 10.9 78 8 19
## 78 44 190 10.3 78 8 20
## 79 21 259 15.5 77 8 21
## 80 9 36 14.3 72 8 22
## 81 45 212 9.7 79 8 24
## 82 168 238 3.4 81 8 25
## 83 73 215 8.0 86 8 26
## 84 76 203 9.7 97 8 28
## 85 118 225 2.3 94 8 29
## 86 84 237 6.3 96 8 30
## 87 85 188 6.3 94 8 31
## 88 96 167 6.9 91 9 1
## 89 78 197 5.1 92 9 2
## 90 73 183 2.8 93 9 3
## 91 91 189 4.6 93 9 4
## 92 47 95 7.4 87 9 5
## 93 32 92 15.5 84 9 6
## 94 20 252 10.9 80 9 7
## 95 23 220 10.3 78 9 8
## 96 21 230 10.9 75 9 9
## 97 24 259 9.7 73 9 10
## 98 44 236 14.9 81 9 11
## 99 21 259 15.5 76 9 12
## 100 28 238 6.3 77 9 13
## 101 9 24 10.9 71 9 14
## 102 13 112 11.5 71 9 15
## 103 46 237 6.9 78 9 16
## 104 18 224 13.8 67 9 17
## 105 13 27 10.3 76 9 18
## 106 24 238 10.3 68 9 19
## 107 16 201 8.0 82 9 20
## 108 13 238 12.6 64 9 21
## 109 23 14 9.2 71 9 22
## 110 36 139 10.3 81 9 23
## 111 7 49 10.3 69 9 24
## 112 14 20 16.6 63 9 25
## 113 30 193 6.9 70 9 26
## 114 14 191 14.3 75 9 28
## 115 18 131 8.0 76 9 29
## 116 20 223 11.5 68 9 30
# Base regression learner (gamboost) and the rpart-based imputation method
# that will fill in missing values.
lrn <- makeLearner("regr.gamboost")
imputeMethod <- imputeLearner("regr.rpart")

# Kept for reference: imputing the data frame directly instead of wrapping
# the learner.
# air.imp <- impute(as.data.frame(airquality), classes = list(integer = imputeMethod))
# air.imp

# Wrap the base learner so that integer-typed features are imputed with
# imputeMethod as part of the learner itself, then show the wrapped learner.
imp.wrp <- makeImputeWrapper(
  learner = lrn,
  classes = list(integer = imputeMethod)
)
imp.wrp
## Learner regr.gamboost.imputed from package mboost
## Type: regr
## Name: ; Short name:
## Class: ImputeWrapper
## Properties: numerics,factors,weights,missings
## Predict-Type: response
## Hyperparameters:
# Regression task: Ozone is the target, all other columns are features.
tsk <- makeRegrTask(data = airquality, target = "Ozone")

# Score the features with the "linear.correlation" filter and plot the scores.
feat <- generateFilterValuesData(task = tsk, method = "linear.correlation")
plotFilterValues(feat)

# Put the same filter in front of the imputing learner, then list the
# hyperparameters exposed by the combined learner.
feat.wrp <- makeFilterWrapper(
  learner = imp.wrp,
  fw.method = "linear.correlation"
)
getParamSet(feat.wrp)
## Type len Def
## fw.method discrete - -
## fw.base.methods discrete - -
## fw.perc numeric - -
## fw.abs integer - -
## fw.threshold numeric - -
## fw.fun function - -
## fw.fun.args untyped - <NULL>
## fw.mandatory.feat untyped - -
## baselearner discrete - -
## dfbase integer - 4
## offset numeric - -
## family discrete - Gaussian
## custom.family.definition untyped - -
## nuirange numericvector <NA> 0,100
## d numeric - -
## mstop integer - 100
## nu numeric - 0.1
## risk discrete - -
## stopintern logical - FALSE
## trace logical - FALSE
## Constr Req Tunable
## fw.method anova.test,auc,carscore,cforest.impor... - TRUE
## fw.base.methods anova.test,auc,carscore,cforest.impor... - TRUE
## fw.perc 0 to 1 - TRUE
## fw.abs 0 to Inf - TRUE
## fw.threshold -Inf to Inf - TRUE
## fw.fun - - TRUE
## fw.fun.args - - TRUE
## fw.mandatory.feat - - TRUE
## baselearner bbs,bols,btree - TRUE
## dfbase -Inf to Inf - TRUE
## offset -Inf to Inf - TRUE
## family Gaussian,Laplace,Huber,Poisson,GammaR... - TRUE
## custom.family.definition - Y TRUE
## nuirange -Inf to Inf Y TRUE
## d -Inf to Inf Y TRUE
## mstop 1 to Inf - TRUE
## nu 0 to 1 - TRUE
## risk inbag,oobag,none - TRUE
## stopintern - - TRUE
## trace - - FALSE
## Trafo
## fw.method -
## fw.base.methods -
## fw.perc -
## fw.abs -
## fw.threshold -
## fw.fun -
## fw.fun.args -
## fw.mandatory.feat -
## baselearner -
## dfbase -
## offset -
## family -
## custom.family.definition -
## nuirange -
## d -
## mstop -
## nu -
## risk -
## stopintern -
## trace -
# Tune the filter threshold of the imputing + filtering learner.

# Fix the RNG seed so the random CV fold assignment, and therefore the tuning
# result, is reproducible from run to run.
set.seed(1)

# Search space: a single numeric hyperparameter, fw.threshold, in [0.2, 1].
# NOTE(review): the tuning result recorded in this file picks 0.2, i.e. the
# lower bound of the range; a wider range may be worth trying.
ps <- makeParamSet(makeNumericParam("fw.threshold", lower = 0.2, upper = 1))

# Grid search over the search space, evaluated with 10-fold cross-validation.
sc <- makeTuneControlGrid()
kfold <- makeResampleDesc("CV", iters = 10)

# One-off tuning run scored by RMSE. The measure is passed by name instead of
# relying on a trailing positional argument falling through to `measures`.
tp <- tuneParams(
  learner = feat.wrp,
  task = tsk,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = sc
)

# Self-tuning learner with the same search space, control and inner CV. RMSE
# is given explicitly so the inner tuning optimises the same measure as
# tuneParams() above and as the resample() call that evaluates this wrapper.
twrap <- makeTuneWrapper(
  learner = feat.wrp,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = sc
)
## Tune result:
## Op. pars: fw.threshold=0.2
## rmse.test.rmse=19.0059419
# Reduce the task to the features whose filter score passes the tuned
# threshold found by tuneParams().
feat.new <- filterFeatures(task = tsk, fval = feat, threshold = unlist(tp$x))

# Fit the imputing learner on the reduced task and predict for the
# observations of the original task (the same rows it was trained on).
predict(
  object = train(learner = imp.wrp, task = feat.new),
  task = tsk
)
## Prediction: 116 observations
## predict.type: response
## threshold:
## time: 0.00
## id truth response
## 1 1 41 32.42692
## 2 2 36 21.03379
## 3 3 12 13.48028
## 4 4 18 21.11129
## 5 5 28 21.17879
## 6 6 23 28.91712
## ... (#rows: 116, #cols: 3)
# Evaluate the self-tuning learner with an outer 10-fold CV, reporting RMSE.
resample(
  learner = twrap,
  task = tsk,
  resampling = kfold,
  measures = rmse
)
## Resample Result
## Task: airquality
## Learner: regr.gamboost.imputed.filtered.tuned
## Aggr perf: rmse.test.rmse=19.1592216
## Runtime: 31.0128