GAM practice

omon das

2023-09-18

library(mlr)

library(tidyverse)

library(rpart)

# Load the built-in airquality data set into the workspace.
data("airquality")

# Ozone is the regression target, so rows where it is missing are dropped.
# Missing values in the remaining columns are kept here and handled later.
airquality <- airquality %>%
  data.frame() %>%
  filter(!is.na(Ozone))

# Show the cleaned data.
airquality
##     Ozone Solar.R Wind Temp Month Day
## 1      41     190  7.4   67     5   1
## 2      36     118  8.0   72     5   2
## 3      12     149 12.6   74     5   3
## 4      18     313 11.5   62     5   4
## 5      28      NA 14.9   66     5   6
## 6      23     299  8.6   65     5   7
## 7      19      99 13.8   59     5   8
## 8       8      19 20.1   61     5   9
## 9       7      NA  6.9   74     5  11
## 10     16     256  9.7   69     5  12
## 11     11     290  9.2   66     5  13
## 12     14     274 10.9   68     5  14
## 13     18      65 13.2   58     5  15
## 14     14     334 11.5   64     5  16
## 15     34     307 12.0   66     5  17
## 16      6      78 18.4   57     5  18
## 17     30     322 11.5   68     5  19
## 18     11      44  9.7   62     5  20
## 19      1       8  9.7   59     5  21
## 20     11     320 16.6   73     5  22
## 21      4      25  9.7   61     5  23
## 22     32      92 12.0   61     5  24
## 23     23      13 12.0   67     5  28
## 24     45     252 14.9   81     5  29
## 25    115     223  5.7   79     5  30
## 26     37     279  7.4   76     5  31
## 27     29     127  9.7   82     6   7
## 28     71     291 13.8   90     6   9
## 29     39     323 11.5   87     6  10
## 30     23     148  8.0   82     6  13
## 31     21     191 14.9   77     6  16
## 32     37     284 20.7   72     6  17
## 33     20      37  9.2   65     6  18
## 34     12     120 11.5   73     6  19
## 35     13     137 10.3   76     6  20
## 36    135     269  4.1   84     7   1
## 37     49     248  9.2   85     7   2
## 38     32     236  9.2   81     7   3
## 39     64     175  4.6   83     7   5
## 40     40     314 10.9   83     7   6
## 41     77     276  5.1   88     7   7
## 42     97     267  6.3   92     7   8
## 43     97     272  5.7   92     7   9
## 44     85     175  7.4   89     7  10
## 45     10     264 14.3   73     7  12
## 46     27     175 14.9   81     7  13
## 47      7      48 14.3   80     7  15
## 48     48     260  6.9   81     7  16
## 49     35     274 10.3   82     7  17
## 50     61     285  6.3   84     7  18
## 51     79     187  5.1   87     7  19
## 52     63     220 11.5   85     7  20
## 53     16       7  6.9   74     7  21
## 54     80     294  8.6   86     7  24
## 55    108     223  8.0   85     7  25
## 56     20      81  8.6   82     7  26
## 57     52      82 12.0   86     7  27
## 58     82     213  7.4   88     7  28
## 59     50     275  7.4   86     7  29
## 60     64     253  7.4   83     7  30
## 61     59     254  9.2   81     7  31
## 62     39      83  6.9   81     8   1
## 63      9      24 13.8   81     8   2
## 64     16      77  7.4   82     8   3
## 65     78      NA  6.9   86     8   4
## 66     35      NA  7.4   85     8   5
## 67     66      NA  4.6   87     8   6
## 68    122     255  4.0   89     8   7
## 69     89     229 10.3   90     8   8
## 70    110     207  8.0   90     8   9
## 71     44     192 11.5   86     8  12
## 72     28     273 11.5   82     8  13
## 73     65     157  9.7   80     8  14
## 74     22      71 10.3   77     8  16
## 75     59      51  6.3   79     8  17
## 76     23     115  7.4   76     8  18
## 77     31     244 10.9   78     8  19
## 78     44     190 10.3   78     8  20
## 79     21     259 15.5   77     8  21
## 80      9      36 14.3   72     8  22
## 81     45     212  9.7   79     8  24
## 82    168     238  3.4   81     8  25
## 83     73     215  8.0   86     8  26
## 84     76     203  9.7   97     8  28
## 85    118     225  2.3   94     8  29
## 86     84     237  6.3   96     8  30
## 87     85     188  6.3   94     8  31
## 88     96     167  6.9   91     9   1
## 89     78     197  5.1   92     9   2
## 90     73     183  2.8   93     9   3
## 91     91     189  4.6   93     9   4
## 92     47      95  7.4   87     9   5
## 93     32      92 15.5   84     9   6
## 94     20     252 10.9   80     9   7
## 95     23     220 10.3   78     9   8
## 96     21     230 10.9   75     9   9
## 97     24     259  9.7   73     9  10
## 98     44     236 14.9   81     9  11
## 99     21     259 15.5   76     9  12
## 100    28     238  6.3   77     9  13
## 101     9      24 10.9   71     9  14
## 102    13     112 11.5   71     9  15
## 103    46     237  6.9   78     9  16
## 104    18     224 13.8   67     9  17
## 105    13      27 10.3   76     9  18
## 106    24     238 10.3   68     9  19
## 107    16     201  8.0   82     9  20
## 108    13     238 12.6   64     9  21
## 109    23      14  9.2   71     9  22
## 110    36     139 10.3   81     9  23
## 111     7      49 10.3   69     9  24
## 112    14      20 16.6   63     9  25
## 113    30     193  6.9   70     9  26
## 114    14     191 14.3   75     9  28
## 115    18     131  8.0   76     9  29
## 116    20     223 11.5   68     9  30
# Imputation strategy: missing values are predicted by an rpart regression
# learner.
imputeMethod <- imputeLearner("regr.rpart")

# Stand-alone imputation of the data frame, kept for reference only:
# air.imp <- impute(as.data.frame(airquality), classes = list(integer = imputeMethod))
# air.imp

# Base learner: gamboost regression.
lrn <- makeLearner("regr.gamboost")

# Wrap the learner so that columns of class integer are imputed with
# imputeMethod as part of the learner itself.
imp.wrp <- makeImputeWrapper(
  learner = lrn,
  classes = list(integer = imputeMethod)
)

# Show the wrapped learner.
imp.wrp
## Learner regr.gamboost.imputed from package mboost
## Type: regr
## Name: ; Short name: 
## Class: ImputeWrapper
## Properties: numerics,factors,weights,missings
## Predict-Type: response
## Hyperparameters:
# Regression task: predict Ozone from the remaining airquality columns.
tsk <- makeRegrTask(data = airquality, target = "Ozone")

# Score every feature with the linear-correlation filter and plot the scores.
feat <- generateFilterValuesData(task = tsk, method = "linear.correlation")
plotFilterValues(feat)

# Put the same filter in front of the imputing learner so that feature
# selection becomes part of the learner.
feat.wrp <- makeFilterWrapper(
  learner = imp.wrp,
  fw.method = "linear.correlation"
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(feat.wrp)
##                                   Type  len      Def
## fw.method                     discrete    -        -
## fw.base.methods               discrete    -        -
## fw.perc                        numeric    -        -
## fw.abs                         integer    -        -
## fw.threshold                   numeric    -        -
## fw.fun                        function    -        -
## fw.fun.args                    untyped    -   <NULL>
## fw.mandatory.feat              untyped    -        -
## baselearner                   discrete    -        -
## dfbase                         integer    -        4
## offset                         numeric    -        -
## family                        discrete    - Gaussian
## custom.family.definition       untyped    -        -
## nuirange                 numericvector <NA>    0,100
## d                              numeric    -        -
## mstop                          integer    -      100
## nu                             numeric    -      0.1
## risk                          discrete    -        -
## stopintern                     logical    -    FALSE
## trace                          logical    -    FALSE
##                                                            Constr Req Tunable
## fw.method                anova.test,auc,carscore,cforest.impor...   -    TRUE
## fw.base.methods          anova.test,auc,carscore,cforest.impor...   -    TRUE
## fw.perc                                                    0 to 1   -    TRUE
## fw.abs                                                   0 to Inf   -    TRUE
## fw.threshold                                          -Inf to Inf   -    TRUE
## fw.fun                                                          -   -    TRUE
## fw.fun.args                                                     -   -    TRUE
## fw.mandatory.feat                                               -   -    TRUE
## baselearner                                        bbs,bols,btree   -    TRUE
## dfbase                                                -Inf to Inf   -    TRUE
## offset                                                -Inf to Inf   -    TRUE
## family                   Gaussian,Laplace,Huber,Poisson,GammaR...   -    TRUE
## custom.family.definition                                        -   Y    TRUE
## nuirange                                              -Inf to Inf   Y    TRUE
## d                                                     -Inf to Inf   Y    TRUE
## mstop                                                    1 to Inf   -    TRUE
## nu                                                         0 to 1   -    TRUE
## risk                                             inbag,oobag,none   -    TRUE
## stopintern                                                      -   -    TRUE
## trace                                                           -   -   FALSE
##                          Trafo
## fw.method                    -
## fw.base.methods              -
## fw.perc                      -
## fw.abs                       -
## fw.threshold                 -
## fw.fun                       -
## fw.fun.args                  -
## fw.mandatory.feat            -
## baselearner                  -
## dfbase                       -
## offset                       -
## family                       -
## custom.family.definition     -
## nuirange                     -
## d                            -
## mstop                        -
## nu                           -
## risk                         -
## stopintern                   -
## trace                        -
# Search space for the filter wrapper: the minimum filter score a feature
# must reach in order to be kept.
ps <- makeParamSet(
  makeNumericParam("fw.threshold", lower = 0.2, upper = 1)
)

# Grid search over the search space.
sc <- makeTuneControlGrid()

# 10-fold cross-validation, used for tuning here and for the outer
# resampling later on.
# NOTE(review): no seed is set in the visible script, so the fold assignment
# (and therefore the reported errors) will differ between runs.
kfold <- makeResampleDesc("CV", iters = 10)

# Tune fw.threshold on the full task, optimising RMSE. The measure is passed
# by name; the original relied on an unnamed trailing argument being matched
# to `measures`.
tp <- tuneParams(
  learner = feat.wrp,
  task = tsk,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = sc
)

# Self-tuning learner for nested resampling. The measure is set explicitly so
# the inner tuning optimises the same criterion (RMSE) as the stand-alone
# tuning above instead of falling back to the learner type's default measure.
twrap <- makeTuneWrapper(
  learner = feat.wrp,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = sc
)

# Show the tuning result.
tp
## Tune result:
## Op. pars: fw.threshold=0.2
## rmse.test.rmse=19.0059419
# Reduce the task to the features whose filter score reaches the tuned
# threshold.
feat.new <- filterFeatures(task = tsk, fval = feat, threshold = unlist(tp$x))

# Fit the imputing learner on the filtered task and predict the same
# observations. Predicting on feat.new rather than the unfiltered tsk keeps
# the prediction data limited to the columns the model was trained on; the
# rows and truth values are the same in both tasks.
# NOTE(review): these are in-sample predictions, so they are not an estimate
# of generalisation error.
predict(train(imp.wrp, feat.new), feat.new)
## Prediction: 116 observations
## predict.type: response
## threshold: 
## time: 0.00
##   id truth response
## 1  1    41 32.42692
## 2  2    36 21.03379
## 3  3    12 13.48028
## 4  4    18 21.11129
## 5  5    28 21.17879
## 6  6    23 28.91712
## ... (#rows: 116, #cols: 3)
# Nested resampling: the tune wrapper re-tunes fw.threshold inside every outer
# fold, and the outer folds are scored with RMSE.
resample(
  learner = twrap,
  task = tsk,
  resampling = kfold,
  measures = rmse
)
## Resample Result
## Task: airquality
## Learner: regr.gamboost.imputed.filtered.tuned
## Aggr perf: rmse.test.rmse=19.1592216
## Runtime: 31.0128