# Model optimization

library(mlr)
library(tidyverse)
library(reshape2)


# Load the built-in airquality data and keep only the rows in which the
# response variable (Ozone) was actually recorded.
data("airquality")

air <- filter(airquality, !is.na(Ozone))








# Scatter plot of one column against Ozone with a smoothed trend line.
#
# The five exploratory plots differ only in the column shown on the y axis,
# so the shared layers live in this helper instead of being repeated.
#
# data:  data frame with an `Ozone` column and the column named by `y_var`.
# y_var: name (character string) of the column drawn on the y axis.
# Returns the ggplot object; nothing is drawn until it is printed.
plot_against_ozone <- function(data, y_var) {
  ggplot(data) +
    aes(
      y = .data[[y_var]],
      x = Ozone,
      colour = Ozone,
      size = Ozone
    ) +
    # size and alpha are set as constants here, so every point is drawn
    # with the same size and transparency
    geom_point(alpha = 0.6, size = 6) +
    geom_smooth(color = "red", se = TRUE) +
    scale_color_viridis_c(option = "magma", direction = -1) +
    # label the axis with the plain column name
    labs(y = y_var) +
    theme_bw()
}

lin1 <- plot_against_ozone(air, "Solar.R")
lin2 <- plot_against_ozone(air, "Temp")
lin3 <- plot_against_ozone(air, "Wind")
lin4 <- plot_against_ozone(air, "Month")
lin5 <- plot_against_ozone(air, "Day")



library(patchwork)


# Combine the five scatter plots: three in the upper group, two in the lower.
(lin1 + lin2 + lin3) / (lin4 + lin5)

# Ordinary least-squares baseline: Ozone regressed on every other column.
fit <- lm(formula = Ozone ~ ., data = air)
summary(fit)

## 
## Call:
## lm(formula = Ozone ~ ., data = air)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.014 -12.284  -3.302   8.454  95.348 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -64.11632   23.48249  -2.730  0.00742 ** 
## Solar.R       0.05027    0.02342   2.147  0.03411 *  
## Wind         -3.31844    0.64451  -5.149 1.23e-06 ***
## Temp          1.89579    0.27389   6.922 3.66e-10 ***
## Month        -3.03996    1.51346  -2.009  0.04714 *  
## Day           0.27388    0.22967   1.192  0.23576    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.86 on 105 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.6249, Adjusted R-squared:  0.6071 
## F-statistic: 34.99 on 5 and 105 DF,  p-value: < 2.2e-16

# Base-R diagnostic plots for the linear fit.
plot(fit)

library(naniar)

# Show where values are missing. FALSE is spelled out because the alias `F`
# is an ordinary variable that can be reassigned.
vis_miss(air, cluster = FALSE)

# Overview of column types and missing cells using the "cb_safe" palette.
visdat::vis_dat(air, palette = "cb_safe")

# Regression task: predict Ozone from the remaining columns of `air`.
task <- makeRegrTask(data = air, target = "Ozone")

# Base learners: a plain linear model and a boosted additive model.
lrn.lm <- makeLearner("regr.lm")
lrn.gam <- makeLearner("regr.gamboost")

# Penalised glmnet learners. They are the usual remedy when a model
# overfits; they are set up here regardless, for comparison.
lrn.ridge <- makeLearner("regr.glmnet", id = "ridge", alpha = 0)
lrn.lasso <- makeLearner("regr.glmnet", id = "Lasso", alpha = 1)
lrn.elastic <- makeLearner("regr.glmnet", id = "elastic")

#linearmodel


# Missing values in integer columns are filled in by a regression tree.
imputeMethod <- imputeLearner("regr.rpart")
lrn.lm.imp <- makeImputeWrapper(
  learner = lrn.lm,
  classes = list(integer = imputeMethod)
)

# Feature filtering: score each feature by its linear correlation with the
# target and plot the scores.
fval <- generateFilterValuesData(task, method = "linear.correlation")
plotFilterValues(fval)

# Wrap the imputing learner so the same filter is applied as part of the
# learner itself.
lrn.lm.imp.filter <- makeFilterWrapper(
  learner = lrn.lm.imp,
  fw.method = "linear.correlation"
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(lrn.lm.imp.filter)

##                       Type len    Def                                   Constr
## fw.method         discrete   -      - anova.test,auc,carscore,cforest.impor...
## fw.base.methods   discrete   -      - anova.test,auc,carscore,cforest.impor...
## fw.perc            numeric   -      -                                   0 to 1
## fw.abs             integer   -      -                                 0 to Inf
## fw.threshold       numeric   -      -                              -Inf to Inf
## fw.fun            function   -      -                                        -
## fw.fun.args        untyped   - <NULL>                                        -
## fw.mandatory.feat  untyped   -      -                                        -
## tol                numeric   -  1e-07                                 0 to Inf
## singular.ok        logical   -   TRUE                                        -
##                   Req Tunable Trafo
## fw.method           -    TRUE     -
## fw.base.methods     -    TRUE     -
## fw.perc             -    TRUE     -
## fw.abs              -    TRUE     -
## fw.threshold        -    TRUE     -
## fw.fun              -    TRUE     -
## fw.fun.args         -    TRUE     -
## fw.mandatory.feat   -    TRUE     -
## tol                 -    TRUE     -
## singular.ok         -   FALSE     -

#parameters set

# Fix the RNG seed so the random cross-validation splits, and therefore the
# tuning results that depend on them, are the same on every run.
set.seed(2023)

# Search space: the fraction of top-ranked features kept by the filter.
ps <- makeParamSet(
  makeNumericParam("fw.perc", lower = 0.3, upper = 1)
)

# Evaluate the search space on a regular grid.
sc <- makeTuneControlGrid()

# 5-fold cross-validation.
kfold <- makeResampleDesc("CV", iters = 5)

# Cross-validation and performance: tune fw.perc by cross-validated RMSE.
# `measures` is named explicitly instead of relying on positional matching
# after a run of named arguments.
tp <- tuneParams(
  learner = lrn.lm.imp.filter,
  task = task,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = sc
)

#or

# Self-tuning learner: the grid search over `ps` is run as part of training.
lrn.lm.imp.filter.tune <- makeTuneWrapper(
  learner = lrn.lm.imp.filter,
  resampling = kfold,
  par.set = ps,
  control = sc
)

# Cross-validate the self-tuning learner and report RMSE.
CV <- resample(
  learner = lrn.lm.imp.filter.tune,
  task = task,
  resampling = kfold,
  measures = rmse
)

# Fit on the whole task, then predict those same observations.
model1 <- train(lrn.lm.imp.filter.tune, task)
pred1 <- predict(model1, task)

# Result of the stand-alone tuneParams() grid search.
tp

## Tune result:
## Op. pars: fw.perc=0.533
## rmse.test.rmse=21.6343771

# Cross-validated performance of the self-tuning learner.
CV

## Resample Result
## Task: air
## Learner: regr.lm.imputed.filtered.tuned
## Aggr perf: rmse.test.rmse=22.0180357
## Runtime: 6.31697

# RMSE of pred1, i.e. measured on the same observations model1 was fitted to.
performance(pred1,rmse)

##     rmse 
## 20.67302

# Inspect the learner model held inside the trained tune wrapper.
getLearnerModel(model1)

## Model for learner.id=regr.lm.imputed.filtered; learner.class=FilterWrapper
## Trained on: task.id = air; obs = 116; features = 5
## Hyperparameters: fw.perc=0.767

#nonlinear model


task <- makeRegrTask(data = air, target = "Ozone")
lrn.gamboost <- makeLearner("regr.gamboost")

# Impute missing values in integer columns with a regression tree.
imputeMethod <- imputeLearner("regr.rpart")

# Wrap the gamboost learner created just above, so this section does not
# depend on the equivalent `lrn.gam` object from an earlier section and
# `lrn.gamboost` is not left unused.
lrn.gam.impute <- makeImputeWrapper(
  learner = lrn.gamboost,
  classes = list(integer = imputeMethod)
)

# Automated feature filtering by linear correlation with the target.
lrn.gam.impute.filter <- makeFilterWrapper(
  learner = lrn.gam.impute,
  fw.method = "linear.correlation"
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(lrn.gam.impute.filter)

##                                   Type  len      Def
## fw.method                     discrete    -        -
## fw.base.methods               discrete    -        -
## fw.perc                        numeric    -        -
## fw.abs                         integer    -        -
## fw.threshold                   numeric    -        -
## fw.fun                        function    -        -
## fw.fun.args                    untyped    -   <NULL>
## fw.mandatory.feat              untyped    -        -
## baselearner                   discrete    -        -
## dfbase                         integer    -        4
## offset                         numeric    -        -
## family                        discrete    - Gaussian
## custom.family.definition       untyped    -        -
## nuirange                 numericvector <NA>    0,100
## d                              numeric    -        -
## mstop                          integer    -      100
## nu                             numeric    -      0.1
## risk                          discrete    -        -
## stopintern                     logical    -    FALSE
## trace                          logical    -    FALSE
##                                                            Constr Req Tunable
## fw.method                anova.test,auc,carscore,cforest.impor...   -    TRUE
## fw.base.methods          anova.test,auc,carscore,cforest.impor...   -    TRUE
## fw.perc                                                    0 to 1   -    TRUE
## fw.abs                                                   0 to Inf   -    TRUE
## fw.threshold                                          -Inf to Inf   -    TRUE
## fw.fun                                                          -   -    TRUE
## fw.fun.args                                                     -   -    TRUE
## fw.mandatory.feat                                               -   -    TRUE
## baselearner                                        bbs,bols,btree   -    TRUE
## dfbase                                                -Inf to Inf   -    TRUE
## offset                                                -Inf to Inf   -    TRUE
## family                   Gaussian,Laplace,Huber,Poisson,GammaR...   -    TRUE
## custom.family.definition                                        -   Y    TRUE
## nuirange                                              -Inf to Inf   Y    TRUE
## d                                                     -Inf to Inf   Y    TRUE
## mstop                                                    1 to Inf   -    TRUE
## nu                                                         0 to 1   -    TRUE
## risk                                             inbag,oobag,none   -    TRUE
## stopintern                                                      -   -    TRUE
## trace                                                           -   -   FALSE
##                          Trafo
## fw.method                    -
## fw.base.methods              -
## fw.perc                      -
## fw.abs                       -
## fw.threshold                 -
## fw.fun                       -
## fw.fun.args                  -
## fw.mandatory.feat            -
## baselearner                  -
## dfbase                       -
## offset                       -
## family                       -
## custom.family.definition     -
## nuirange                     -
## d                            -
## mstop                        -
## nu                           -
## risk                         -
## stopintern                   -
## trace                        -

#paraset


# Search space: the fraction of top-ranked features kept by the filter.
ps <- makeParamSet(
  makeNumericParam("fw.perc", lower = 0.2, upper = 1)
)

# 5-fold cross-validation, with the search space evaluated on a grid.
kfold <- makeResampleDesc("CV", iters = 5)
tc <- makeTuneControlGrid()

# Parameter tuning and cross-validation: one-off grid search scored by RMSE.
tp <- tuneParams(
  learner = lrn.gam.impute.filter,
  task = task,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = tc
)

# Alternatively, build a self-tuning learner and cross-validate it.
lrn.gam.impute.filter.tune <- makeTuneWrapper(
  learner = lrn.gam.impute.filter,
  resampling = kfold,
  par.set = ps,
  control = tc
)

resample(
  learner = lrn.gam.impute.filter.tune,
  task = task,
  resampling = kfold,
  measures = rmse
)

## Resample Result
## Task: air
## Learner: regr.gamboost.imputed.filtered.tuned
## Aggr perf: rmse.test.rmse=19.3045404
## Runtime: 13.3062

#model.train


# Fit the self-tuning learner on the whole task and score its predictions
# for those same observations.
model2 <- train(lrn.gam.impute.filter.tune, task)
prediction2 <- predict(model2, task)
performance(prediction2, measures = rmse)

##     rmse 
## 17.02491

# Per-grid-point results of the stand-alone tuning run stored in `tp`.
tuning <- generateHyperParsEffectData(tune.result = tp)
tuning$data

##      fw.perc rmse.test.rmse iteration exec.time
## 1  0.2000000       22.34807         1      0.17
## 2  0.2888889       22.34807         2      0.14
## 3  0.3777778       19.70743         3      0.19
## 4  0.4666667       19.70743         4      0.18
## 5  0.5555556       18.80696         5      0.30
## 6  0.6444444       18.80696         6      0.28
## 7  0.7333333       18.91867         7      0.33
## 8  0.8222222       18.91867         8      0.34
## 9  0.9111111       19.09023         9      0.39
## 10 1.0000000       19.09023        10      0.41

library(plotly)

# Line plot of cross-validated RMSE against fw.perc. The result is stored
# under a descriptive name rather than `plot`, which would shadow
# base::plot() in the workspace.
fw_perc_plot <- plotHyperParsEffect(
  tuning,
  x = "fw.perc",
  y = "rmse.test.rmse",
  plot.type = "line"
)

fw_perc_plot

# Next: elastic net, lasso and ridge regression.

# Print the task summary.
task

## Supervised task: air
## Type: regr
## Target: Ozone
## Observations: 116
## Features:
##    numerics     factors     ordered functionals 
##           5           0           0           0 
## Missings: TRUE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE

# Ridge regression: glmnet with alpha fixed at 0.
lrn.ridge <- makeLearner("regr.glmnet", id = "ridge", alpha = 0)

# Impute missing values in integer columns with a regression tree.
imputeMethod <- imputeLearner("regr.rpart")
lrn.ridge.imp <- makeImputeWrapper(
  learner = lrn.ridge,
  classes = list(integer = imputeMethod)
)

# Grid search over `s` in [0, 100] with 5-fold cross-validation.
ps2 <- makeParamSet(makeNumericParam("s", lower = 0, upper = 100))
kfold2 <- makeResampleDesc("CV", iters = 5)
tc <- makeTuneControlGrid()

lrn.ridge.imp.tune <- makeTuneWrapper(
  learner = lrn.ridge.imp,
  resampling = kfold2,
  par.set = ps2,
  control = tc
)

# Cross-validate the self-tuning learner.
crossvalidate <- resample(
  learner = lrn.ridge.imp.tune,
  task = task,
  resampling = kfold2,
  measures = rmse
)

# Stand-alone tuning run on the plain imputing learner.
tp.ridge <- tuneParams(
  learner = lrn.ridge.imp,
  task = task,
  resampling = kfold2,
  measures = rmse,
  par.set = ps2,
  control = tc
)

crossvalidate

## Resample Result
## Task: air
## Learner: ridge.imputed.tuned
## Aggr perf: rmse.test.rmse=21.6075845
## Runtime: 7.52145

# RMSE at each grid value of `s` from the stand-alone ridge tuning run.
ridge.hyperpars <- generateHyperParsEffectData(tp.ridge)

plotHyperParsEffect(
  ridge.hyperpars,
  x = "s",
  y = "rmse.test.rmse",
  plot.type = "line"
)

# Fit the self-tuning ridge learner on the whole task and score its
# predictions for those same observations.
model.ridge <- train(lrn.ridge.imp.tune, task)
predict.ridge <- predict(model.ridge, task)

performance(predict.ridge, measures = rmse)

##     rmse 
## 20.51208

# Print the task summary.
task

## Supervised task: air
## Type: regr
## Target: Ozone
## Observations: 116
## Features:
##    numerics     factors     ordered functionals 
##           5           0           0           0 
## Missings: TRUE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE

# Lasso: glmnet with alpha fixed at 1.
lrn.lasso <- makeLearner("regr.glmnet", id = "lasso", alpha = 1)

# Impute missing values in integer columns with a regression tree.
imputeMethod <- imputeLearner("regr.rpart")
lrn.lasso.imp <- makeImputeWrapper(
  learner = lrn.lasso,
  classes = list(integer = imputeMethod)
)

# Grid search over `s` in [0, 100] with 10-fold cross-validation.
ps2 <- makeParamSet(makeNumericParam("s", lower = 0, upper = 100))
kfold2 <- makeResampleDesc("CV", iters = 10)
tc <- makeTuneControlGrid()

lrn.lasso.imp.tune <- makeTuneWrapper(
  learner = lrn.lasso.imp,
  resampling = kfold2,
  par.set = ps2,
  control = tc
)

# Cross-validate the self-tuning learner.
crossvalidate2 <- resample(
  learner = lrn.lasso.imp.tune,
  task = task,
  resampling = kfold2,
  measures = rmse
)

# Stand-alone tuning run on the plain imputing learner.
tp.lasso <- tuneParams(
  learner = lrn.lasso.imp,
  task = task,
  resampling = kfold2,
  measures = rmse,
  par.set = ps2,
  control = tc
)

crossvalidate2

## Resample Result
## Task: air
## Learner: lasso.imputed.tuned
## Aggr perf: rmse.test.rmse=21.4163095
## Runtime: 27.6723

# RMSE at each grid value of `s` from the stand-alone lasso tuning run.
lasso.hyperpars <- generateHyperParsEffectData(tp.lasso)

plotHyperParsEffect(
  lasso.hyperpars,
  x = "s",
  y = "rmse.test.rmse",
  plot.type = "line"
)

# Fit the self-tuning lasso learner on the whole task and score its
# predictions for those same observations.
model.lasso <- train(lrn.lasso.imp.tune, task)
predict.lasso <- predict(model.lasso, task)

performance(predict.lasso, measures = rmse)

##     rmse 
## 20.46227

# Print the task summary.
task

## Supervised task: air
## Type: regr
## Target: Ozone
## Observations: 116
## Features:
##    numerics     factors     ordered functionals 
##           5           0           0           0 
## Missings: TRUE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE

# glmnet learner labelled "elastic"; no alpha is supplied here, so the
# learner's default alpha applies.
lrn.elasticnet <- makeLearner("regr.glmnet", id = "elastic")

# Regression-tree imputation for both integer and numeric columns.
imputeMethod <- imputeLearner("regr.rpart")
lrn.elasticnet.impute <- makeImputeWrapper(
  learner = lrn.elasticnet,
  classes = list(integer = imputeMethod, numeric = imputeMethod)
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(lrn.elasticnet.impute)

##                               Type  len      Def           Constr Req Tunable
## family                    discrete    - gaussian gaussian,poisson   -    TRUE
## alpha                      numeric    -        1           0 to 1   -    TRUE
## s                          numeric    -        -         0 to Inf   -    TRUE
## exact                      logical    -    FALSE                -   -    TRUE
## nlambda                    integer    -      100         1 to Inf   -    TRUE
## lambda.min.ratio           numeric    -        -           0 to 1   -    TRUE
## lambda               numericvector <NA>        -         0 to Inf   -    TRUE
## standardize                logical    -     TRUE                -   -    TRUE
## intercept                  logical    -     TRUE                -   -    TRUE
## thresh                     numeric    -    1e-07         0 to Inf   -    TRUE
## dfmax                      integer    -        -         0 to Inf   -    TRUE
## pmax                       integer    -        -         0 to Inf   -    TRUE
## exclude              integervector <NA>        -         1 to Inf   -    TRUE
## penalty.factor       numericvector <NA>        -           0 to 1   -    TRUE
## lower.limits         numericvector <NA>        -        -Inf to 0   -    TRUE
## upper.limits         numericvector <NA>        -         0 to Inf   -    TRUE
## maxit                      integer    -   100000         1 to Inf   -    TRUE
## standardize.response       logical    -    FALSE                -   -    TRUE
## fdev                       numeric    -    1e-05           0 to 1   -    TRUE
## devmax                     numeric    -    0.999           0 to 1   -    TRUE
## eps                        numeric    -    1e-06           0 to 1   -    TRUE
## big                        numeric    -  9.9e+35      -Inf to Inf   -    TRUE
## mnlam                      integer    -        5         1 to Inf   -    TRUE
## pmin                       numeric    -    1e-09           0 to 1   -    TRUE
## exmx                       numeric    -      250      -Inf to Inf   -    TRUE
## prec                       numeric    -    1e-10      -Inf to Inf   -    TRUE
## mxit                       integer    -      100         1 to Inf   -    TRUE
## offset                     untyped    -   <NULL>                -   -    TRUE
## type.gaussian             discrete    -        - covariance,naive   Y    TRUE
## relax                      logical    -    FALSE                -   -    TRUE
##                      Trafo
## family                   -
## alpha                    -
## s                        -
## exact                    -
## nlambda                  -
## lambda.min.ratio         -
## lambda                   -
## standardize              -
## intercept                -
## thresh                   -
## dfmax                    -
## pmax                     -
## exclude                  -
## penalty.factor           -
## lower.limits             -
## upper.limits             -
## maxit                    -
## standardize.response     -
## fdev                     -
## devmax                   -
## eps                      -
## big                      -
## mnlam                    -
## pmin                     -
## exmx                     -
## prec                     -
## mxit                     -
## offset                   -
## type.gaussian            -
## relax                    -

# An elastic net mixes the ridge and lasso penalties through alpha, so alpha
# is tuned together with `s`. With only `s` in the search space, alpha stays
# at the learner's default of 1 and the model is simply the lasso.
ps2 <- makeParamSet(
  makeNumericParam("alpha", lower = 0, upper = 1),
  makeNumericParam("s", lower = 0, upper = 100)
)

kfold3 <- makeResampleDesc("CV", iters = 10)

# Per-parameter grid resolution: `s` keeps the default of 10 points, alpha
# gets 5, which keeps the two-dimensional grid at 50 combinations.
tc <- makeTuneControlGrid(resolution = c(alpha = 5L, s = 10L))

lrn.elasticnet.impute.tune <- makeTuneWrapper(
  learner = lrn.elasticnet.impute,
  resampling = kfold3,
  par.set = ps2,
  control = tc
)

# Cross-validate the self-tuning learner.
cv <- resample(
  learner = lrn.elasticnet.impute.tune,
  task = task,
  resampling = kfold3,
  measures = rmse
)

# Stand-alone tuning run on the plain imputing learner, as in the ridge and
# lasso sections. Passing the tune wrapper here instead would run a second
# grid search inside every single evaluation.
tp.elasticnet <- tuneParams(
  learner = lrn.elasticnet.impute,
  task = task,
  resampling = kfold3,
  measures = rmse,
  par.set = ps2,
  control = tc
)

# Fit the self-tuning learner on the whole task and score its predictions
# for those same observations.
model.elastic <- train(learner = lrn.elasticnet.impute.tune, task = task)

elastic.predict <- predict(model.elastic, task)

performance(elastic.predict, measures = rmse)

##     rmse 
## 20.46227

# Cross-validation result of the self-tuning elastic-net learner.
cv

## Resample Result
## Task: air
## Learner: elastic.imputed.tuned
## Aggr perf: rmse.test.rmse=21.8699670
## Runtime: 31.1308

# Print the task summary.
task

## Supervised task: air
## Type: regr
## Target: Ozone
## Observations: 116
## Features:
##    numerics     factors     ordered functionals 
##           5           0           0           0 
## Missings: TRUE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE

# k-nearest-neighbours regression learner.
lrn.knn <- makeLearner("regr.kknn")

# Regression-tree imputation for both numeric and integer columns.
imputeMethod <- imputeLearner("regr.rpart")
lrn.knn.imp <- makeImputeWrapper(
  learner = lrn.knn,
  classes = list(numeric = imputeMethod, integer = imputeMethod)
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(lrn.knn.imp)

##              Type len     Def                                   Constr Req
## k         integer   -       7                                 1 to Inf   -
## distance  numeric   -       2                                 0 to Inf   -
## kernel   discrete   - optimal rectangular,triangular,epanechnikov,b...   -
## scale     logical   -    TRUE                                        -   -
##          Tunable Trafo
## k           TRUE     -
## distance    TRUE     -
## kernel      TRUE     -
## scale       TRUE     -

# Search space: number of neighbours k between 1 and 30.
ps <- makeParamSet(makeIntegerParam("k", lower = 1, upper = 30))

# 10-fold cross-validation, with the search space evaluated on a grid.
kfold <- makeResampleDesc("CV", iters = 10)
sc <- makeTuneControlGrid()

lrn.knn.imp.tune <- makeTuneWrapper(
  learner = lrn.knn.imp,
  resampling = kfold,
  par.set = ps,
  control = sc
)

# Cross-validate the self-tuning learner and show the result.
cv <- resample(
  learner = lrn.knn.imp.tune,
  task = task,
  resampling = kfold,
  measures = rmse
)

cv

## Resample Result
## Task: air
## Learner: regr.kknn.imputed.tuned
## Aggr perf: rmse.test.rmse=19.4788632
## Runtime: 29.2668

# Stand-alone tuning run on the plain imputing k-NN learner.
tp <- tuneParams(
  learner = lrn.knn.imp,
  task = task,
  resampling = kfold,
  measures = rmse,
  par.set = ps,
  control = sc
)

# Fit the self-tuning learner on the whole task and score its predictions
# for those same observations.
model.kknn <- train(learner = lrn.knn.imp.tune, task = task)
predict.knn <- predict(model.kknn, task)

performance(predict.knn, measures = rmse)

##    rmse 
## 12.7292

# Print the task summary.
task

## Supervised task: air
## Type: regr
## Target: Ozone
## Observations: 116
## Features:
##    numerics     factors     ordered functionals 
##           5           0           0           0 
## Missings: TRUE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE

# Random-forest regression learner.
lrn.rand <- makeLearner("regr.randomForest")

# Impute missing values in integer columns with a regression tree.
impute <- imputeLearner("regr.rpart")
lrn.rand.impute <- makeImputeWrapper(
  learner = lrn.rand,
  classes = list(integer = impute)
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(lrn.rand.impute)

##                      Type  len   Def                 Constr Req Tunable Trafo
## ntree             integer    -   500               1 to Inf   -    TRUE     -
## se.ntree          integer    -   100               1 to Inf   Y    TRUE     -
## se.method        discrete    -    sd bootstrap,jackknife,sd   Y    TRUE     -
## se.boot           integer    -    50               1 to Inf   -    TRUE     -
## mtry              integer    -     -               1 to Inf   -    TRUE     -
## replace           logical    -  TRUE                      -   -    TRUE     -
## strata            untyped    -     -                      -   -   FALSE     -
## sampsize    integervector <NA>     -               1 to Inf   -    TRUE     -
## nodesize          integer    -     5               1 to Inf   -    TRUE     -
## maxnodes          integer    -     -               1 to Inf   -    TRUE     -
## importance        logical    - FALSE                      -   -    TRUE     -
## localImp          logical    - FALSE                      -   -    TRUE     -
## nPerm             integer    -     1            -Inf to Inf   -    TRUE     -
## proximity         logical    - FALSE                      -   -   FALSE     -
## oob.prox          logical    -     -                      -   Y   FALSE     -
## do.trace          logical    - FALSE                      -   -   FALSE     -
## keep.forest       logical    -  TRUE                      -   -   FALSE     -
## keep.inbag        logical    - FALSE                      -   -   FALSE     -

# Random-forest search space.
ps <- makeParamSet(
  # Forest size is held fixed at 30 trees.
  makeIntegerParam("ntree", lower = 30, upper = 30),
  # mtry is capped at the number of features in the task. randomForest
  # resets larger values into the valid range (with a warning), so an upper
  # bound of 30 on this 5-feature task would spend most random draws on
  # duplicates of mtry = 5.
  makeIntegerParam("mtry", lower = 1, upper = getTaskNFeats(task)),
  makeIntegerParam("nodesize", lower = 5, upper = 10),
  makeIntegerParam("maxnodes", lower = 1, upper = 30)
)

# 5-fold cross-validation.
kfold <- makeResampleDesc("CV", iters = 5)

# Random search with 10 sampled configurations.
sc <- makeTuneControlRandom(maxit = 10)

lrn.rnd.impute.tune <- makeTuneWrapper(
  learner = lrn.rand.impute,
  resampling = kfold,
  par.set = ps,
  control = sc
)

# Cross-validate the self-tuning learner and report RMSE.
resample(
  learner = lrn.rnd.impute.tune,
  task = task,
  resampling = kfold,
  measures = rmse
)

## Resample Result
## Task: air
## Learner: regr.randomForest.imputed.tuned
## Aggr perf: rmse.test.rmse=18.9157454
## Runtime: 7.37146

# Print the task summary.
task

## Supervised task: air
## Type: regr
## Target: Ozone
## Observations: 116
## Features:
##    numerics     factors     ordered functionals 
##           5           0           0           0 
## Missings: TRUE
## Has weights: FALSE
## Has blocking: FALSE
## Has coordinates: FALSE

# xgboost regression learner.
lrn.xg <- makeLearner("regr.xgboost")

# Impute missing values in integer columns with a regression tree.
method <- imputeLearner("regr.rpart")
lrn.xg.imp <- makeImputeWrapper(
  learner = lrn.xg,
  classes = list(integer = method)
)

# List the hyperparameters exposed by the wrapped learner.
getParamSet(lrn.xg.imp)

##                                 Type  len             Def
## booster                     discrete    -          gbtree
## watchlist                    untyped    -          <NULL>
## eta                          numeric    -             0.3
## gamma                        numeric    -               0
## max_depth                    integer    -               6
## min_child_weight             numeric    -               1
## subsample                    numeric    -               1
## colsample_bytree             numeric    -               1
## colsample_bylevel            numeric    -               1
## colsample_bynode             numeric    -               1
## num_parallel_tree            integer    -               1
## lambda                       numeric    -               1
## lambda_bias                  numeric    -               0
## alpha                        numeric    -               0
## objective                    untyped    - reg:squarede...
## eval_metric                  untyped    -            rmse
## base_score                   numeric    -             0.5
## max_delta_step               numeric    -               0
## missing                      numeric    -                
## monotone_constraints   integervector <NA>               0
## tweedie_variance_power       numeric    -             1.5
## nthread                      integer    -               -
## nrounds                      integer    -               -
## feval                        untyped    -          <NULL>
## verbose                      integer    -               1
## print_every_n                integer    -               1
## early_stopping_rounds        integer    -          <NULL>
## maximize                     logical    -          <NULL>
## sample_type                 discrete    -         uniform
## normalize_type              discrete    -            tree
## rate_drop                    numeric    -               0
## skip_drop                    numeric    -               0
## scale_pos_weight             numeric    -               1
## refresh_leaf                 logical    -            TRUE
## feature_selector            discrete    -          cyclic
## top_k                        integer    -               0
## predictor                   discrete    -   cpu_predictor
## updater                      untyped    -               -
## sketch_eps                   numeric    -            0.03
## one_drop                     logical    -           FALSE
## tree_method                 discrete    -            auto
## grow_policy                 discrete    -       depthwise
## max_leaves                   integer    -               0
## max_bin                      integer    -             256
## callbacks                    untyped    -          list()
##                                                      Constr Req Tunable Trafo
## booster                                gbtree,gblinear,dart   -    TRUE     -
## watchlist                                                 -   -   FALSE     -
## eta                                                  0 to 1   -    TRUE     -
## gamma                                              0 to Inf   -    TRUE     -
## max_depth                                          0 to Inf   -    TRUE     -
## min_child_weight                                   0 to Inf   -    TRUE     -
## subsample                                            0 to 1   -    TRUE     -
## colsample_bytree                                     0 to 1   -    TRUE     -
## colsample_bylevel                                    0 to 1   -    TRUE     -
## colsample_bynode                                     0 to 1   -    TRUE     -
## num_parallel_tree                                  1 to Inf   -    TRUE     -
## lambda                                             0 to Inf   -    TRUE     -
## lambda_bias                                        0 to Inf   -    TRUE     -
## alpha                                              0 to Inf   -    TRUE     -
## objective                                                 -   -   FALSE     -
## eval_metric                                               -   -   FALSE     -
## base_score                                      -Inf to Inf   -   FALSE     -
## max_delta_step                                     0 to Inf   -    TRUE     -
## missing                                         -Inf to Inf   -   FALSE     -
## monotone_constraints                                -1 to 1   -    TRUE     -
## tweedie_variance_power                               1 to 2   Y    TRUE     -
## nthread                                            1 to Inf   -   FALSE     -
## nrounds                                            1 to Inf   -    TRUE     -
## feval                                                     -   -   FALSE     -
## verbose                                              0 to 2   -   FALSE     -
## print_every_n                                      1 to Inf   Y   FALSE     -
## early_stopping_rounds                              1 to Inf   -   FALSE     -
## maximize                                                  -   -   FALSE     -
## sample_type                                uniform,weighted   Y    TRUE     -
## normalize_type                                  tree,forest   Y    TRUE     -
## rate_drop                                            0 to 1   Y    TRUE     -
## skip_drop                                            0 to 1   Y    TRUE     -
## scale_pos_weight                                -Inf to Inf   -    TRUE     -
## refresh_leaf                                              -   -    TRUE     -
## feature_selector       cyclic,shuffle,random,greedy,thrifty   -    TRUE     -
## top_k                                              0 to Inf   -    TRUE     -
## predictor                       cpu_predictor,gpu_predictor   -    TRUE     -
## updater                                                   -   -    TRUE     -
## sketch_eps                                           0 to 1   -    TRUE     -
## one_drop                                                  -   Y    TRUE     -
## tree_method                 auto,exact,approx,hist,gpu_hist   Y    TRUE     -
## grow_policy                             depthwise,lossguide   Y    TRUE     -
## max_leaves                                         0 to Inf   Y    TRUE     -
## max_bin                                            2 to Inf   Y    TRUE     -
## callbacks                                                 -   -   FALSE     -

# xgboost search space; nrounds is held fixed at 30.
ps <- makeParamSet(
  makeNumericParam("eta", lower = 0.3, upper = 0.5),
  makeNumericParam("gamma", lower = 0, upper = 0.5),
  makeIntegerParam("max_depth", lower = 6, upper = 12),
  makeNumericParam("min_child_weight", lower = 1, upper = 10),
  makeNumericParam("subsample", lower = 0.5, upper = 1),
  makeIntegerParam("nrounds", lower = 30, upper = 30),
  makeNumericParam("colsample_bytree", lower = 0.5, upper = 1)
)

# Random search with 10 sampled configurations.
sc <- makeTuneControlRandom(maxit = 10)

# 10-fold cross-validation.
kfold <- makeResampleDesc("CV", iters = 10)

lrn.xg.imp.tune <- makeTuneWrapper(
  learner = lrn.xg.imp,
  resampling = kfold,
  par.set = ps,
  control = sc
)

# Cross-validate the self-tuning learner and report RMSE.
resample(
  learner = lrn.xg.imp.tune,
  task = task,
  resampling = kfold,
  measures = rmse
)

## Resample Result
## Task: air
## Learner: regr.xgboost.imputed.tuned
## Aggr perf: rmse.test.rmse=19.2146588
## Runtime: 71.9593

# model optimization

# omon das

# 2023-12-05