Team members: Priyanka Veeranki, Lixia Chen, Amod Panchal, Ikeahor Ezewele

Load packages

library(mlr)
## Warning: package 'mlr' was built under R version 4.2.1
## Loading required package: ParamHelpers
## Warning: package 'ParamHelpers' was built under R version 4.2.1
## Warning message: 'mlr' is in 'maintenance-only' mode since July 2019.
## Future development will only happen in 'mlr3'
## (<https://mlr3.mlr-org.com>). Due to the focus on 'mlr3' there might be
## uncaught bugs meanwhile in {mlr} - please consider switching.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Load data

load train dataset

# Read the training set, preview its first 20 rows, and list every column
# name as a one-column character matrix.
santander.train.df <- read.csv(file = "C:/Users/Public/train.csv")
head(santander.train.df, n = 20)
as.matrix(names(santander.train.df))
##        [,1]     
##   [1,] "ID_code"
##   [2,] "target" 
##   [3,] "var_0"  
##   [4,] "var_1"  
##   [5,] "var_2"  
##   [6,] "var_3"  
##   [7,] "var_4"  
##   [8,] "var_5"  
##   [9,] "var_6"  
##  [10,] "var_7"  
##  [11,] "var_8"  
##  [12,] "var_9"  
##  [13,] "var_10" 
##  [14,] "var_11" 
##  [15,] "var_12" 
##  [16,] "var_13" 
##  [17,] "var_14" 
##  [18,] "var_15" 
##  [19,] "var_16" 
##  [20,] "var_17" 
##  [21,] "var_18" 
##  [22,] "var_19" 
##  [23,] "var_20" 
##  [24,] "var_21" 
##  [25,] "var_22" 
##  [26,] "var_23" 
##  [27,] "var_24" 
##  [28,] "var_25" 
##  [29,] "var_26" 
##  [30,] "var_27" 
##  [31,] "var_28" 
##  [32,] "var_29" 
##  [33,] "var_30" 
##  [34,] "var_31" 
##  [35,] "var_32" 
##  [36,] "var_33" 
##  [37,] "var_34" 
##  [38,] "var_35" 
##  [39,] "var_36" 
##  [40,] "var_37" 
##  [41,] "var_38" 
##  [42,] "var_39" 
##  [43,] "var_40" 
##  [44,] "var_41" 
##  [45,] "var_42" 
##  [46,] "var_43" 
##  [47,] "var_44" 
##  [48,] "var_45" 
##  [49,] "var_46" 
##  [50,] "var_47" 
##  [51,] "var_48" 
##  [52,] "var_49" 
##  [53,] "var_50" 
##  [54,] "var_51" 
##  [55,] "var_52" 
##  [56,] "var_53" 
##  [57,] "var_54" 
##  [58,] "var_55" 
##  [59,] "var_56" 
##  [60,] "var_57" 
##  [61,] "var_58" 
##  [62,] "var_59" 
##  [63,] "var_60" 
##  [64,] "var_61" 
##  [65,] "var_62" 
##  [66,] "var_63" 
##  [67,] "var_64" 
##  [68,] "var_65" 
##  [69,] "var_66" 
##  [70,] "var_67" 
##  [71,] "var_68" 
##  [72,] "var_69" 
##  [73,] "var_70" 
##  [74,] "var_71" 
##  [75,] "var_72" 
##  [76,] "var_73" 
##  [77,] "var_74" 
##  [78,] "var_75" 
##  [79,] "var_76" 
##  [80,] "var_77" 
##  [81,] "var_78" 
##  [82,] "var_79" 
##  [83,] "var_80" 
##  [84,] "var_81" 
##  [85,] "var_82" 
##  [86,] "var_83" 
##  [87,] "var_84" 
##  [88,] "var_85" 
##  [89,] "var_86" 
##  [90,] "var_87" 
##  [91,] "var_88" 
##  [92,] "var_89" 
##  [93,] "var_90" 
##  [94,] "var_91" 
##  [95,] "var_92" 
##  [96,] "var_93" 
##  [97,] "var_94" 
##  [98,] "var_95" 
##  [99,] "var_96" 
## [100,] "var_97" 
## [101,] "var_98" 
## [102,] "var_99" 
## [103,] "var_100"
## [104,] "var_101"
## [105,] "var_102"
## [106,] "var_103"
## [107,] "var_104"
## [108,] "var_105"
## [109,] "var_106"
## [110,] "var_107"
## [111,] "var_108"
## [112,] "var_109"
## [113,] "var_110"
## [114,] "var_111"
## [115,] "var_112"
## [116,] "var_113"
## [117,] "var_114"
## [118,] "var_115"
## [119,] "var_116"
## [120,] "var_117"
## [121,] "var_118"
## [122,] "var_119"
## [123,] "var_120"
## [124,] "var_121"
## [125,] "var_122"
## [126,] "var_123"
## [127,] "var_124"
## [128,] "var_125"
## [129,] "var_126"
## [130,] "var_127"
## [131,] "var_128"
## [132,] "var_129"
## [133,] "var_130"
## [134,] "var_131"
## [135,] "var_132"
## [136,] "var_133"
## [137,] "var_134"
## [138,] "var_135"
## [139,] "var_136"
## [140,] "var_137"
## [141,] "var_138"
## [142,] "var_139"
## [143,] "var_140"
## [144,] "var_141"
## [145,] "var_142"
## [146,] "var_143"
## [147,] "var_144"
## [148,] "var_145"
## [149,] "var_146"
## [150,] "var_147"
## [151,] "var_148"
## [152,] "var_149"
## [153,] "var_150"
## [154,] "var_151"
## [155,] "var_152"
## [156,] "var_153"
## [157,] "var_154"
## [158,] "var_155"
## [159,] "var_156"
## [160,] "var_157"
## [161,] "var_158"
## [162,] "var_159"
## [163,] "var_160"
## [164,] "var_161"
## [165,] "var_162"
## [166,] "var_163"
## [167,] "var_164"
## [168,] "var_165"
## [169,] "var_166"
## [170,] "var_167"
## [171,] "var_168"
## [172,] "var_169"
## [173,] "var_170"
## [174,] "var_171"
## [175,] "var_172"
## [176,] "var_173"
## [177,] "var_174"
## [178,] "var_175"
## [179,] "var_176"
## [180,] "var_177"
## [181,] "var_178"
## [182,] "var_179"
## [183,] "var_180"
## [184,] "var_181"
## [185,] "var_182"
## [186,] "var_183"
## [187,] "var_184"
## [188,] "var_185"
## [189,] "var_186"
## [190,] "var_187"
## [191,] "var_188"
## [192,] "var_189"
## [193,] "var_190"
## [194,] "var_191"
## [195,] "var_192"
## [196,] "var_193"
## [197,] "var_194"
## [198,] "var_195"
## [199,] "var_196"
## [200,] "var_197"
## [201,] "var_198"
## [202,] "var_199"

The chunk above loads the training data set and lists its columns.

load test dataset

# Read the test set, preview its first 10 rows, and list every column name
# as a one-column character matrix.
santander.test.df <- read.csv(file = "C:/Users/Public/test.csv")
head(santander.test.df, n = 10)
as.matrix(names(santander.test.df))
##        [,1]     
##   [1,] "ID_code"
##   [2,] "var_0"  
##   [3,] "var_1"  
##   [4,] "var_2"  
##   [5,] "var_3"  
##   [6,] "var_4"  
##   [7,] "var_5"  
##   [8,] "var_6"  
##   [9,] "var_7"  
##  [10,] "var_8"  
##  [11,] "var_9"  
##  [12,] "var_10" 
##  [13,] "var_11" 
##  [14,] "var_12" 
##  [15,] "var_13" 
##  [16,] "var_14" 
##  [17,] "var_15" 
##  [18,] "var_16" 
##  [19,] "var_17" 
##  [20,] "var_18" 
##  [21,] "var_19" 
##  [22,] "var_20" 
##  [23,] "var_21" 
##  [24,] "var_22" 
##  [25,] "var_23" 
##  [26,] "var_24" 
##  [27,] "var_25" 
##  [28,] "var_26" 
##  [29,] "var_27" 
##  [30,] "var_28" 
##  [31,] "var_29" 
##  [32,] "var_30" 
##  [33,] "var_31" 
##  [34,] "var_32" 
##  [35,] "var_33" 
##  [36,] "var_34" 
##  [37,] "var_35" 
##  [38,] "var_36" 
##  [39,] "var_37" 
##  [40,] "var_38" 
##  [41,] "var_39" 
##  [42,] "var_40" 
##  [43,] "var_41" 
##  [44,] "var_42" 
##  [45,] "var_43" 
##  [46,] "var_44" 
##  [47,] "var_45" 
##  [48,] "var_46" 
##  [49,] "var_47" 
##  [50,] "var_48" 
##  [51,] "var_49" 
##  [52,] "var_50" 
##  [53,] "var_51" 
##  [54,] "var_52" 
##  [55,] "var_53" 
##  [56,] "var_54" 
##  [57,] "var_55" 
##  [58,] "var_56" 
##  [59,] "var_57" 
##  [60,] "var_58" 
##  [61,] "var_59" 
##  [62,] "var_60" 
##  [63,] "var_61" 
##  [64,] "var_62" 
##  [65,] "var_63" 
##  [66,] "var_64" 
##  [67,] "var_65" 
##  [68,] "var_66" 
##  [69,] "var_67" 
##  [70,] "var_68" 
##  [71,] "var_69" 
##  [72,] "var_70" 
##  [73,] "var_71" 
##  [74,] "var_72" 
##  [75,] "var_73" 
##  [76,] "var_74" 
##  [77,] "var_75" 
##  [78,] "var_76" 
##  [79,] "var_77" 
##  [80,] "var_78" 
##  [81,] "var_79" 
##  [82,] "var_80" 
##  [83,] "var_81" 
##  [84,] "var_82" 
##  [85,] "var_83" 
##  [86,] "var_84" 
##  [87,] "var_85" 
##  [88,] "var_86" 
##  [89,] "var_87" 
##  [90,] "var_88" 
##  [91,] "var_89" 
##  [92,] "var_90" 
##  [93,] "var_91" 
##  [94,] "var_92" 
##  [95,] "var_93" 
##  [96,] "var_94" 
##  [97,] "var_95" 
##  [98,] "var_96" 
##  [99,] "var_97" 
## [100,] "var_98" 
## [101,] "var_99" 
## [102,] "var_100"
## [103,] "var_101"
## [104,] "var_102"
## [105,] "var_103"
## [106,] "var_104"
## [107,] "var_105"
## [108,] "var_106"
## [109,] "var_107"
## [110,] "var_108"
## [111,] "var_109"
## [112,] "var_110"
## [113,] "var_111"
## [114,] "var_112"
## [115,] "var_113"
## [116,] "var_114"
## [117,] "var_115"
## [118,] "var_116"
## [119,] "var_117"
## [120,] "var_118"
## [121,] "var_119"
## [122,] "var_120"
## [123,] "var_121"
## [124,] "var_122"
## [125,] "var_123"
## [126,] "var_124"
## [127,] "var_125"
## [128,] "var_126"
## [129,] "var_127"
## [130,] "var_128"
## [131,] "var_129"
## [132,] "var_130"
## [133,] "var_131"
## [134,] "var_132"
## [135,] "var_133"
## [136,] "var_134"
## [137,] "var_135"
## [138,] "var_136"
## [139,] "var_137"
## [140,] "var_138"
## [141,] "var_139"
## [142,] "var_140"
## [143,] "var_141"
## [144,] "var_142"
## [145,] "var_143"
## [146,] "var_144"
## [147,] "var_145"
## [148,] "var_146"
## [149,] "var_147"
## [150,] "var_148"
## [151,] "var_149"
## [152,] "var_150"
## [153,] "var_151"
## [154,] "var_152"
## [155,] "var_153"
## [156,] "var_154"
## [157,] "var_155"
## [158,] "var_156"
## [159,] "var_157"
## [160,] "var_158"
## [161,] "var_159"
## [162,] "var_160"
## [163,] "var_161"
## [164,] "var_162"
## [165,] "var_163"
## [166,] "var_164"
## [167,] "var_165"
## [168,] "var_166"
## [169,] "var_167"
## [170,] "var_168"
## [171,] "var_169"
## [172,] "var_170"
## [173,] "var_171"
## [174,] "var_172"
## [175,] "var_173"
## [176,] "var_174"
## [177,] "var_175"
## [178,] "var_176"
## [179,] "var_177"
## [180,] "var_178"
## [181,] "var_179"
## [182,] "var_180"
## [183,] "var_181"
## [184,] "var_182"
## [185,] "var_183"
## [186,] "var_184"
## [187,] "var_185"
## [188,] "var_186"
## [189,] "var_187"
## [190,] "var_188"
## [191,] "var_189"
## [192,] "var_190"
## [193,] "var_191"
## [194,] "var_192"
## [195,] "var_193"
## [196,] "var_194"
## [197,] "var_195"
## [198,] "var_196"
## [199,] "var_197"
## [200,] "var_198"
## [201,] "var_199"
# Auto-print the test data frame as loaded above.
# NOTE(review): this displays the whole table rather than a preview; confirm
# that is intended, since head() is already called on it earlier.
santander.test.df

The chunk above loads the test data set and lists its columns.

Creating a task and learner

# Build the modelling data: `ID_code` is a row identifier, not a predictor or
# a class label, so it is removed; `target` is the label column and is made a
# factor explicitly so the task is a proper classification problem.
santander.model.df <- santander.train.df[, names(santander.train.df) != "ID_code"]
santander.model.df$target <- as.factor(santander.model.df$target)

# Define the classification task on the `target` label (previously the task
# was pointed at `ID_code`, which is not the label column).
traintask <- makeClassifTask(data = santander.model.df, target = "target")

# Declare the learner: a support vector machine classifier.
svm <- makeLearner("classif.svm")

We pass “classif.svm” as the argument to makeLearner() to declare that we will use an SVM classifier.

Printing available SVM parameters

# Show every hyperparameter of the SVM learner with its type, default,
# constraints, and whether it is tunable.
getParamSet(x = "classif.svm")
##                        Type  len             Def
## type               discrete    - C-classifica...
## cost                numeric    -               1
## nu                  numeric    -             0.5
## class.weights numericvector <NA>               -
## kernel             discrete    -          radial
## degree              integer    -               3
## coef0               numeric    -               0
## gamma               numeric    -               -
## cachesize           numeric    -              40
## tolerance           numeric    -           0.001
## shrinking           logical    -            TRUE
## cross               integer    -               0
## fitted              logical    -            TRUE
## scale         logicalvector <NA>            TRUE
##                                           Constr Req Tunable Trafo
## type          C-classification,nu-classification   -    TRUE     -
## cost                                    0 to Inf   Y    TRUE     -
## nu                                   -Inf to Inf   Y    TRUE     -
## class.weights                           0 to Inf   -    TRUE     -
## kernel          linear,polynomial,radial,sigmoid   -    TRUE     -
## degree                                  1 to Inf   Y    TRUE     -
## coef0                                -Inf to Inf   Y    TRUE     -
## gamma                                   0 to Inf   Y    TRUE     -
## cachesize                            -Inf to Inf   -    TRUE     -
## tolerance                               0 to Inf   -    TRUE     -
## shrinking                                      -   -    TRUE     -
## cross                                   0 to Inf   -   FALSE     -
## fitted                                         -   -   FALSE     -
## scale                                          -   -    TRUE     -

We need to tune the learner's hyperparameters. To find out which hyperparameters are available for an algorithm, we pass the name of the algorithm, in quotes, to getParamSet().

Extracting possible variables from hyperparameter

# Pull out the allowed values of the `kernel` hyperparameter.
getParamSet("classif.svm")[["pars"]][["kernel"]][["values"]]
## $linear
## [1] "linear"
## 
## $polynomial
## [1] "polynomial"
## 
## $radial
## [1] "radial"
## 
## $sigmoid
## [1] "sigmoid"

To extract the possible values of a hyperparameter, append $pars$<name>$values to the getParamSet() call. For example, the call above extracts the available kernel functions.

Defining the hyperparameter space for tuning

# Kernel functions to try during tuning.
kernels <- c("polynomial", "radial", "sigmoid")

# Search space for tuning: kernel choice, polynomial degree (1-3), and the
# cost and gamma values (both between 0.1 and 10).
svmParamSpace <- makeParamSet(
  makeDiscreteParam(id = "kernel", values = kernels),
  makeIntegerParam(id = "degree", lower = 1, upper = 3),
  makeNumericParam(id = "cost", lower = 0.1, upper = 10),
  makeNumericParam(id = "gamma", lower = 0.1, upper = 10)
)

Defining the random search

# Random search: evaluate 20 randomly drawn hyperparameter combinations.
randSearch <- makeTuneControlRandom(maxit = 20)

# Resampling used while tuning: a single holdout split with two thirds of the
# data used for training.
cvForTuning <- makeResampleDesc(method = "Holdout", split = 2 / 3)

Grid search is attractive because, as long as you define a sensible hyperparameter space, it always finds the best-performing combination within that space. But look at the hyperparameter space we have defined for our SVM. Suppose we wanted to try values of cost and gamma from 0.1 to 10 in steps of 0.1 (that is 100 values each), together with three kernel functions and three values of degree. Performing a grid search over this space would require training a model 90,000 times (100 × 100 × 3 × 3)! If you have the time, patience, and computational budget for such a search, grid search is very helpful for reducing the cost function; otherwise, a random search over the same space, as defined above, is the practical alternative.