Задание 1

Установить пакет caret, выполнить команду names(getModelInfo()), ознакомиться со списком доступных методов выбора признаков. Выполнить графический разведочный анализ данных с использованием функции featurePlot() для набора данных из справочного файла пакета caret:

Устанавливаем пакет caret

# Install caret only when it is missing, then attach it.
# library() (unlike require()) stops with an error when the package still
# cannot be loaded, so a failed installation is not silently ignored.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
library(caret)
## Загрузка требуемого пакета: caret
## Загрузка требуемого пакета: ggplot2
## Загрузка требуемого пакета: lattice

Отображаем список методов

# Show the names of all entries returned by caret's getModelInfo().
print(names(getModelInfo()))
##   [1] "ada"                 "AdaBag"              "AdaBoost.M1"        
##   [4] "adaboost"            "amdai"               "ANFIS"              
##   [7] "avNNet"              "awnb"                "awtan"              
##  [10] "bag"                 "bagEarth"            "bagEarthGCV"        
##  [13] "bagFDA"              "bagFDAGCV"           "bam"                
##  [16] "bartMachine"         "bayesglm"            "binda"              
##  [19] "blackboost"          "blasso"              "blassoAveraged"     
##  [22] "bridge"              "brnn"                "BstLm"              
##  [25] "bstSm"               "bstTree"             "C5.0"               
##  [28] "C5.0Cost"            "C5.0Rules"           "C5.0Tree"           
##  [31] "cforest"             "chaid"               "CSimca"             
##  [34] "ctree"               "ctree2"              "cubist"             
##  [37] "dda"                 "deepboost"           "DENFIS"             
##  [40] "dnn"                 "dwdLinear"           "dwdPoly"            
##  [43] "dwdRadial"           "earth"               "elm"                
##  [46] "enet"                "evtree"              "extraTrees"         
##  [49] "fda"                 "FH.GBML"             "FIR.DM"             
##  [52] "foba"                "FRBCS.CHI"           "FRBCS.W"            
##  [55] "FS.HGD"              "gam"                 "gamboost"           
##  [58] "gamLoess"            "gamSpline"           "gaussprLinear"      
##  [61] "gaussprPoly"         "gaussprRadial"       "gbm_h2o"            
##  [64] "gbm"                 "gcvEarth"            "GFS.FR.MOGUL"       
##  [67] "GFS.LT.RS"           "GFS.THRIFT"          "glm.nb"             
##  [70] "glm"                 "glmboost"            "glmnet_h2o"         
##  [73] "glmnet"              "glmStepAIC"          "gpls"               
##  [76] "hda"                 "hdda"                "hdrda"              
##  [79] "HYFIS"               "icr"                 "J48"                
##  [82] "JRip"                "kernelpls"           "kknn"               
##  [85] "knn"                 "krlsPoly"            "krlsRadial"         
##  [88] "lars"                "lars2"               "lasso"              
##  [91] "lda"                 "lda2"                "leapBackward"       
##  [94] "leapForward"         "leapSeq"             "Linda"              
##  [97] "lm"                  "lmStepAIC"           "LMT"                
## [100] "loclda"              "logicBag"            "LogitBoost"         
## [103] "logreg"              "lssvmLinear"         "lssvmPoly"          
## [106] "lssvmRadial"         "lvq"                 "M5"                 
## [109] "M5Rules"             "manb"                "mda"                
## [112] "Mlda"                "mlp"                 "mlpKerasDecay"      
## [115] "mlpKerasDecayCost"   "mlpKerasDropout"     "mlpKerasDropoutCost"
## [118] "mlpML"               "mlpSGD"              "mlpWeightDecay"     
## [121] "mlpWeightDecayML"    "monmlp"              "msaenet"            
## [124] "multinom"            "mxnet"               "mxnetAdam"          
## [127] "naive_bayes"         "nb"                  "nbDiscrete"         
## [130] "nbSearch"            "neuralnet"           "nnet"               
## [133] "nnls"                "nodeHarvest"         "null"               
## [136] "OneR"                "ordinalNet"          "ordinalRF"          
## [139] "ORFlog"              "ORFpls"              "ORFridge"           
## [142] "ORFsvm"              "ownn"                "pam"                
## [145] "parRF"               "PART"                "partDSA"            
## [148] "pcaNNet"             "pcr"                 "pda"                
## [151] "pda2"                "penalized"           "PenalizedLDA"       
## [154] "plr"                 "pls"                 "plsRglm"            
## [157] "polr"                "ppr"                 "pre"                
## [160] "PRIM"                "protoclass"          "qda"                
## [163] "QdaCov"              "qrf"                 "qrnn"               
## [166] "randomGLM"           "ranger"              "rbf"                
## [169] "rbfDDA"              "Rborist"             "rda"                
## [172] "regLogistic"         "relaxo"              "rf"                 
## [175] "rFerns"              "RFlda"               "rfRules"            
## [178] "ridge"               "rlda"                "rlm"                
## [181] "rmda"                "rocc"                "rotationForest"     
## [184] "rotationForestCp"    "rpart"               "rpart1SE"           
## [187] "rpart2"              "rpartCost"           "rpartScore"         
## [190] "rqlasso"             "rqnc"                "RRF"                
## [193] "RRFglobal"           "rrlda"               "RSimca"             
## [196] "rvmLinear"           "rvmPoly"             "rvmRadial"          
## [199] "SBC"                 "sda"                 "sdwd"               
## [202] "simpls"              "SLAVE"               "slda"               
## [205] "smda"                "snn"                 "sparseLDA"          
## [208] "spikeslab"           "spls"                "stepLDA"            
## [211] "stepQDA"             "superpc"             "svmBoundrangeString"
## [214] "svmExpoString"       "svmLinear"           "svmLinear2"         
## [217] "svmLinear3"          "svmLinearWeights"    "svmLinearWeights2"  
## [220] "svmPoly"             "svmRadial"           "svmRadialCost"      
## [223] "svmRadialSigma"      "svmRadialWeights"    "svmSpectrumString"  
## [226] "tan"                 "tanSearch"           "treebag"            
## [229] "vbmpRadial"          "vglmAdjCat"          "vglmContRatio"      
## [232] "vglmCumulative"      "widekernelpls"       "WM"                 
## [235] "wsrf"                "xgbDART"             "xgbLinear"          
## [238] "xgbTree"             "xyf"

Генерируем данные

# Fix the RNG seed so the synthetic sample, and every plot built from it,
# can be reproduced exactly on a re-run.
set.seed(42)

# 50 observations of 5 independent standard-normal features.
x <- matrix(rnorm(50 * 5), ncol = 5)
# Class labels alternating A, B, A, B, ... (25 of each).
y <- factor(rep(c("A", "B"), 25))
head(x)
##            [,1]       [,2]       [,3]       [,4]       [,5]
## [1,]  1.0366083  0.8278346  0.3491889 -0.9013591  1.1550878
## [2,]  1.2050843  0.8770722 -1.0615512  1.3692724 -1.8927438
## [3,] -1.6046110 -1.0601709  1.0693973  0.3977903 -0.5249970
## [4,]  0.2191387 -0.9584352 -0.2287909  0.7383253  0.9840672
## [5,]  0.9809709 -2.3513764 -1.4742715 -0.6477671  1.5500128
## [6,] -0.3032803 -0.1480190  0.3401708 -1.3347801 -0.7332655
# Show the first class labels.
print(head(y))
## [1] A B A B A B
## Levels: A B

Рисуем графики

# Draw one chart per plot type, in the order density, box, pairs, strip.
# Inside a function the plot object is not auto-printed, so print it explicitly.
invisible(lapply(
  c("density", "box", "pairs", "strip"),
  function(plot_type) print(featurePlot(x, y, plot = plot_type))
))

Сохраняем графики в файлы

# Save each chart to its own JPEG file.
# Every jpeg() call opens a new graphics device; dev.off() must close it,
# otherwise the file is never finalized and the open devices accumulate,
# so later plots keep going to a file instead of the screen.
# print() draws the plot object explicitly, so this also works when the
# script is run via source() rather than line by line.
jpeg("plot_density.jpg")
print(featurePlot(x, y, plot = "density"))
invisible(dev.off())

jpeg("plot_box.jpg")
print(featurePlot(x, y, plot = "box"))
invisible(dev.off())

jpeg("plot_pairs.jpg")
print(featurePlot(x, y, plot = "pairs"))
invisible(dev.off())

jpeg("plot_strip.jpg")
print(featurePlot(x, y, plot = "strip"))
invisible(dev.off())

Вывод

Графики плотности для всех признаков показывают распределение, близкое к нормальному, что ожидаемо, поскольку данные сгенерированы функцией rnorm(). Так как метки классов A и B назначены независимо от значений признаков, различие между классами незначительно или вовсе отсутствует: это видно по сильному перекрытию распределений для всех признаков (V1–V5).


Задание 2

С использованием функций из пакета FSelector [2] определить важность признаков для решения задачи классификации. Использовать набор data(iris). Сделать выводы.

Устанавливаем пакет FSelector

# Install FSelector only when it is missing, then attach it.
# library() fails loudly if the package cannot be loaded, whereas require()
# would only return FALSE.
if (!requireNamespace("FSelector", quietly = TRUE)) {
  install.packages("FSelector")
}
library(FSelector)
## Загрузка требуемого пакета: FSelector

Загружаем данные

# Load the built-in iris data set into the workspace.
data("iris")

Вычисляем информационную ценность каждого признака

# Information gain of every predictor with respect to the Species class.
gain <- information.gain(Species ~ ., iris)
print(gain)
##              attr_importance
## Sepal.Length       0.4521286
## Sepal.Width        0.2672750
## Petal.Length       0.9402853
## Petal.Width        0.9554360

Вывод

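Признаки Petal.Width (0.955) и Petal.Length (0.940) являются наиболее значимыми для классификации: их информационный выигрыш примерно вдвое выше, чем у Sepal.Length (0.452), и более чем втрое выше, чем у Sepal.Width (0.267).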


Задание 3

С использованием функции discretize() из пакета arules выполните преобразование непрерывной переменной в категориальную [3] различными методами: «interval» (равная ширина интервала), «frequency» (равная частота), «cluster» (кластеризация) и «fixed» (категории задают границы интервалов). Используйте набор данных iris. Сделайте выводы.

Устанавливаем пакет arules

# Install arules only when it is missing, then attach it.
# library() fails loudly if the package cannot be loaded, whereas require()
# would only return FALSE.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
library(arules)
## Загрузка требуемого пакета: arules
## Загрузка требуемого пакета: Matrix
## 
## Присоединяю пакет: 'arules'
## Следующие объекты скрыты от 'package:base':
## 
##     abbreviate, write

Загружаем данные

# Load the built-in iris data set into the workspace.
data("iris")

Дискретизация и просмотр результатов # Method 1: interval

# Method 1: interval -- bins of equal width.
iris$Petal.Length.interval <- discretize(
  iris$Petal.Length, method = "interval", breaks = 3
)

# Method 2: frequency -- bins holding roughly the same number of rows.
iris$Petal.Length.frequency <- discretize(
  iris$Petal.Length, method = "frequency", breaks = 3
)

# Method 3: cluster -- bin edges derived from clustering the values.
# The clustering is presumably k-means with random starting centres
# (verify against the arules documentation), so fix the seed to keep
# the cut points repeatable between runs.
set.seed(42)
iris$Petal.Length.cluster <- discretize(
  iris$Petal.Length, method = "cluster", breaks = 3
)

# Method 4: fixed -- the bin edges are supplied explicitly.
# `breaks` is passed by name instead of relying on argument position.
iris$Petal.Length.fixed <- discretize(
  iris$Petal.Length, method = "fixed", breaks = c(0, 2, 4, 6, 8)
)

# Show the original values next to the four discretized versions.
print(iris[, c(
  "Petal.Length",
  "Petal.Length.interval",
  "Petal.Length.frequency",
  "Petal.Length.cluster",
  "Petal.Length.fixed"
)])
##     Petal.Length Petal.Length.interval Petal.Length.frequency
## 1            1.4              [1,2.97)               [1,2.63)
## 2            1.4              [1,2.97)               [1,2.63)
## 3            1.3              [1,2.97)               [1,2.63)
## 4            1.5              [1,2.97)               [1,2.63)
## 5            1.4              [1,2.97)               [1,2.63)
## 6            1.7              [1,2.97)               [1,2.63)
## 7            1.4              [1,2.97)               [1,2.63)
## 8            1.5              [1,2.97)               [1,2.63)
## 9            1.4              [1,2.97)               [1,2.63)
## 10           1.5              [1,2.97)               [1,2.63)
## 11           1.5              [1,2.97)               [1,2.63)
## 12           1.6              [1,2.97)               [1,2.63)
## 13           1.4              [1,2.97)               [1,2.63)
## 14           1.1              [1,2.97)               [1,2.63)
## 15           1.2              [1,2.97)               [1,2.63)
## 16           1.5              [1,2.97)               [1,2.63)
## 17           1.3              [1,2.97)               [1,2.63)
## 18           1.4              [1,2.97)               [1,2.63)
## 19           1.7              [1,2.97)               [1,2.63)
## 20           1.5              [1,2.97)               [1,2.63)
## 21           1.7              [1,2.97)               [1,2.63)
## 22           1.5              [1,2.97)               [1,2.63)
## 23           1.0              [1,2.97)               [1,2.63)
## 24           1.7              [1,2.97)               [1,2.63)
## 25           1.9              [1,2.97)               [1,2.63)
## 26           1.6              [1,2.97)               [1,2.63)
## 27           1.6              [1,2.97)               [1,2.63)
## 28           1.5              [1,2.97)               [1,2.63)
## 29           1.4              [1,2.97)               [1,2.63)
## 30           1.6              [1,2.97)               [1,2.63)
## 31           1.6              [1,2.97)               [1,2.63)
## 32           1.5              [1,2.97)               [1,2.63)
## 33           1.5              [1,2.97)               [1,2.63)
## 34           1.4              [1,2.97)               [1,2.63)
## 35           1.5              [1,2.97)               [1,2.63)
## 36           1.2              [1,2.97)               [1,2.63)
## 37           1.3              [1,2.97)               [1,2.63)
## 38           1.4              [1,2.97)               [1,2.63)
## 39           1.3              [1,2.97)               [1,2.63)
## 40           1.5              [1,2.97)               [1,2.63)
## 41           1.3              [1,2.97)               [1,2.63)
## 42           1.3              [1,2.97)               [1,2.63)
## 43           1.3              [1,2.97)               [1,2.63)
## 44           1.6              [1,2.97)               [1,2.63)
## 45           1.9              [1,2.97)               [1,2.63)
## 46           1.4              [1,2.97)               [1,2.63)
## 47           1.6              [1,2.97)               [1,2.63)
## 48           1.4              [1,2.97)               [1,2.63)
## 49           1.5              [1,2.97)               [1,2.63)
## 50           1.4              [1,2.97)               [1,2.63)
## 51           4.7           [2.97,4.93)             [2.63,4.9)
## 52           4.5           [2.97,4.93)             [2.63,4.9)
## 53           4.9           [2.97,4.93)              [4.9,6.9]
## 54           4.0           [2.97,4.93)             [2.63,4.9)
## 55           4.6           [2.97,4.93)             [2.63,4.9)
## 56           4.5           [2.97,4.93)             [2.63,4.9)
## 57           4.7           [2.97,4.93)             [2.63,4.9)
## 58           3.3           [2.97,4.93)             [2.63,4.9)
## 59           4.6           [2.97,4.93)             [2.63,4.9)
## 60           3.9           [2.97,4.93)             [2.63,4.9)
## 61           3.5           [2.97,4.93)             [2.63,4.9)
## 62           4.2           [2.97,4.93)             [2.63,4.9)
## 63           4.0           [2.97,4.93)             [2.63,4.9)
## 64           4.7           [2.97,4.93)             [2.63,4.9)
## 65           3.6           [2.97,4.93)             [2.63,4.9)
## 66           4.4           [2.97,4.93)             [2.63,4.9)
## 67           4.5           [2.97,4.93)             [2.63,4.9)
## 68           4.1           [2.97,4.93)             [2.63,4.9)
## 69           4.5           [2.97,4.93)             [2.63,4.9)
## 70           3.9           [2.97,4.93)             [2.63,4.9)
## 71           4.8           [2.97,4.93)             [2.63,4.9)
## 72           4.0           [2.97,4.93)             [2.63,4.9)
## 73           4.9           [2.97,4.93)              [4.9,6.9]
## 74           4.7           [2.97,4.93)             [2.63,4.9)
## 75           4.3           [2.97,4.93)             [2.63,4.9)
## 76           4.4           [2.97,4.93)             [2.63,4.9)
## 77           4.8           [2.97,4.93)             [2.63,4.9)
## 78           5.0            [4.93,6.9]              [4.9,6.9]
## 79           4.5           [2.97,4.93)             [2.63,4.9)
## 80           3.5           [2.97,4.93)             [2.63,4.9)
## 81           3.8           [2.97,4.93)             [2.63,4.9)
## 82           3.7           [2.97,4.93)             [2.63,4.9)
## 83           3.9           [2.97,4.93)             [2.63,4.9)
## 84           5.1            [4.93,6.9]              [4.9,6.9]
## 85           4.5           [2.97,4.93)             [2.63,4.9)
## 86           4.5           [2.97,4.93)             [2.63,4.9)
## 87           4.7           [2.97,4.93)             [2.63,4.9)
## 88           4.4           [2.97,4.93)             [2.63,4.9)
## 89           4.1           [2.97,4.93)             [2.63,4.9)
## 90           4.0           [2.97,4.93)             [2.63,4.9)
## 91           4.4           [2.97,4.93)             [2.63,4.9)
## 92           4.6           [2.97,4.93)             [2.63,4.9)
## 93           4.0           [2.97,4.93)             [2.63,4.9)
## 94           3.3           [2.97,4.93)             [2.63,4.9)
## 95           4.2           [2.97,4.93)             [2.63,4.9)
## 96           4.2           [2.97,4.93)             [2.63,4.9)
## 97           4.2           [2.97,4.93)             [2.63,4.9)
## 98           4.3           [2.97,4.93)             [2.63,4.9)
## 99           3.0           [2.97,4.93)             [2.63,4.9)
## 100          4.1           [2.97,4.93)             [2.63,4.9)
## 101          6.0            [4.93,6.9]              [4.9,6.9]
## 102          5.1            [4.93,6.9]              [4.9,6.9]
## 103          5.9            [4.93,6.9]              [4.9,6.9]
## 104          5.6            [4.93,6.9]              [4.9,6.9]
## 105          5.8            [4.93,6.9]              [4.9,6.9]
## 106          6.6            [4.93,6.9]              [4.9,6.9]
## 107          4.5           [2.97,4.93)             [2.63,4.9)
## 108          6.3            [4.93,6.9]              [4.9,6.9]
## 109          5.8            [4.93,6.9]              [4.9,6.9]
## 110          6.1            [4.93,6.9]              [4.9,6.9]
## 111          5.1            [4.93,6.9]              [4.9,6.9]
## 112          5.3            [4.93,6.9]              [4.9,6.9]
## 113          5.5            [4.93,6.9]              [4.9,6.9]
## 114          5.0            [4.93,6.9]              [4.9,6.9]
## 115          5.1            [4.93,6.9]              [4.9,6.9]
## 116          5.3            [4.93,6.9]              [4.9,6.9]
## 117          5.5            [4.93,6.9]              [4.9,6.9]
## 118          6.7            [4.93,6.9]              [4.9,6.9]
## 119          6.9            [4.93,6.9]              [4.9,6.9]
## 120          5.0            [4.93,6.9]              [4.9,6.9]
## 121          5.7            [4.93,6.9]              [4.9,6.9]
## 122          4.9           [2.97,4.93)              [4.9,6.9]
## 123          6.7            [4.93,6.9]              [4.9,6.9]
## 124          4.9           [2.97,4.93)              [4.9,6.9]
## 125          5.7            [4.93,6.9]              [4.9,6.9]
## 126          6.0            [4.93,6.9]              [4.9,6.9]
## 127          4.8           [2.97,4.93)             [2.63,4.9)
## 128          4.9           [2.97,4.93)              [4.9,6.9]
## 129          5.6            [4.93,6.9]              [4.9,6.9]
## 130          5.8            [4.93,6.9]              [4.9,6.9]
## 131          6.1            [4.93,6.9]              [4.9,6.9]
## 132          6.4            [4.93,6.9]              [4.9,6.9]
## 133          5.6            [4.93,6.9]              [4.9,6.9]
## 134          5.1            [4.93,6.9]              [4.9,6.9]
## 135          5.6            [4.93,6.9]              [4.9,6.9]
## 136          6.1            [4.93,6.9]              [4.9,6.9]
## 137          5.6            [4.93,6.9]              [4.9,6.9]
## 138          5.5            [4.93,6.9]              [4.9,6.9]
## 139          4.8           [2.97,4.93)             [2.63,4.9)
## 140          5.4            [4.93,6.9]              [4.9,6.9]
## 141          5.6            [4.93,6.9]              [4.9,6.9]
## 142          5.1            [4.93,6.9]              [4.9,6.9]
## 143          5.1            [4.93,6.9]              [4.9,6.9]
## 144          5.9            [4.93,6.9]              [4.9,6.9]
## 145          5.7            [4.93,6.9]              [4.9,6.9]
## 146          5.2            [4.93,6.9]              [4.9,6.9]
## 147          5.0            [4.93,6.9]              [4.9,6.9]
## 148          5.2            [4.93,6.9]              [4.9,6.9]
## 149          5.4            [4.93,6.9]              [4.9,6.9]
## 150          5.1            [4.93,6.9]              [4.9,6.9]
##     Petal.Length.cluster Petal.Length.fixed
## 1               [1,2.95)              [0,2)
## 2               [1,2.95)              [0,2)
## 3               [1,2.95)              [0,2)
## 4               [1,2.95)              [0,2)
## 5               [1,2.95)              [0,2)
## 6               [1,2.95)              [0,2)
## 7               [1,2.95)              [0,2)
## 8               [1,2.95)              [0,2)
## 9               [1,2.95)              [0,2)
## 10              [1,2.95)              [0,2)
## 11              [1,2.95)              [0,2)
## 12              [1,2.95)              [0,2)
## 13              [1,2.95)              [0,2)
## 14              [1,2.95)              [0,2)
## 15              [1,2.95)              [0,2)
## 16              [1,2.95)              [0,2)
## 17              [1,2.95)              [0,2)
## 18              [1,2.95)              [0,2)
## 19              [1,2.95)              [0,2)
## 20              [1,2.95)              [0,2)
## 21              [1,2.95)              [0,2)
## 22              [1,2.95)              [0,2)
## 23              [1,2.95)              [0,2)
## 24              [1,2.95)              [0,2)
## 25              [1,2.95)              [0,2)
## 26              [1,2.95)              [0,2)
## 27              [1,2.95)              [0,2)
## 28              [1,2.95)              [0,2)
## 29              [1,2.95)              [0,2)
## 30              [1,2.95)              [0,2)
## 31              [1,2.95)              [0,2)
## 32              [1,2.95)              [0,2)
## 33              [1,2.95)              [0,2)
## 34              [1,2.95)              [0,2)
## 35              [1,2.95)              [0,2)
## 36              [1,2.95)              [0,2)
## 37              [1,2.95)              [0,2)
## 38              [1,2.95)              [0,2)
## 39              [1,2.95)              [0,2)
## 40              [1,2.95)              [0,2)
## 41              [1,2.95)              [0,2)
## 42              [1,2.95)              [0,2)
## 43              [1,2.95)              [0,2)
## 44              [1,2.95)              [0,2)
## 45              [1,2.95)              [0,2)
## 46              [1,2.95)              [0,2)
## 47              [1,2.95)              [0,2)
## 48              [1,2.95)              [0,2)
## 49              [1,2.95)              [0,2)
## 50              [1,2.95)              [0,2)
## 51           [2.95,5.13)              [4,6)
## 52           [2.95,5.13)              [4,6)
## 53           [2.95,5.13)              [4,6)
## 54           [2.95,5.13)              [4,6)
## 55           [2.95,5.13)              [4,6)
## 56           [2.95,5.13)              [4,6)
## 57           [2.95,5.13)              [4,6)
## 58           [2.95,5.13)              [2,4)
## 59           [2.95,5.13)              [4,6)
## 60           [2.95,5.13)              [2,4)
## 61           [2.95,5.13)              [2,4)
## 62           [2.95,5.13)              [4,6)
## 63           [2.95,5.13)              [4,6)
## 64           [2.95,5.13)              [4,6)
## 65           [2.95,5.13)              [2,4)
## 66           [2.95,5.13)              [4,6)
## 67           [2.95,5.13)              [4,6)
## 68           [2.95,5.13)              [4,6)
## 69           [2.95,5.13)              [4,6)
## 70           [2.95,5.13)              [2,4)
## 71           [2.95,5.13)              [4,6)
## 72           [2.95,5.13)              [4,6)
## 73           [2.95,5.13)              [4,6)
## 74           [2.95,5.13)              [4,6)
## 75           [2.95,5.13)              [4,6)
## 76           [2.95,5.13)              [4,6)
## 77           [2.95,5.13)              [4,6)
## 78           [2.95,5.13)              [4,6)
## 79           [2.95,5.13)              [4,6)
## 80           [2.95,5.13)              [2,4)
## 81           [2.95,5.13)              [2,4)
## 82           [2.95,5.13)              [2,4)
## 83           [2.95,5.13)              [2,4)
## 84           [2.95,5.13)              [4,6)
## 85           [2.95,5.13)              [4,6)
## 86           [2.95,5.13)              [4,6)
## 87           [2.95,5.13)              [4,6)
## 88           [2.95,5.13)              [4,6)
## 89           [2.95,5.13)              [4,6)
## 90           [2.95,5.13)              [4,6)
## 91           [2.95,5.13)              [4,6)
## 92           [2.95,5.13)              [4,6)
## 93           [2.95,5.13)              [4,6)
## 94           [2.95,5.13)              [2,4)
## 95           [2.95,5.13)              [4,6)
## 96           [2.95,5.13)              [4,6)
## 97           [2.95,5.13)              [4,6)
## 98           [2.95,5.13)              [4,6)
## 99           [2.95,5.13)              [2,4)
## 100          [2.95,5.13)              [4,6)
## 101           [5.13,6.9]              [6,8]
## 102          [2.95,5.13)              [4,6)
## 103           [5.13,6.9]              [4,6)
## 104           [5.13,6.9]              [4,6)
## 105           [5.13,6.9]              [4,6)
## 106           [5.13,6.9]              [6,8]
## 107          [2.95,5.13)              [4,6)
## 108           [5.13,6.9]              [6,8]
## 109           [5.13,6.9]              [4,6)
## 110           [5.13,6.9]              [6,8]
## 111          [2.95,5.13)              [4,6)
## 112           [5.13,6.9]              [4,6)
## 113           [5.13,6.9]              [4,6)
## 114          [2.95,5.13)              [4,6)
## 115          [2.95,5.13)              [4,6)
## 116           [5.13,6.9]              [4,6)
## 117           [5.13,6.9]              [4,6)
## 118           [5.13,6.9]              [6,8]
## 119           [5.13,6.9]              [6,8]
## 120          [2.95,5.13)              [4,6)
## 121           [5.13,6.9]              [4,6)
## 122          [2.95,5.13)              [4,6)
## 123           [5.13,6.9]              [6,8]
## 124          [2.95,5.13)              [4,6)
## 125           [5.13,6.9]              [4,6)
## 126           [5.13,6.9]              [6,8]
## 127          [2.95,5.13)              [4,6)
## 128          [2.95,5.13)              [4,6)
## 129           [5.13,6.9]              [4,6)
## 130           [5.13,6.9]              [4,6)
## 131           [5.13,6.9]              [6,8]
## 132           [5.13,6.9]              [6,8]
## 133           [5.13,6.9]              [4,6)
## 134          [2.95,5.13)              [4,6)
## 135           [5.13,6.9]              [4,6)
## 136           [5.13,6.9]              [6,8]
## 137           [5.13,6.9]              [4,6)
## 138           [5.13,6.9]              [4,6)
## 139          [2.95,5.13)              [4,6)
## 140           [5.13,6.9]              [4,6)
## 141           [5.13,6.9]              [4,6)
## 142          [2.95,5.13)              [4,6)
## 143          [2.95,5.13)              [4,6)
## 144           [5.13,6.9]              [4,6)
## 145           [5.13,6.9]              [4,6)
## 146           [5.13,6.9]              [4,6)
## 147          [2.95,5.13)              [4,6)
## 148           [5.13,6.9]              [4,6)
## 149           [5.13,6.9]              [4,6)
## 150          [2.95,5.13)              [4,6)

Вывод

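Каждый из методов разбивает непрерывную переменную Petal.Length на интервалы, но делает это по-разному: «interval» делит диапазон [1; 6.9] на три отрезка равной ширины (границы 2.97 и 4.93); «frequency» подбирает границы (2.63 и 4.9) так, чтобы в каждый интервал попало примерно одинаковое число наблюдений; «cluster» определяет границы (2.95 и 5.13) по результатам кластеризации значений; «fixed» использует заданные вручную границы 0, 2, 4, 6, 8.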


Задание 4

Установите пакет Boruta и проведите выбор признаков для набора данных data("Ozone") [4, 5, 6]. Постройте график boxplot, сделайте выводы.

Устанавливаем пакет Boruta

# Install Boruta only when it is missing, then attach it.
# library() fails loudly if the package cannot be loaded, whereas require()
# would only return FALSE.
if (!requireNamespace("Boruta", quietly = TRUE)) {
  install.packages("Boruta")
}
library(Boruta)
## Загрузка требуемого пакета: Boruta

Устанавливаем пакет mlbench

# Install mlbench only when it is missing, then attach it.
# library() fails loudly if the package cannot be loaded, whereas require()
# would only return FALSE.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
library(mlbench)
## Загрузка требуемого пакета: mlbench

Подготавливаем данные

# Load the Ozone data set and keep only the rows without missing values.
data("Ozone")
Ozone <- na.omit(object = Ozone)
# Preview the first rows of the cleaned data.
head(x = Ozone)

Ищем признаки, важные для предсказания 4-го признака (V4)

# The number of iterations and the decisions Boruta reaches can differ
# between runs, so fix the RNG seed to make the selection reproducible.
set.seed(42)
# Assess the importance of every other column for predicting V4;
# doTrace = 2 reports the progress of each run.
boruta_result <- Boruta(V4 ~ ., data = Ozone, doTrace = 2)
##  1. run of importance source...
##  2. run of importance source...
##  3. run of importance source...
##  4. run of importance source...
##  5. run of importance source...
##  6. run of importance source...
##  7. run of importance source...
##  8. run of importance source...
##  9. run of importance source...
##  10. run of importance source...
##  11. run of importance source...
## After 11 iterations, +0.63 secs:
##  confirmed 9 attributes: V1, V10, V11, V12, V13 and 4 more;
##  rejected 1 attribute: V3;
##  still have 2 attributes left.
##  12. run of importance source...
##  13. run of importance source...
##  14. run of importance source...
##  15. run of importance source...
##  16. run of importance source...
##  17. run of importance source...
##  18. run of importance source...
## After 18 iterations, +1 secs:
##  rejected 1 attribute: V2;
##  still have 1 attribute left.
##  19. run of importance source...
##  20. run of importance source...
##  21. run of importance source...
##  22. run of importance source...
##  23. run of importance source...
##  24. run of importance source...
##  25. run of importance source...
##  26. run of importance source...
##  27. run of importance source...
##  28. run of importance source...
##  29. run of importance source...
##  30. run of importance source...
##  31. run of importance source...
##  32. run of importance source...
##  33. run of importance source...
## After 33 iterations, +1.8 secs:
##  rejected 1 attribute: V6;
##  no more attributes left.

Рисуем графики

# Plot the attribute importance found by Boruta.
plot(boruta_result, cex.axis = 0.8)

# Attributes selected by Boruta, including those still marked tentative.
important_vars <- getSelectedAttributes(boruta_result, withTentative = TRUE)

if (length(important_vars) > 0) {
  # drop = FALSE keeps a data frame even when a single attribute is selected,
  # so the box is still labelled with the attribute name.
  boxplot(
    Ozone[, important_vars, drop = FALSE],
    main = "Selected Features Boxplot", las = 2, col = "lightblue"
  )
} else {
  # Nothing was selected: report it instead of failing inside boxplot().
  message("Boruta selected no attributes; nothing to plot.")
}

Вывод

На 4-й признак (V4) больше всего влияют признаки V9, V8, V12, V11, V7, V10, V13, V1, V5; признаки V2, V3 и V6 отклонены как незначимые. Значения 5-го признака (V5) значительно выше значений остальных. 10-й признак (V10) имеет большой разброс.