Задание 1

Установить пакет CARET, выполнить команду names(getModelInfo()), ознакомиться со списком доступных методов выбора признаков.

names(getModelInfo())
##   [1] "ada"                 "AdaBag"              "AdaBoost.M1"        
##   [4] "adaboost"            "amdai"               "ANFIS"              
##   [7] "avNNet"              "awnb"                "awtan"              
##  [10] "bag"                 "bagEarth"            "bagEarthGCV"        
##  [13] "bagFDA"              "bagFDAGCV"           "bam"                
##  [16] "bartMachine"         "bayesglm"            "binda"              
##  [19] "blackboost"          "blasso"              "blassoAveraged"     
##  [22] "bridge"              "brnn"                "BstLm"              
##  [25] "bstSm"               "bstTree"             "C5.0"               
##  [28] "C5.0Cost"            "C5.0Rules"           "C5.0Tree"           
##  [31] "cforest"             "chaid"               "CSimca"             
##  [34] "ctree"               "ctree2"              "cubist"             
##  [37] "dda"                 "deepboost"           "DENFIS"             
##  [40] "dnn"                 "dwdLinear"           "dwdPoly"            
##  [43] "dwdRadial"           "earth"               "elm"                
##  [46] "enet"                "evtree"              "extraTrees"         
##  [49] "fda"                 "FH.GBML"             "FIR.DM"             
##  [52] "foba"                "FRBCS.CHI"           "FRBCS.W"            
##  [55] "FS.HGD"              "gam"                 "gamboost"           
##  [58] "gamLoess"            "gamSpline"           "gaussprLinear"      
##  [61] "gaussprPoly"         "gaussprRadial"       "gbm_h2o"            
##  [64] "gbm"                 "gcvEarth"            "GFS.FR.MOGUL"       
##  [67] "GFS.LT.RS"           "GFS.THRIFT"          "glm.nb"             
##  [70] "glm"                 "glmboost"            "glmnet_h2o"         
##  [73] "glmnet"              "glmStepAIC"          "gpls"               
##  [76] "hda"                 "hdda"                "hdrda"              
##  [79] "HYFIS"               "icr"                 "J48"                
##  [82] "JRip"                "kernelpls"           "kknn"               
##  [85] "knn"                 "krlsPoly"            "krlsRadial"         
##  [88] "lars"                "lars2"               "lasso"              
##  [91] "lda"                 "lda2"                "leapBackward"       
##  [94] "leapForward"         "leapSeq"             "Linda"              
##  [97] "lm"                  "lmStepAIC"           "LMT"                
## [100] "loclda"              "logicBag"            "LogitBoost"         
## [103] "logreg"              "lssvmLinear"         "lssvmPoly"          
## [106] "lssvmRadial"         "lvq"                 "M5"                 
## [109] "M5Rules"             "manb"                "mda"                
## [112] "Mlda"                "mlp"                 "mlpKerasDecay"      
## [115] "mlpKerasDecayCost"   "mlpKerasDropout"     "mlpKerasDropoutCost"
## [118] "mlpML"               "mlpSGD"              "mlpWeightDecay"     
## [121] "mlpWeightDecayML"    "monmlp"              "msaenet"            
## [124] "multinom"            "mxnet"               "mxnetAdam"          
## [127] "naive_bayes"         "nb"                  "nbDiscrete"         
## [130] "nbSearch"            "neuralnet"           "nnet"               
## [133] "nnls"                "nodeHarvest"         "null"               
## [136] "OneR"                "ordinalNet"          "ordinalRF"          
## [139] "ORFlog"              "ORFpls"              "ORFridge"           
## [142] "ORFsvm"              "ownn"                "pam"                
## [145] "parRF"               "PART"                "partDSA"            
## [148] "pcaNNet"             "pcr"                 "pda"                
## [151] "pda2"                "penalized"           "PenalizedLDA"       
## [154] "plr"                 "pls"                 "plsRglm"            
## [157] "polr"                "ppr"                 "pre"                
## [160] "PRIM"                "protoclass"          "qda"                
## [163] "QdaCov"              "qrf"                 "qrnn"               
## [166] "randomGLM"           "ranger"              "rbf"                
## [169] "rbfDDA"              "Rborist"             "rda"                
## [172] "regLogistic"         "relaxo"              "rf"                 
## [175] "rFerns"              "RFlda"               "rfRules"            
## [178] "ridge"               "rlda"                "rlm"                
## [181] "rmda"                "rocc"                "rotationForest"     
## [184] "rotationForestCp"    "rpart"               "rpart1SE"           
## [187] "rpart2"              "rpartCost"           "rpartScore"         
## [190] "rqlasso"             "rqnc"                "RRF"                
## [193] "RRFglobal"           "rrlda"               "RSimca"             
## [196] "rvmLinear"           "rvmPoly"             "rvmRadial"          
## [199] "SBC"                 "sda"                 "sdwd"               
## [202] "simpls"              "SLAVE"               "slda"               
## [205] "smda"                "snn"                 "sparseLDA"          
## [208] "spikeslab"           "spls"                "stepLDA"            
## [211] "stepQDA"             "superpc"             "svmBoundrangeString"
## [214] "svmExpoString"       "svmLinear"           "svmLinear2"         
## [217] "svmLinear3"          "svmLinearWeights"    "svmLinearWeights2"  
## [220] "svmPoly"             "svmRadial"           "svmRadialCost"      
## [223] "svmRadialSigma"      "svmRadialWeights"    "svmSpectrumString"  
## [226] "tan"                 "tanSearch"           "treebag"            
## [229] "vbmpRadial"          "vglmAdjCat"          "vglmContRatio"      
## [232] "vglmCumulative"      "widekernelpls"       "WM"                 
## [235] "wsrf"                "xgbDART"             "xgbLinear"          
## [238] "xgbTree"             "xyf"

Выполните графический разведочный анализ данных с использованием функции featurePlot() для набора данных из справочного файла пакета CARET:

x <- matrix(rnorm(50*5),ncol=5)
y <- factor(rep(c("A", "B"), 25))

featurePlot(x, y, plot="box")

featurePlot(x, y, plot="strip")

featurePlot(x, y, plot="density")

featurePlot(x, y, plot="pairs")

Сохранить полученные графики в *.jpg файлы. Сделать выводы.

На основе случайно сгенерированных значений x с нормальным распределением были построены графики. С помощью фактора y, в котором значения A и B чередуются (по 25 наблюдений каждого), эти значения были разделены на 2 группы. На графиках вида box и strip группы отображены парами, в то время как на графиках вида density и pairs они обозначены разными цветами.

При этом стоит отметить, что набор значений x разбит на 5 столбцов функцией matrix с параметром ncol=5, поэтому на графиках box, strip и density показано по 5 рисунков — по одному на каждый столбец, а на графике pairs и вовсе \(5*5-5=20\) рисунков (по одному на каждую упорядоченную пару различных столбцов).

Наиболее понятными я считаю графики box и density, так как они наглядно показывают плотность распределения значений. Так, density позволяет легко опознать пики значений.

Задание 2

С использованием функций из пакета FSelector определить важность признаков для решения задачи классификации. Использовать набор data(iris). Сделать выводы.

data(iris)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Здесь мы видим характеристики чашелистиков (Sepal) и лепестков (Petal), а также вид ириса (Species). Очевидно, все характеристики зависят в первую очередь от вида. Попробуем узнать, насколько сильно, при помощи функции information.gain из FSelector.

information.gain(Species ~ ., data = iris)
##              attr_importance
## Sepal.Length       0.4521286
## Sepal.Width        0.2672750
## Petal.Length       0.9402853
## Petal.Width        0.9554360

Мы видим, что наибольший вес важности у длины и ширины лепестков.

Задание 3

С использованием функции discretize() из пакета arules выполните преобразование непрерывной переменной в категориальную различными методами: «interval» (равная ширина интервала), «frequency» (равная частота), «cluster» (кластеризация) и «fixed» (категории задают границы интервалов). Используйте набор данных iris. Сделайте выводы.

Взглянем на ширину лепестков, так как это самый важный признак.

Ради интереса построю график

densityplot(iris$Petal.Width, plot="density")

Здесь явно выражено падение в районе ширины 0,8. Интересно, будет ли это отражено при дискретизации, и где будет обозначена граница между второй и третьей категорией.

discretize(iris$Petal.Width, method = "interval")
##   [1] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##   [8] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##  [15] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##  [22] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##  [29] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##  [36] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##  [43] [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9) [0.1,0.9)
##  [50] [0.1,0.9) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [57] [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [64] [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [71] [1.7,2.5] [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [78] [1.7,2.5] [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [85] [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [92] [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7) [0.9,1.7)
##  [99] [0.9,1.7) [0.9,1.7) [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [106] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [113] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [120] [0.9,1.7) [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [127] [1.7,2.5] [1.7,2.5] [1.7,2.5] [0.9,1.7) [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [134] [0.9,1.7) [0.9,1.7) [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [141] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## [148] [1.7,2.5] [1.7,2.5] [1.7,2.5]
## attr(,"discretized:breaks")
## [1] 0.1 0.9 1.7 2.5
## attr(,"discretized:method")
## [1] interval
## Levels: [0.1,0.9) [0.9,1.7) [1.7,2.5]
discretize(iris$Petal.Width, method = "frequency")
##   [1] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##   [7] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [13] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [19] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [25] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [31] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [37] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [43] [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867) [0.1,0.867)
##  [49] [0.1,0.867) [0.1,0.867) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6)
##  [55] [0.867,1.6) [0.867,1.6) [1.6,2.5]   [0.867,1.6) [0.867,1.6) [0.867,1.6)
##  [61] [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6)
##  [67] [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [1.6,2.5]   [0.867,1.6)
##  [73] [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [1.6,2.5]  
##  [79] [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [1.6,2.5]  
##  [85] [0.867,1.6) [1.6,2.5]   [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6)
##  [91] [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6)
##  [97] [0.867,1.6) [0.867,1.6) [0.867,1.6) [0.867,1.6) [1.6,2.5]   [1.6,2.5]  
## [103] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## [109] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## [115] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [0.867,1.6)
## [121] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## [127] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## [133] [1.6,2.5]   [0.867,1.6) [0.867,1.6) [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## [139] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## [145] [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]   [1.6,2.5]  
## attr(,"discretized:breaks")
## [1] 0.1000000 0.8666667 1.6000000 2.5000000
## attr(,"discretized:method")
## [1] frequency
## Levels: [0.1,0.867) [0.867,1.6) [1.6,2.5]
discretize(iris$Petal.Width, method = "cluster")
##   [1] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##   [6] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [11] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [16] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [21] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [26] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [31] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [36] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [41] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [46] [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785)  [0.1,0.785) 
##  [51] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [56] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [61] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [66] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [71] [1.69,2.5]   [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [76] [0.785,1.69) [0.785,1.69) [1.69,2.5]   [0.785,1.69) [0.785,1.69)
##  [81] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [86] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [91] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
##  [96] [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69) [0.785,1.69)
## [101] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## [106] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## [111] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## [116] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [0.785,1.69)
## [121] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## [126] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [0.785,1.69)
## [131] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [0.785,1.69) [0.785,1.69)
## [136] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## [141] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## [146] [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]   [1.69,2.5]  
## attr(,"discretized:breaks")
## [1] 0.1000000 0.7845385 1.6907051 2.5000000
## attr(,"discretized:method")
## [1] cluster
## Levels: [0.1,0.785) [0.785,1.69) [1.69,2.5]
discretize(iris$Petal.Width, method = "fixed", breaks = c(min(iris$Petal.Width), 1, 2, max(iris$Petal.Width)))
##   [1] [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1)
##  [10] [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1)
##  [19] [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1)
##  [28] [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1)
##  [37] [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1)
##  [46] [0.1,1) [0.1,1) [0.1,1) [0.1,1) [0.1,1) [1,2)   [1,2)   [1,2)   [1,2)  
##  [55] [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)  
##  [64] [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)  
##  [73] [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)  
##  [82] [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)  
##  [91] [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)   [1,2)  
## [100] [1,2)   [2,2.5] [1,2)   [2,2.5] [1,2)   [2,2.5] [2,2.5] [1,2)   [1,2)  
## [109] [1,2)   [2,2.5] [2,2.5] [1,2)   [2,2.5] [2,2.5] [2,2.5] [2,2.5] [1,2)  
## [118] [2,2.5] [2,2.5] [1,2)   [2,2.5] [2,2.5] [2,2.5] [1,2)   [2,2.5] [1,2)  
## [127] [1,2)   [1,2)   [2,2.5] [1,2)   [1,2)   [2,2.5] [2,2.5] [1,2)   [1,2)  
## [136] [2,2.5] [2,2.5] [1,2)   [1,2)   [2,2.5] [2,2.5] [2,2.5] [1,2)   [2,2.5]
## [145] [2,2.5] [2,2.5] [1,2)   [2,2.5] [2,2.5] [1,2)  
## attr(,"discretized:breaks")
## [1] 0.1 1.0 2.0 2.5
## attr(,"discretized:method")
## [1] fixed
## Levels: [0.1,1) [1,2) [2,2.5]

Мы видим, что метод interval просто разделяет диапазон значений на интервалы равной ширины, frequency делит так, чтобы в каждый интервал попадало примерно одинаковое число наблюдений (как примерно и ожидалось при построении графика плотности), cluster проводит границы между скоплениями значений, а fixed использует заранее заданные границы интервалов.

Задание 4

Установите пакет Boruta и проведите выбор признаков для набора данных data("Ozone"). Построить график boxplot, сделать выводы.

data("Ozone", package="mlbench")

В описании набора данных приведены следующие значения столбцов:

  1. Month: 1 = January, …, 12 = December

  2. Day of month

  3. Day of week: 1 = Monday, …, 7 = Sunday

  4. Daily maximum one-hour-average ozone reading

  5. 500 millibar pressure height (m) measured at Vandenberg AFB

  6. Wind speed (mph) at Los Angeles International Airport (LAX)

  7. Humidity (%) at LAX

  8. Temperature (degrees F) measured at Sandburg, CA

  9. Temperature (degrees F) measured at El Monte, CA

  10. Inversion base height (feet) at LAX

  11. Pressure gradient (mm Hg) from LAX to Daggett, CA

  12. Inversion base temperature (degrees F) at LAX

  13. Visibility (miles) measured at LAX

Очевидно, мы будем рассматривать 4-й столбец в качестве целевой переменной, так как именно ради его значений и был собран этот набор данных. Однако не во всех строках 4-й столбец содержит данные, поэтому строки с пропусками в нём были убраны.

Ozone <- Ozone[complete.cases(Ozone$V4),]
set.seed(777)
boruta_output <- Boruta(V4 ~ ., data=Ozone)
plot(boruta_output, sort=FALSE)

8-й столбец (температура, измеренная в Сандберге) оказался наиболее определяющим для измерений озона.