Задание 1

  1. Установить пакет CARET, выполнить команду names(getModelInfo()), ознакомиться со списком доступных методов выбора признаков. Выполните графический разведочный анализ данных с использованием функции featurePlot() для набора данных из справочного файла пакета CARET:

x <- matrix(rnorm(50*5),ncol=5)

y <- factor(rep(c("A", "B"), 25))

Сохранить полученные графики в *.jpg файлы. Сделать выводы.

# Подключаем пакеты
library(ggplot2)
library(caret)

# List the names of all model entries returned by caret's getModelInfo()
names(getModelInfo())
##   [1] "ada"                 "AdaBag"              "AdaBoost.M1"        
##   [4] "adaboost"            "amdai"               "ANFIS"              
##   [7] "avNNet"              "awnb"                "awtan"              
##  [10] "bag"                 "bagEarth"            "bagEarthGCV"        
##  [13] "bagFDA"              "bagFDAGCV"           "bam"                
##  [16] "bartMachine"         "bayesglm"            "binda"              
##  [19] "blackboost"          "blasso"              "blassoAveraged"     
##  [22] "bridge"              "brnn"                "BstLm"              
##  [25] "bstSm"               "bstTree"             "C5.0"               
##  [28] "C5.0Cost"            "C5.0Rules"           "C5.0Tree"           
##  [31] "cforest"             "chaid"               "CSimca"             
##  [34] "ctree"               "ctree2"              "cubist"             
##  [37] "dda"                 "deepboost"           "DENFIS"             
##  [40] "dnn"                 "dwdLinear"           "dwdPoly"            
##  [43] "dwdRadial"           "earth"               "elm"                
##  [46] "enet"                "evtree"              "extraTrees"         
##  [49] "fda"                 "FH.GBML"             "FIR.DM"             
##  [52] "foba"                "FRBCS.CHI"           "FRBCS.W"            
##  [55] "FS.HGD"              "gam"                 "gamboost"           
##  [58] "gamLoess"            "gamSpline"           "gaussprLinear"      
##  [61] "gaussprPoly"         "gaussprRadial"       "gbm_h2o"            
##  [64] "gbm"                 "gcvEarth"            "GFS.FR.MOGUL"       
##  [67] "GFS.LT.RS"           "GFS.THRIFT"          "glm.nb"             
##  [70] "glm"                 "glmboost"            "glmnet_h2o"         
##  [73] "glmnet"              "glmStepAIC"          "gpls"               
##  [76] "hda"                 "hdda"                "hdrda"              
##  [79] "HYFIS"               "icr"                 "J48"                
##  [82] "JRip"                "kernelpls"           "kknn"               
##  [85] "knn"                 "krlsPoly"            "krlsRadial"         
##  [88] "lars"                "lars2"               "lasso"              
##  [91] "lda"                 "lda2"                "leapBackward"       
##  [94] "leapForward"         "leapSeq"             "Linda"              
##  [97] "lm"                  "lmStepAIC"           "LMT"                
## [100] "loclda"              "logicBag"            "LogitBoost"         
## [103] "logreg"              "lssvmLinear"         "lssvmPoly"          
## [106] "lssvmRadial"         "lvq"                 "M5"                 
## [109] "M5Rules"             "manb"                "mda"                
## [112] "Mlda"                "mlp"                 "mlpKerasDecay"      
## [115] "mlpKerasDecayCost"   "mlpKerasDropout"     "mlpKerasDropoutCost"
## [118] "mlpML"               "mlpSGD"              "mlpWeightDecay"     
## [121] "mlpWeightDecayML"    "monmlp"              "msaenet"            
## [124] "multinom"            "mxnet"               "mxnetAdam"          
## [127] "naive_bayes"         "nb"                  "nbDiscrete"         
## [130] "nbSearch"            "neuralnet"           "nnet"               
## [133] "nnls"                "nodeHarvest"         "null"               
## [136] "OneR"                "ordinalNet"          "ordinalRF"          
## [139] "ORFlog"              "ORFpls"              "ORFridge"           
## [142] "ORFsvm"              "ownn"                "pam"                
## [145] "parRF"               "PART"                "partDSA"            
## [148] "pcaNNet"             "pcr"                 "pda"                
## [151] "pda2"                "penalized"           "PenalizedLDA"       
## [154] "plr"                 "pls"                 "plsRglm"            
## [157] "polr"                "ppr"                 "pre"                
## [160] "PRIM"                "protoclass"          "qda"                
## [163] "QdaCov"              "qrf"                 "qrnn"               
## [166] "randomGLM"           "ranger"              "rbf"                
## [169] "rbfDDA"              "Rborist"             "rda"                
## [172] "regLogistic"         "relaxo"              "rf"                 
## [175] "rFerns"              "RFlda"               "rfRules"            
## [178] "ridge"               "rlda"                "rlm"                
## [181] "rmda"                "rocc"                "rotationForest"     
## [184] "rotationForestCp"    "rpart"               "rpart1SE"           
## [187] "rpart2"              "rpartCost"           "rpartScore"         
## [190] "rqlasso"             "rqnc"                "RRF"                
## [193] "RRFglobal"           "rrlda"               "RSimca"             
## [196] "rvmLinear"           "rvmPoly"             "rvmRadial"          
## [199] "SBC"                 "sda"                 "sdwd"               
## [202] "simpls"              "SLAVE"               "slda"               
## [205] "smda"                "snn"                 "sparseLDA"          
## [208] "spikeslab"           "spls"                "stepLDA"            
## [211] "stepQDA"             "superpc"             "svmBoundrangeString"
## [214] "svmExpoString"       "svmLinear"           "svmLinear2"         
## [217] "svmLinear3"          "svmLinearWeights"    "svmLinearWeights2"  
## [220] "svmPoly"             "svmRadial"           "svmRadialCost"      
## [223] "svmRadialSigma"      "svmRadialWeights"    "svmSpectrumString"  
## [226] "tan"                 "tanSearch"           "treebag"            
## [229] "vbmpRadial"          "vglmAdjCat"          "vglmContRatio"      
## [232] "vglmCumulative"      "widekernelpls"       "WM"                 
## [235] "wsrf"                "xgbDART"             "xgbLinear"          
## [238] "xgbTree"             "xyf"
# Build a reproducible toy data set: a 50 x 5 matrix of standard-normal
# draws plus an alternating two-level class label, combined in one data frame
set.seed(123)
x <- matrix(rnorm(250), nrow = 50, ncol = 5)
y <- factor(rep(c("A", "B"), times = 25))
df <- data.frame(x, y)

# Exploratory plot: per-feature box plots of the five predictors, split by
# the class label. The plot object is kept so the same figure can be shown
# on screen and written to a *.jpg file, as the assignment requires.
feature_box_plot <- featurePlot(x = df[, 1:5], y = df$y, plot = "box")
print(feature_box_plot)

# Save the figure to disk. The explicit print() draws the stored plot object
# onto the jpeg device; dev.off() closes the file.
jpeg("featureplot_box.jpg", width = 800, height = 600)
print(feature_box_plot)
invisible(dev.off())

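Вывод: значения всех пяти признаков сосредоточены в основном в промежутке от -1 до 1. Распределения для классов A и B практически совпадают: признаки сгенерированы случайно и не зависят от метки класса, поэтому ни один из них не разделяет классы.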

Задание 2

С использованием функций из пакета FSelector [2] определить важность признаков для решения задачи классификации. Использовать набор data(iris).

# Visual inspection of class separability: scatterplot matrix of the four
# numeric iris columns, with points coloured by the Species factor.
# NOTE(review): the assignment asks for feature importance computed with the
# FSelector package; this block only provides a visual check -- confirm
# whether a numeric ranking is still required.
species_levels <- levels(iris$Species)
pairs(iris[, 1:4], col = iris$Species, oma = c(4, 4, 6, 12))

# Allow the legend to be drawn outside the plot region, then restore the
# previous clipping setting so later plots are unaffected. Labels and colours
# are derived from the factor levels so they always match the `col` mapping.
old_par <- par(xpd = TRUE)
legend(0.85, 0.6, legend = species_levels, fill = seq_along(species_levels))
par(old_par)

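Вывод: по матрице диаграмм рассеяния классы хорошо разделяются по признакам Petal.Length и Petal.Width — они важны для решения задачи классификации. По признакам Sepal.Length и Sepal.Width классы заметно перекрываются, поэтому эти признаки менее информативны.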

Задание 3

С использованием функции discretize() из пакета arules выполните преобразование непрерывной переменной в категориальную [3] различными методами: «interval» (равная ширина интервала), «frequency» (равная частота), «cluster» (кластеризация) и «fixed» (категории задают границы интервалов). Используйте набор данных iris. Сделайте выводы

library(arules)
## Загрузка требуемого пакета: Matrix
## 
## Присоединяю пакет: 'arules'
## Следующие объекты скрыты от 'package:base':
## 
##     abbreviate, write

interval

# Equal-width binning of Sepal.Length into 3 intervals.
# `breaks` replaces the deprecated `categories` argument of discretize(),
# which removes the deprecation warning.
iris_discrete <- discretize(iris$Sepal.Length, method = "interval", breaks = 3)
print(iris_discrete)
##   [1] [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5)
##   [8] [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5)
##  [15] [5.5,6.7) [5.5,6.7) [4.3,5.5) [4.3,5.5) [5.5,6.7) [4.3,5.5) [4.3,5.5)
##  [22] [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5)
##  [29] [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [5.5,6.7) [4.3,5.5)
##  [36] [4.3,5.5) [5.5,6.7) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5)
##  [43] [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5) [4.3,5.5)
##  [50] [4.3,5.5) [6.7,7.9] [5.5,6.7) [6.7,7.9] [5.5,6.7) [5.5,6.7) [5.5,6.7)
##  [57] [5.5,6.7) [4.3,5.5) [5.5,6.7) [4.3,5.5) [4.3,5.5) [5.5,6.7) [5.5,6.7)
##  [64] [5.5,6.7) [5.5,6.7) [6.7,7.9] [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7)
##  [71] [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7) [6.7,7.9]
##  [78] [6.7,7.9] [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7)
##  [85] [4.3,5.5) [5.5,6.7) [6.7,7.9] [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7)
##  [92] [5.5,6.7) [5.5,6.7) [4.3,5.5) [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7)
##  [99] [4.3,5.5) [5.5,6.7) [5.5,6.7) [5.5,6.7) [6.7,7.9] [5.5,6.7) [5.5,6.7)
## [106] [6.7,7.9] [4.3,5.5) [6.7,7.9] [6.7,7.9] [6.7,7.9] [5.5,6.7) [5.5,6.7)
## [113] [6.7,7.9] [5.5,6.7) [5.5,6.7) [5.5,6.7) [5.5,6.7) [6.7,7.9] [6.7,7.9]
## [120] [5.5,6.7) [6.7,7.9] [5.5,6.7) [6.7,7.9] [5.5,6.7) [6.7,7.9] [6.7,7.9]
## [127] [5.5,6.7) [5.5,6.7) [5.5,6.7) [6.7,7.9] [6.7,7.9] [6.7,7.9] [5.5,6.7)
## [134] [5.5,6.7) [5.5,6.7) [6.7,7.9] [5.5,6.7) [5.5,6.7) [5.5,6.7) [6.7,7.9]
## [141] [6.7,7.9] [6.7,7.9] [5.5,6.7) [6.7,7.9] [6.7,7.9] [6.7,7.9] [5.5,6.7)
## [148] [5.5,6.7) [5.5,6.7) [5.5,6.7)
## attr(,"discretized:breaks")
## [1] 4.3 5.5 6.7 7.9
## attr(,"discretized:method")
## [1] interval
## Levels: [4.3,5.5) [5.5,6.7) [6.7,7.9]

Метод “interval” (равные интервалы)

Разбивает диапазон значений на равные интервалы. Может быть неэффективен, если данные не равномерно распределены (в некоторых категориях может оказаться мало значений).

fixed

# Binning of Sepal.Length with user-supplied interval boundaries.
# `breaks` replaces the deprecated `categories` argument of discretize().
# Values outside [4.5, 6.5] fall into no interval and are returned as NA
# (visible in the printed result); widen the boundaries if every observation
# must be assigned to a category.
iris_discrete <- discretize(
  iris$Sepal.Length,
  method = "fixed",
  breaks = c(4.5, 5.5, 6.5)
)
print(iris_discrete)
##   [1] [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5)
##   [8] [4.5,5.5) <NA>      [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) <NA>     
##  [15] [5.5,6.5] [5.5,6.5] [4.5,5.5) [4.5,5.5) [5.5,6.5] [4.5,5.5) [4.5,5.5)
##  [22] [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5)
##  [29] [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [5.5,6.5] [4.5,5.5)
##  [36] [4.5,5.5) [5.5,6.5] [4.5,5.5) <NA>      [4.5,5.5) [4.5,5.5) [4.5,5.5)
##  [43] <NA>      [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5) [4.5,5.5)
##  [50] [4.5,5.5) <NA>      [5.5,6.5] <NA>      [5.5,6.5] [5.5,6.5] [5.5,6.5]
##  [57] [5.5,6.5] [4.5,5.5) <NA>      [4.5,5.5) [4.5,5.5) [5.5,6.5] [5.5,6.5]
##  [64] [5.5,6.5] [5.5,6.5] <NA>      [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5]
##  [71] [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5] <NA>      <NA>     
##  [78] <NA>      [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5]
##  [85] [4.5,5.5) [5.5,6.5] <NA>      [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5]
##  [92] [5.5,6.5] [5.5,6.5] [4.5,5.5) [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5]
##  [99] [4.5,5.5) [5.5,6.5] [5.5,6.5] [5.5,6.5] <NA>      [5.5,6.5] [5.5,6.5]
## [106] <NA>      [4.5,5.5) <NA>      <NA>      <NA>      [5.5,6.5] [5.5,6.5]
## [113] <NA>      [5.5,6.5] [5.5,6.5] [5.5,6.5] [5.5,6.5] <NA>      <NA>     
## [120] [5.5,6.5] <NA>      [5.5,6.5] <NA>      [5.5,6.5] <NA>      <NA>     
## [127] [5.5,6.5] [5.5,6.5] [5.5,6.5] <NA>      <NA>      <NA>      [5.5,6.5]
## [134] [5.5,6.5] [5.5,6.5] <NA>      [5.5,6.5] [5.5,6.5] [5.5,6.5] <NA>     
## [141] <NA>      <NA>      [5.5,6.5] <NA>      <NA>      <NA>      [5.5,6.5]
## [148] [5.5,6.5] [5.5,6.5] [5.5,6.5]
## attr(,"discretized:breaks")
## [1] 4.5 5.5 6.5
## attr(,"discretized:method")
## [1] fixed
## Levels: [4.5,5.5) [5.5,6.5]

Метод “fixed” (фиксированные границы интервалов)

Категории задаются вручную. Хороший вариант, если известны важные границы разбиения. Значения, выходящие за заданные границы, получают NA.

frequency

# Equal-frequency binning of Sepal.Length into 3 intervals.
# `breaks` replaces the deprecated `categories` argument of discretize(),
# which removes the deprecation warning.
iris_discrete <- discretize(
  iris$Sepal.Length,
  method = "frequency",
  breaks = 3
)
print(iris_discrete)
##   [1] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4)
##   [8] [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4) [4.3,5.4) [4.3,5.4)
##  [15] [5.4,6.3) [5.4,6.3) [5.4,6.3) [4.3,5.4) [5.4,6.3) [4.3,5.4) [5.4,6.3)
##  [22] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
##  [29] [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4) [5.4,6.3) [4.3,5.4)
##  [36] [4.3,5.4) [5.4,6.3) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
##  [43] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
##  [50] [4.3,5.4) [6.3,7.9] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9] [5.4,6.3)
##  [57] [6.3,7.9] [4.3,5.4) [6.3,7.9] [4.3,5.4) [4.3,5.4) [5.4,6.3) [5.4,6.3)
##  [64] [5.4,6.3) [5.4,6.3) [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
##  [71] [5.4,6.3) [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9]
##  [78] [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
##  [85] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3)
##  [92] [5.4,6.3) [5.4,6.3) [4.3,5.4) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
##  [99] [4.3,5.4) [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9]
## [106] [6.3,7.9] [4.3,5.4) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
## [113] [6.3,7.9] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
## [120] [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
## [127] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
## [134] [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9]
## [141] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
## [148] [6.3,7.9] [5.4,6.3) [5.4,6.3)
## attr(,"discretized:breaks")
## [1] 4.3 5.4 6.3 7.9
## attr(,"discretized:method")
## [1] frequency
## Levels: [4.3,5.4) [5.4,6.3) [6.3,7.9]

Метод “frequency” (равная частота)

Данные распределяются так, чтобы в каждой категории было примерно одинаковое количество значений. Может быть полезен, если данные имеют неравномерное распределение.

cluster

# Cluster-based binning of Sepal.Length into 3 intervals.
# `breaks` replaces the deprecated `categories` argument of discretize(),
# which removes the deprecation warning.
iris_discrete <- discretize(iris$Sepal.Length, method = "cluster", breaks = 3)
print(iris_discrete)
##   [1] [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45) 
##   [7] [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45) 
##  [13] [4.3,5.45)  [4.3,5.45)  [5.45,6.46) [5.45,6.46) [4.3,5.45)  [4.3,5.45) 
##  [19] [5.45,6.46) [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45) 
##  [25] [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45) 
##  [31] [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [5.45,6.46) [4.3,5.45)  [4.3,5.45) 
##  [37] [5.45,6.46) [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45) 
##  [43] [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45)  [4.3,5.45) 
##  [49] [4.3,5.45)  [4.3,5.45)  [6.46,7.9]  [5.45,6.46) [6.46,7.9]  [5.45,6.46)
##  [55] [6.46,7.9]  [5.45,6.46) [5.45,6.46) [4.3,5.45)  [6.46,7.9]  [4.3,5.45) 
##  [61] [4.3,5.45)  [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46) [6.46,7.9] 
##  [67] [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46)
##  [73] [5.45,6.46) [5.45,6.46) [5.45,6.46) [6.46,7.9]  [6.46,7.9]  [6.46,7.9] 
##  [79] [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46) [5.45,6.46)
##  [85] [4.3,5.45)  [5.45,6.46) [6.46,7.9]  [5.45,6.46) [5.45,6.46) [5.45,6.46)
##  [91] [5.45,6.46) [5.45,6.46) [5.45,6.46) [4.3,5.45)  [5.45,6.46) [5.45,6.46)
##  [97] [5.45,6.46) [5.45,6.46) [4.3,5.45)  [5.45,6.46) [5.45,6.46) [5.45,6.46)
## [103] [6.46,7.9]  [5.45,6.46) [6.46,7.9]  [6.46,7.9]  [4.3,5.45)  [6.46,7.9] 
## [109] [6.46,7.9]  [6.46,7.9]  [6.46,7.9]  [5.45,6.46) [6.46,7.9]  [5.45,6.46)
## [115] [5.45,6.46) [5.45,6.46) [6.46,7.9]  [6.46,7.9]  [6.46,7.9]  [5.45,6.46)
## [121] [6.46,7.9]  [5.45,6.46) [6.46,7.9]  [5.45,6.46) [6.46,7.9]  [6.46,7.9] 
## [127] [5.45,6.46) [5.45,6.46) [5.45,6.46) [6.46,7.9]  [6.46,7.9]  [6.46,7.9] 
## [133] [5.45,6.46) [5.45,6.46) [5.45,6.46) [6.46,7.9]  [5.45,6.46) [5.45,6.46)
## [139] [5.45,6.46) [6.46,7.9]  [6.46,7.9]  [6.46,7.9]  [5.45,6.46) [6.46,7.9] 
## [145] [6.46,7.9]  [6.46,7.9]  [5.45,6.46) [6.46,7.9]  [5.45,6.46) [5.45,6.46)
## attr(,"discretized:breaks")
## [1] 4.300000 5.452320 6.461111 7.900000
## attr(,"discretized:method")
## [1] cluster
## Levels: [4.3,5.45) [5.45,6.46) [6.46,7.9]

Метод “cluster” (кластеризация)

Использует алгоритмы кластеризации для разделения данных. Может адаптироваться к форме данных, но зависит от выбора количества кластеров.

Общий вывод:

Метод “frequency” подходит, если важно сбалансированное распределение данных. Метод “interval” удобен для анализа равномерно распределенных данных. Метод “cluster” полезен, если данные имеют сложную структуру. Метод “fixed” требуется, если необходимо строгое разбиение по заданным критериям.

Задание 4

Установите пакет Boruta и проведите выбор признаков для набора данных data("Ozone") [4, 5, 6]. Построить график boxplot, сделать выводы.

library(Boruta)
library(mlbench)
data("Ozone", package = "mlbench")
# Feature selection on the Ozone data with Boruta, using V4 as the response
# and all remaining columns as candidate predictors.
# Rows containing missing values are dropped first.
Ozone <- na.omit(Ozone)
# Boruta's importance runs are randomised; fix the RNG seed (same value as
# used for the generated data earlier) so the confirmed/rejected attribute
# sets are reproducible between runs.
set.seed(123)
boruta_result <- Boruta(V4 ~ ., data = Ozone, doTrace = 2)
##  1. run of importance source...
##  2. run of importance source...
##  3. run of importance source...
##  4. run of importance source...
##  5. run of importance source...
##  6. run of importance source...
##  7. run of importance source...
##  8. run of importance source...
##  9. run of importance source...
##  10. run of importance source...
##  11. run of importance source...
## After 11 iterations, +0.86 secs:
##  confirmed 9 attributes: V1, V10, V11, V12, V13 and 4 more;
##  rejected 1 attribute: V3;
##  still have 2 attributes left.
##  12. run of importance source...
##  13. run of importance source...
##  14. run of importance source...
##  15. run of importance source...
##  16. run of importance source...
##  17. run of importance source...
##  18. run of importance source...
##  19. run of importance source...
##  20. run of importance source...
##  21. run of importance source...
##  22. run of importance source...
##  23. run of importance source...
##  24. run of importance source...
## After 24 iterations, +1.8 secs:
##  rejected 1 attribute: V6;
##  still have 1 attribute left.
##  25. run of importance source...
##  26. run of importance source...
##  27. run of importance source...
##  28. run of importance source...
##  29. run of importance source...
##  30. run of importance source...
##  31. run of importance source...
##  32. run of importance source...
##  33. run of importance source...
##  34. run of importance source...
##  35. run of importance source...
##  36. run of importance source...
## After 36 iterations, +2.6 secs:
##  rejected 1 attribute: V2;
##  no more attributes left.
# Print the Boruta summary: iteration count and the attributes confirmed
# important / unimportant
print(boruta_result)
## Boruta performed 36 iterations in 2.638832 secs.
##  9 attributes confirmed important: V1, V10, V11, V12, V13 and 4 more;
##  3 attributes confirmed unimportant: V2, V3, V6;
# Draw the plot of the Boruta result (the figure for the assignment's
# boxplot step)
plot(boruta_result)

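Вывод: алгоритм Boruta подтвердил важность 9 признаков (V1, V10, V11, V12, V13 и ещё 4) и признал неважными 3 признака (V2, V3, V6). На графике boxplot признаки упорядочены по важности.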