Установка пакетов

caret

# Synthetic two-class example: a 50 x 5 matrix of standard-normal draws and
# a factor of labels alternating "A", "B", ... (25 of each).
# The seed is fixed so the random draws are reproducible.
set.seed(123)
x <- matrix(rnorm(250), nrow = 50, ncol = 5)
y <- factor(rep(c("A", "B"), times = 25))

# Density plot of the first four iris columns, with Species as the outcome.
featurePlot(
  x = iris[, 1:4],
  y = iris$Species,
  plot = "density"
)

# Box plot of the synthetic features against the synthetic labels.
featurePlot(
  x = x,
  y = y,
  plot = "boxplot"
)

# Pairs (scatterplot-matrix) plot of the same synthetic data.
featurePlot(
  x = x,
  y = y,
  plot = "pairs"
)

FSelector

# Load the iris data set into the workspace
data("iris")

# Estimate feature importance: information gain of every other column
# with respect to Species
weights <- information.gain(
  formula = Species ~ .,
  data = iris
)
print(weights)
##              attr_importance
## Sepal.Length       0.4521286
## Sepal.Width        0.2672750
## Petal.Length       0.9402853
## Petal.Width        0.9554360

arules

# Discretize Sepal.Length with four different methods; each result is stored
# as a new column of iris.

# Method "interval", 3 bins
iris[["Sepal.Length_interval"]] <- discretize(
  iris[["Sepal.Length"]],
  method = "interval",
  breaks = 3
)

# Method "frequency", 3 bins
iris[["Sepal.Length_frequency"]] <- discretize(
  iris[["Sepal.Length"]],
  method = "frequency",
  breaks = 3
)

# Method "cluster", 3 bins
iris[["Sepal.Length_cluster"]] <- discretize(
  iris[["Sepal.Length"]],
  method = "cluster",
  breaks = 3
)

# Method "fixed": explicit cut points at 5.5 and 6.5, open-ended on both sides
iris[["Sepal.Length_fixed"]] <- discretize(
  iris[["Sepal.Length"]],
  method = "fixed",
  breaks = c(-Inf, 5.5, 6.5, Inf)
)

# Show the first rows, including the new columns
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
##   Sepal.Length_interval Sepal.Length_frequency Sepal.Length_cluster
## 1             [4.3,5.5)              [4.3,5.4)           [4.3,5.37)
## 2             [4.3,5.5)              [4.3,5.4)           [4.3,5.37)
## 3             [4.3,5.5)              [4.3,5.4)           [4.3,5.37)
## 4             [4.3,5.5)              [4.3,5.4)           [4.3,5.37)
## 5             [4.3,5.5)              [4.3,5.4)           [4.3,5.37)
## 6             [4.3,5.5)              [5.4,6.3)          [5.37,6.36)
##   Sepal.Length_fixed
## 1         [-Inf,5.5)
## 2         [-Inf,5.5)
## 3         [-Inf,5.5)
## 4         [-Inf,5.5)
## 5         [-Inf,5.5)
## 6         [-Inf,5.5)

Установка пакета Boruta

# Load the Ozone data set. It ships with the mlbench package (not Boruta), so
# the package is named explicitly: this works whether or not mlbench is
# attached, and makes the origin of the data obvious.
data("Ozone", package = "mlbench")

# Drop every row that contains a missing value before running Boruta.
ozone_data <- na.omit(Ozone)

# Fix the seed so the run is reproducible.
set.seed(123)

# Run Boruta feature selection with V4 as the response and all remaining
# columns as candidate predictors; doTrace = 2 prints per-run progress and
# the decisions taken.
boruta_output <- Boruta(V4 ~ ., data = ozone_data, doTrace = 2)
##  1. run of importance source...
##  2. run of importance source...
##  3. run of importance source...
##  4. run of importance source...
##  5. run of importance source...
##  6. run of importance source...
##  7. run of importance source...
##  8. run of importance source...
##  9. run of importance source...
##  10. run of importance source...
##  11. run of importance source...
## After 11 iterations, +0.63 secs:
##  confirmed 9 attributes: V1, V10, V11, V12, V13 and 4 more;
##  rejected 2 attributes: V3, V6;
##  still have 1 attribute left.
##  12. run of importance source...
##  13. run of importance source...
##  14. run of importance source...
##  15. run of importance source...
##  16. run of importance source...
##  17. run of importance source...
##  18. run of importance source...
##  19. run of importance source...
##  20. run of importance source...
##  21. run of importance source...
##  22. run of importance source...
##  23. run of importance source...
##  24. run of importance source...
## After 24 iterations, +1.3 secs:
##  rejected 1 attribute: V2;
##  no more attributes left.
print(boruta_output)
## Boruta performed 24 iterations in 1.32677 secs.
##  9 attributes confirmed important: V1, V10, V11, V12, V13 and 4 more;
##  3 attributes confirmed unimportant: V2, V3, V6;

# Plot the Boruta result; axis labels are shrunk and drawn perpendicular to
# the axis (las = 2) so the attribute names stay readable.
plot(
  boruta_output,
  cex.axis = 0.7,
  las = 2,
  xlab = "",
  main = "Важность признаков (Boruta)"
)

# Names of the attributes Boruta confirmed; tentative ones are not requested.
confirmed_features <- getSelectedAttributes(boruta_output, withTentative = FALSE)

# One boxplot per confirmed feature, laid out side by side in a single row.
# Skipped entirely when nothing was confirmed, because
# par(mfrow = c(1, 0)) is an error.
if (length(confirmed_features) > 0) {
  # par() returns the previous values of the parameters it changes; keep them
  # so the margins and layout do not leak into later plots.
  old_par <- par(
    mar = c(4, 4, 2, 1),
    mfrow = c(1, length(confirmed_features))
  )
  for (feature in confirmed_features) {
    boxplot(ozone_data[[feature]], main = feature, ylab = "Значения")
  }
  par(old_par)
}