Установка пакетов
caret
# Reproducible toy data: a 50 x 5 matrix of standard-normal draws and a
# two-level class label that alternates "A", "B" over the 50 rows.
set.seed(123)
x <- matrix(rnorm(250), nrow = 50, ncol = 5)
y <- factor(rep_len(c("A", "B"), length.out = 50))
# Exploratory feature plots with caret. The calls are namespace-qualified so
# this section runs even when caret has not been attached with library().
# Each call stays at top level so the returned plot object is auto-printed.

# Density plots of the first four iris columns, grouped by Species.
caret::featurePlot(x = iris[, 1:4], y = iris$Species, plot = "density")

# Box plots of every column of the matrix `x`, split by the label `y`.
caret::featurePlot(x = x, y = y, plot = "boxplot")

# Scatter-plot matrix of the columns of `x`, grouped by the label `y`.
caret::featurePlot(x = x, y = y, plot = "pairs")

FSelector
# Load the built-in iris data set into the workspace.
data(iris)
# Score every predictor by its information gain with respect to Species.
# The call is namespace-qualified so it works without library(FSelector).
weights <- FSelector::information.gain(Species ~ ., iris)
print(weights)
## attr_importance
## Sepal.Length 0.4521286
## Sepal.Width 0.2672750
## Petal.Length 0.9402853
## Petal.Width 0.9554360
arules
# Discretize Sepal.Length with four different strategies and store each
# result as a new column of `iris`. The calls are namespace-qualified so they
# work without library(arules).

# "interval": three bins of equal width.
iris$Sepal.Length_interval <- arules::discretize(
  iris$Sepal.Length,
  method = "interval",
  breaks = 3
)
# "frequency": three bins holding roughly the same number of observations.
iris$Sepal.Length_frequency <- arules::discretize(
  iris$Sepal.Length,
  method = "frequency",
  breaks = 3
)
# "cluster": three bins derived from clustering the values.
iris$Sepal.Length_cluster <- arules::discretize(
  iris$Sepal.Length,
  method = "cluster",
  breaks = 3
)
# "fixed": bin boundaries are given explicitly; the infinite end points make
# the outer bins open-ended so no value falls outside the breaks.
iris$Sepal.Length_fixed <- arules::discretize(
  iris$Sepal.Length,
  method = "fixed",
  breaks = c(-Inf, 5.5, 6.5, Inf)
)
# Show the first rows, including the newly added columns.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## Sepal.Length_interval Sepal.Length_frequency Sepal.Length_cluster
## 1 [4.3,5.5) [4.3,5.4) [4.3,5.37)
## 2 [4.3,5.5) [4.3,5.4) [4.3,5.37)
## 3 [4.3,5.5) [4.3,5.4) [4.3,5.37)
## 4 [4.3,5.5) [4.3,5.4) [4.3,5.37)
## 5 [4.3,5.5) [4.3,5.4) [4.3,5.37)
## 6 [4.3,5.5) [5.4,6.3) [5.37,6.36)
## Sepal.Length_fixed
## 1 [-Inf,5.5)
## 2 [-Inf,5.5)
## 3 [-Inf,5.5)
## 4 [-Inf,5.5)
## 5 [-Inf,5.5)
## 6 [-Inf,5.5)
Установка пакета Boruta
# Load the Ozone data set explicitly from mlbench, so the call does not depend
# on that package having been attached beforehand.
data("Ozone", package = "mlbench")
# Drop every row that contains at least one missing value.
ozone_data <- na.omit(Ozone)
# Fix the RNG state so the run is reproducible.
set.seed(123)
# Run Boruta feature selection with V4 as the response and all remaining
# columns as candidate predictors; doTrace = 2 reports progress per run.
# Namespace-qualified so it works without library(Boruta).
boruta_output <- Boruta::Boruta(V4 ~ ., data = ozone_data, doTrace = 2)
## 1. run of importance source...
## 2. run of importance source...
## 3. run of importance source...
## 4. run of importance source...
## 5. run of importance source...
## 6. run of importance source...
## 7. run of importance source...
## 8. run of importance source...
## 9. run of importance source...
## 10. run of importance source...
## 11. run of importance source...
## After 11 iterations, +0.63 secs:
## confirmed 9 attributes: V1, V10, V11, V12, V13 and 4 more;
## rejected 2 attributes: V3, V6;
## still have 1 attribute left.
## 12. run of importance source...
## 13. run of importance source...
## 14. run of importance source...
## 15. run of importance source...
## 16. run of importance source...
## 17. run of importance source...
## 18. run of importance source...
## 19. run of importance source...
## 20. run of importance source...
## 21. run of importance source...
## 22. run of importance source...
## 23. run of importance source...
## 24. run of importance source...
## After 24 iterations, +1.3 secs:
## rejected 1 attribute: V2;
## no more attributes left.
# Print the Boruta summary: number of iterations and which attributes were
# confirmed important or unimportant.
print(boruta_output)
## Boruta performed 24 iterations in 1.32677 secs.
## 9 attributes confirmed important: V1, V10, V11, V12, V13 and 4 more;
## 3 attributes confirmed unimportant: V2, V3, V6;
# Importance plot for the Boruta result; las = 2 draws the axis labels
# perpendicular to the axis and cex.axis = 0.7 shrinks them so they fit.
plot(
  boruta_output,
  cex.axis = 0.7,
  las = 2,
  xlab = "",
  main = "Важность признаков (Boruta)"
)

# Names of the attributes Boruta confirmed as important (tentative excluded).
# Namespace-qualified so it works without library(Boruta).
confirmed_features <- Boruta::getSelectedAttributes(
  boruta_output,
  withTentative = FALSE
)
# One box plot per confirmed attribute, side by side in a single row. Skipped
# when nothing was confirmed, because par(mfrow = c(1, 0)) is an error.
if (length(confirmed_features) > 0) {
  # par() returns the previous settings; keep them so the multi-panel layout
  # and margins do not leak into plots drawn later.
  old_par <- par(mar = c(4, 4, 2, 1), mfrow = c(1, length(confirmed_features)))
  for (feature in confirmed_features) {
    boxplot(ozone_data[[feature]], main = feature, ylab = "Значения")
  }
  par(old_par)
}
