Установка пакетов

Классификация методом k-ближайших соседей

# Load the built-in iris data set into the workspace.
data("iris")

# Min-max rescale a numeric vector to the range [0, 1].
#
# Arguments:
#   x      A numeric vector.
#   na.rm  If TRUE, missing values are ignored when the minimum and maximum
#          are computed (they stay NA in the result). The default, FALSE,
#          keeps the previous behaviour: any NA makes the whole result NA.
#
# Returns a numeric vector the same length as `x`. A vector whose values are
# all equal is mapped to zeros instead of NaN.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]
  # A constant vector has zero span; dividing would give 0 / 0 = NaN.
  if (!is.na(span) && span == 0) {
    return(x - bounds[1])
  }
  (x - bounds[1]) / span
}

# Rescale the four numeric measurement columns with normalize(); Species
# (column 5) is not rescaled and is used below as the class label.
# NOTE(review): the min/max are taken over all rows, test rows included.
# Fitting the scaling on the training rows only would avoid that leakage.
iris_norm <- as.data.frame(lapply(iris[1:4], normalize))

# Split the data into training (70 %) and test (30 %) sets; the fixed seed
# keeps the split reproducible.
set.seed(123)
indices <- sample(
  seq_len(nrow(iris_norm)),
  size = floor(0.7 * nrow(iris_norm))
)
train_data <- iris_norm[indices, ]
test_data <- iris_norm[-indices, ]
train_labels <- iris[indices, 5]
test_labels <- iris[-indices, 5]

# k-nearest-neighbours classification with k = 3: train_data and train_labels
# form the reference set, test_data holds the rows to classify.
# NOTE(review): knn() is not defined in this file; presumably class::knn —
# confirm the package is attached before this point.
knn_pred <- knn(train = train_data, test = test_data, cl = train_labels, k = 3)

# Model evaluation: cross-tabulate the actual labels (x, rows) against the
# predictions (y, columns). With prop.chisq = FALSE the printed legend lists
# only counts and row/column/table proportions.
# NOTE(review): CrossTable() presumably comes from gmodels — confirm.
CrossTable(x = test_labels, y = knn_pred, prop.chisq = FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  45 
## 
##  
##              | knn_pred 
##  test_labels |     setosa | versicolor |  virginica |  Row Total | 
## -------------|------------|------------|------------|------------|
##       setosa |         14 |          0 |          0 |         14 | 
##              |      1.000 |      0.000 |      0.000 |      0.311 | 
##              |      1.000 |      0.000 |      0.000 |            | 
##              |      0.311 |      0.000 |      0.000 |            | 
## -------------|------------|------------|------------|------------|
##   versicolor |          0 |         17 |          1 |         18 | 
##              |      0.000 |      0.944 |      0.056 |      0.400 | 
##              |      0.000 |      0.944 |      0.077 |            | 
##              |      0.000 |      0.378 |      0.022 |            | 
## -------------|------------|------------|------------|------------|
##    virginica |          0 |          1 |         12 |         13 | 
##              |      0.000 |      0.077 |      0.923 |      0.289 | 
##              |      0.000 |      0.056 |      0.923 |            | 
##              |      0.000 |      0.022 |      0.267 |            | 
## -------------|------------|------------|------------|------------|
## Column Total |         14 |         18 |         13 |         45 | 
##              |      0.311 |      0.400 |      0.289 |            | 
## -------------|------------|------------|------------|------------|
## 
## 
# Confusion matrix: actual species in rows, kNN predictions in columns.
confusion_matrix <- table(test_labels = test_labels, knn_pred = knn_pred)
print(confusion_matrix)
##             knn_pred
## test_labels  setosa versicolor virginica
##   setosa         14          0         0
##   versicolor      0         17         1
##   virginica       0          1        12
# Overall accuracy: correctly classified cases (the diagonal of the confusion
# matrix) divided by all cases in the table.
knn_hits <- sum(diag(confusion_matrix))
knn_total <- sum(confusion_matrix)
diagonal_accuracy <- knn_hits / knn_total
print(paste("Диагональная точность:", diagonal_accuracy))
## [1] "Диагональная точность: 0.955555555555556"

Метод опорных векторов (SVM)

# 70/30 train/test split of the raw iris data (Species included, since the
# rows keep all columns). Same seed and row count as the kNN section, so the
# same rows are selected.
set.seed(123)
indices <- sample(seq_len(nrow(iris)), size = floor(0.7 * nrow(iris)))
train_data <- iris[indices, ]
test_data <- iris[-indices, ]

# Fit a linear-kernel SVM that predicts Species from all other columns of
# train_data.
# NOTE(review): svm() presumably comes from e1071, where cross = 10 requests
# 10-fold cross-validation; its results are never read in the visible code.
svm_model <- svm(Species ~ ., data = train_data, kernel = "linear", cross = 10)

# Predict the species for the rows of test_data.
svm_pred <- predict(svm_model, test_data)

# Confusion matrix: actual species in rows, SVM predictions in columns.
confusion_matrix_svm <- table(test_data$Species, svm_pred)
print(confusion_matrix_svm)
##             svm_pred
##              setosa versicolor virginica
##   setosa         14          0         0
##   versicolor      0         17         1
##   virginica       0          0        13
# Overall SVM accuracy: diagonal (correct) count over the table total.
svm_hits <- sum(diag(confusion_matrix_svm))
svm_total <- sum(confusion_matrix_svm)
svm_accuracy <- svm_hits / svm_total
print(paste("Точность SVM:", svm_accuracy))
## [1] "Точность SVM: 0.977777777777778"

Анализ главных компонент (PCA)

# Principal component analysis of the four numeric iris columns, run through
# rda() with no constraining variables. With scale = TRUE the summary reports
# a partitioning of correlations (total inertia 4, one unit per variable).
# NOTE(review): rda() presumably comes from vegan — confirm it is attached.
pca_result <- rda(iris[, 1:4], scale = TRUE)

# Print eigenvalues and the proportion explained by each component.
summary(pca_result)
## 
## Call:
## rda(X = iris[, 1:4], scale = TRUE) 
## 
## Partitioning of correlations:
##               Inertia Proportion
## Total               4          1
## Unconstrained       4          1
## 
## Eigenvalues, and their contribution to the correlations 
## 
## Importance of components:
##                          PC1    PC2     PC3      PC4
## Eigenvalue            2.9185 0.9140 0.14676 0.020715
## Proportion Explained  0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000
# Ordination plot of the observations ("sites"), labelled by row.
plot(pca_result, display = "sites", type = "text")

# Overlay one filled point per observation, coloured by the integer code of
# its species.
points(
  pca_result,
  display = "sites",
  col = as.integer(iris$Species),
  pch = 16
)
# Legend colours are derived from the factor levels so they always match the
# codes used for the points, instead of a hard-coded 1:3.
legend(
  "topright",
  legend = levels(iris$Species),
  col = seq_len(nlevels(iris$Species)),
  pch = 16
)