Installing the packages
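The original listing omits the loading step; the calls below rely on class (knn), gmodels (CrossTable), e1071 (svm) and vegan (rda), so a minimal setup looks like this:

# Install once if the packages are not yet available
# install.packages(c("class", "gmodels", "e1071", "vegan"))

# Load the libraries used below
library(class)    # knn()
library(gmodels)  # CrossTable()
library(e1071)    # svm()
library(vegan)    # rda()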
k-nearest neighbours (kNN) classification
# Load the built-in iris data set: 150 flowers, 4 measurements, 3 species
data(iris)
# Min-max normalisation: rescale a numeric vector to the [0, 1] range
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
# Normalise the four numeric columns of iris (Species is left out)
iris_norm <- as.data.frame(lapply(iris[1:4], normalize))
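A quick sanity check, not shown in the original, confirms that every rescaled column spans [0, 1]:

# Each column should report minimum 0 and maximum 1
sapply(iris_norm, range)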
# Split the data into training (70%) and test (30%) sets
set.seed(123)
indices <- sample(1:nrow(iris_norm), size = 0.7 * nrow(iris_norm))
train_data <- iris_norm[indices, ]
test_data <- iris_norm[-indices, ]
train_labels <- iris[indices, 5]   # column 5 is Species
test_labels <- iris[-indices, 5]
# Classify the test set with k-nearest neighbours (k = 3)
knn_pred <- knn(train = train_data, test = test_data, cl = train_labels, k = 3)
# Evaluate the model: cross-tabulate actual vs. predicted classes
CrossTable(x = test_labels, y = knn_pred, prop.chisq = FALSE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 45
##
##
## | knn_pred
## test_labels | setosa | versicolor | virginica | Row Total |
## -------------|------------|------------|------------|------------|
## setosa | 14 | 0 | 0 | 14 |
## | 1.000 | 0.000 | 0.000 | 0.311 |
## | 1.000 | 0.000 | 0.000 | |
## | 0.311 | 0.000 | 0.000 | |
## -------------|------------|------------|------------|------------|
## versicolor | 0 | 17 | 1 | 18 |
## | 0.000 | 0.944 | 0.056 | 0.400 |
## | 0.000 | 0.944 | 0.077 | |
## | 0.000 | 0.378 | 0.022 | |
## -------------|------------|------------|------------|------------|
## virginica | 0 | 1 | 12 | 13 |
## | 0.000 | 0.077 | 0.923 | 0.289 |
## | 0.000 | 0.056 | 0.923 | |
## | 0.000 | 0.022 | 0.267 | |
## -------------|------------|------------|------------|------------|
## Column Total | 14 | 18 | 13 | 45 |
## | 0.311 | 0.400 | 0.289 | |
## -------------|------------|------------|------------|------------|
##
##
# Confusion matrix
confusion_matrix <- table(test_labels, knn_pred)
print(confusion_matrix)
## knn_pred
## test_labels setosa versicolor virginica
## setosa 14 0 0
## versicolor 0 17 1
## virginica 0 1 12
# Overall accuracy: correct predictions (diagonal) over all predictions
diagonal_accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
print(paste("Accuracy:", diagonal_accuracy))
## [1] "Accuracy: 0.955555555555556"
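The value k = 3 is used above without justification; one common refinement is to sweep several values of k and compare test-set accuracy. A sketch under that assumption (the helper objects k_values and acc_by_k are illustrative, not part of the original analysis):

# Try odd k from 1 to 15 and record the test-set accuracy for each
k_values <- seq(1, 15, by = 2)
acc_by_k <- sapply(k_values, function(k) {
  pred <- knn(train = train_data, test = test_data, cl = train_labels, k = k)
  mean(pred == test_labels)
})
data.frame(k = k_values, accuracy = acc_by_k)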
Support vector machines (SVM)
# Re-split the raw (unnormalised) iris data 70/30
set.seed(123)
indices <- sample(1:nrow(iris), size = 0.7 * nrow(iris))
train_data <- iris[indices, ]
test_data <- iris[-indices, ]
# Linear-kernel SVM with 10-fold cross-validation on the training set
svm_model <- svm(Species ~ ., data = train_data, kernel = "linear", cross = 10)
svm_pred <- predict(svm_model, test_data)
confusion_matrix_svm <- table(test_data$Species, svm_pred)
print(confusion_matrix_svm)
## svm_pred
## setosa versicolor virginica
## setosa 14 0 0
## versicolor 0 17 1
## virginica 0 0 13
svm_accuracy <- sum(diag(confusion_matrix_svm)) / sum(confusion_matrix_svm)
print(paste("SVM accuracy:", svm_accuracy))
## [1] "SVM accuracy: 0.977777777777778"
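Since the model was fitted with cross = 10, e1071 also reports a 10-fold cross-validation accuracy via summary(); a grid search over the cost parameter is a further check worth making. A sketch using e1071::tune() (the cost grid is an arbitrary choice, not taken from the original):

# Cross-validation accuracy recorded by svm(..., cross = 10)
summary(svm_model)

# Grid search over the cost parameter
tuned <- tune(svm, Species ~ ., data = train_data,
              kernel = "linear",
              ranges = list(cost = c(0.01, 0.1, 1, 10, 100)))
summary(tuned)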
Principal component analysis (PCA)
# Unconstrained ordination with vegan::rda on standardised variables
# (equivalent to PCA on the correlation matrix)
pca_result <- rda(iris[, 1:4], scale = TRUE)
summary(pca_result)
##
## Call:
## rda(X = iris[, 1:4], scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 4 1
## Unconstrained 4 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4
## Eigenvalue 2.9185 0.9140 0.14676 0.020715
## Proportion Explained 0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000
# Ordination diagram: site (sample) scores as text labels, overlaid with
# points coloured by species
plot(pca_result, display = "sites", type = "text")
points(pca_result, display = "sites", col = as.numeric(iris$Species), pch = 16)
legend("topright", legend = levels(iris$Species), col = 1:3, pch = 16)
