Задание 2: k-ближайшие соседи (kNN)

# kNN classification of iris species.
# Load the data and drop exact duplicate rows.
data(iris)
iris <- unique(iris)

# Split on Species with 80 % of the rows going to training; the seed makes the
# partition reproducible.
set.seed(123)
train_index <- createDataPartition(iris$Species, p = 0.8, list = FALSE)

# Standardise the four numeric features with the mean and standard deviation
# of the TRAINING rows only, then apply those same parameters to every row.
# Estimating them on the full data would let the test observations influence
# the preprocessing (information leakage) and bias the accuracy estimate.
train_scaling <- scale(iris[train_index, 1:4])
iris_scaled <- as.data.frame(
  scale(
    iris[, 1:4],
    center = attr(train_scaling, "scaled:center"),
    scale = attr(train_scaling, "scaled:scale")
  )
)
iris_scaled$Species <- iris$Species

# Features (columns 1-4) and labels for the training and test partitions.
train_data <- iris_scaled[train_index, ]
test_data <- iris_scaled[-train_index, ]
train_x <- train_data[, 1:4]
test_x <- test_data[, 1:4]
train_y <- train_data$Species
test_y <- test_data$Species

# Rule-of-thumb neighbourhood size: floor(sqrt(number of training rows)).
# This is a heuristic, not a value tuned by validation.
k_optimal <- floor(sqrt(nrow(train_data)))
knn_pred <- knn(train = train_x, test = test_x, cl = train_y, k = k_optimal)

# Confusion matrix: rows are predicted species, columns are actual species.
conf_matrix <- table(Predicted = knn_pred, Actual = test_y)
# Display the kNN confusion matrix (rows: predicted, columns: actual species).
print("Матрица ошибок (kNN):")
## [1] "Матрица ошибок (kNN):"
print(conf_matrix)
##             Actual
## Predicted    setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0          9         0
##   virginica       0          1         9
# Accuracy: diagonal (correctly classified) count over all test observations.
accuracy <- sum(diag(conf_matrix)) /
  sum(conf_matrix)
cat("\nТочность модели kNN:", round(accuracy, digits = 4), "\n\n")
## 
## Точность модели kNN: 0.9655
# Cross-tabulate actual vs. predicted species. Row, column and chi-square
# proportions are switched off, so each cell shows the count and its share of
# the table total.
CrossTable(
  x = test_y,
  y = knn_pred,
  prop.r = FALSE,
  prop.c = FALSE,
  prop.chisq = FALSE,
  dnn = c("Фактический вид", "Предсказанный вид")
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  29 
## 
##  
##                 | Предсказанный вид 
## Фактический вид |     setosa | versicolor |  virginica |  Row Total | 
## ----------------|------------|------------|------------|------------|
##          setosa |         10 |          0 |          0 |         10 | 
##                 |      0.345 |      0.000 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##      versicolor |          0 |          9 |          1 |         10 | 
##                 |      0.000 |      0.310 |      0.034 |            | 
## ----------------|------------|------------|------------|------------|
##       virginica |          0 |          0 |          9 |          9 | 
##                 |      0.000 |      0.000 |      0.310 |            | 
## ----------------|------------|------------|------------|------------|
##    Column Total |         10 |          9 |         10 |         29 | 
## ----------------|------------|------------|------------|------------|
## 
## 

Задание 3: Метод опорных векторов (SVM)

# Linear-kernel SVM on iris with the cost parameter chosen by tune().
# Load the data and drop exact duplicate rows.
data(iris)
iris <- unique(iris)

# Simple random 80/20 split. seq_len() is safe for zero-row input, and the
# training size is floored explicitly instead of relying on sample() to
# truncate a fractional size.
set.seed(123)
train_index <- sample(seq_len(nrow(iris)), size = floor(0.8 * nrow(iris)))
train_iris <- iris[train_index, ]
test_iris <- iris[-train_index, ]

# Re-seed right before tuning so the cross-validated search is reproducible.
# The candidate cost values form the search grid; tune.control(cross = 10)
# configures the cross-validation used to compare them.
set.seed(123)
tuned_model <- tune(
  svm, Species ~ .,
  data = train_iris,
  kernel = "linear",
  ranges = list(cost = c(0.1, 1, 10, 100)),
  tunecontrol = tune.control(cross = 10)
)

# Keep the best model found by the search and evaluate it on the held-out rows.
best_svm <- tuned_model$best.model
svm_pred <- predict(best_svm, newdata = test_iris)

# Confusion matrix: rows are predicted species, columns are actual species.
conf_matrix_svm <- table(Predicted = svm_pred, Actual = test_iris$Species)
# Display the SVM confusion matrix (rows: predicted, columns: actual species).
print("Матрица ошибок (SVM):")
## [1] "Матрица ошибок (SVM):"
print(conf_matrix_svm)
##             Actual
## Predicted    setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         15         1
##   virginica       0          0         4
# Accuracy: diagonal (correctly classified) count over all test observations.
accuracy_svm <- sum(diag(conf_matrix_svm)) /
  sum(conf_matrix_svm)
cat("\nТочность модели SVM:", round(accuracy_svm, digits = 4), "\n")
## 
## Точность модели SVM: 0.9667

Задание 4: Анализ главных компонент (PCA)

# PCA of the four numeric iris measurements via rda() with scale = TRUE.
# data(iris) reloads the original data set, because earlier sections replaced
# `iris` with its de-duplicated version.
data(iris)
iris_num <- iris[1:4]
iris_pca <- rda(X = iris_num, scale = TRUE)
print(summary(iris_pca))
## 
## Call:
## rda(X = iris_num, scale = TRUE) 
## 
## Partitioning of correlations:
##               Inertia Proportion
## Total               4          1
## Unconstrained       4          1
## 
## Eigenvalues, and their contribution to the correlations 
## 
## Importance of components:
##                          PC1    PC2     PC3      PC4
## Eigenvalue            2.9185 0.9140 0.14676 0.020715
## Proportion Explained  0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000
# Ordination plot: start from an empty frame (type = "n"), add the site scores
# as filled points coloured by species, then label the "species" scores in
# blue.
plot(iris_pca, type = "n", main = "PCA ординация видов iris")
points(
  iris_pca,
  display = "sites",
  pch = 19,
  col = as.integer(iris$Species)
)
text(iris_pca, display = "species", col = "blue", cex = 0.8)
# Legend colours follow the factor level order used for the points above.
legend(
  "topright",
  title = "Вид",
  legend = levels(iris$Species),
  col = seq_len(nlevels(iris$Species)),
  pch = 19
)

# PCA of the same numeric columns with prcomp() (variables scaled), drawn as
# a biplot: the first colour/size applies to observations, the second to
# variables.
pca_prcomp <- prcomp(iris_num, scale. = TRUE)
biplot(
  pca_prcomp,
  main = "PCA биплот (prcomp)",
  col = c("grey60", "red"),
  cex = c(0.7, 1)
)

Выводы