# ---- Data preparation for the KNN classifier ----
# Loads iris, standardises the numeric measurements, and creates a reproducible
# 70/30 train/test split with features and labels held in separate objects.
data(iris)

# Standardise every column except the class label (zero mean, unit variance).
# NOTE(review): the scaling statistics are computed on the full data set, i.e.
# before the split below, so test rows influence the standardisation. Consider
# deriving centre/scale from the training rows only if a leakage-free estimate
# is required.
iris_normalized <- as.data.frame(scale(iris[, names(iris) != "Species"]))
iris_normalized$Species <- iris$Species

# Fixed seed so the random split is reproducible.
set.seed(42)

# seq_len() is safe for zero-row input, and floor() makes explicit the
# truncation that sample() would otherwise apply to a fractional size.
train_index <- sample(seq_len(nrow(iris)), size = floor(0.7 * nrow(iris)))
train_data <- iris_normalized[train_index, ]
test_data <- iris_normalized[-train_index, ]

# Select features and labels by column name rather than by position, so the
# code keeps working if the column order changes.
train_features <- train_data[, names(train_data) != "Species"]
test_features <- test_data[, names(test_data) != "Species"]
train_labels <- train_data[["Species"]]
test_labels <- test_data[["Species"]]

# Number of neighbours used by the classifier below.
k_value <- 11
# Classify every test row from its k_value nearest training rows, then print
# the header for the confusion matrix that follows.
predicted_classes <- knn(
  train = train_features,
  test = test_features,
  cl = train_labels,
  k = k_value
)
cat("Матрица ошибок и статистика для KNN (k=", k_value, "):\n")
## Матрица ошибок и статистика для KNN (k= 11 ):
# Cross-tabulate actual against predicted classes. Row, column and chi-square
# proportions are switched off, leaving counts and table-total shares.
CrossTable(
  x = test_labels,
  y = predicted_classes,
  prop.r = FALSE,
  prop.c = FALSE,
  prop.chisq = FALSE,
  dnn = c("Actual Class", "Predicted Class")
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 45
##
##
## | Predicted Class
## Actual Class | setosa | versicolor | virginica | Row Total |
## -------------|------------|------------|------------|------------|
## setosa | 12 | 0 | 0 | 12 |
## | 0.267 | 0.000 | 0.000 | |
## -------------|------------|------------|------------|------------|
## versicolor | 0 | 14 | 1 | 15 |
## | 0.000 | 0.311 | 0.022 | |
## -------------|------------|------------|------------|------------|
## virginica | 0 | 1 | 17 | 18 |
## | 0.000 | 0.022 | 0.378 | |
## -------------|------------|------------|------------|------------|
## Column Total | 12 | 15 | 18 | 45 |
## -------------|------------|------------|------------|------------|
##
##
# Accuracy: number of test rows whose predicted class matches the actual
# class, divided by the number of test rows.
accuracy <- sum(predicted_classes == test_labels) / length(test_labels)
cat(
  "\nДоля правильных прогнозов (Accuracy):",
  round(accuracy, digits = 4),
  "\n"
)
##
## Доля правильных прогнозов (Accuracy): 0.9556
# ---- SVM classifier ----
# Separate copy of the iris data for the SVM section.
iris_svm <- iris
# Coerce the response to a factor (in the stock `iris` data it already is one,
# so this is presumably just a safeguard).
iris_svm$Species <- as.factor(iris_svm$Species)
# Fit an SVM with a linear kernel on all rows, using every other column as a
# predictor. `svm` is presumably e1071::svm, where `cross = 10` requests
# 10-fold cross-validation and `scale = TRUE` standardises the variables
# internally.
# NOTE(review): the cross-validation result is not read anywhere in the code
# visible here; confirm whether it should be reported.
model_svm <- svm(Species ~ ., data = iris_svm, kernel = "linear", cross = 10, scale = TRUE)
# Show the fitted model; the printout echoes the call above verbatim, so the
# call expression should not be reworded without updating recorded output.
print(model_svm)
##
## Call:
## svm(formula = Species ~ ., data = iris_svm, kernel = "linear", cross = 10,
## scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 29
# Predict the class of every row in iris_svm with the fitted model.
# NOTE(review): these are the same rows the model was fitted on, so the
# figures derived from them describe fit to the training data.
svm_predictions <- predict(model_svm, newdata = iris_svm)
cat("\nМатрица ошибок для SVM (Linear Kernel):\n")
##
## Матрица ошибок для SVM (Linear Kernel):
# Confusion matrix: predicted classes in rows, actual classes in columns.
table_svm <- table(
  svm_predictions,
  iris_svm$Species,
  dnn = c("Predicted", "Actual")
)
print(table_svm)
## Actual
## Predicted setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 46 1
## virginica 0 4 49
# Accuracy: the diagonal of the confusion matrix holds the correctly
# classified counts; divide by the total count in the table.
svm_accuracy <- sum(diag(table_svm)) / sum(table_svm)
cat(
  "\nДоля правильных прогнозов (SVM Accuracy):",
  round(svm_accuracy, digits = 4),
  "\n"
)
##
## Доля правильных прогнозов (SVM Accuracy): 0.9667
# ---- PCA ----
# Keep only the measurement columns (everything except the class label) and
# run an unconstrained rda() on them with scale = TRUE.
iris_numeric <- iris[, names(iris) != "Species"]
pca_result <- rda(iris_numeric, scale = TRUE)
summary(pca_result)
##
## Call:
## rda(X = iris_numeric, scale = TRUE)
##
## Partitioning of correlations:
## Inertia Proportion
## Total 4 1
## Unconstrained 4 1
##
## Eigenvalues, and their contribution to the correlations
##
## Importance of components:
## PC1 PC2 PC3 PC4
## Eigenvalue 2.9185 0.9140 0.14676 0.020715
## Proportion Explained 0.7296 0.2285 0.03669 0.005179
## Cumulative Proportion 0.7296 0.9581 0.99482 1.000000
# Eigenvalues of the unconstrained axes, each one's share of their sum, and
# the running total of those shares.
eigenvalues <- pca_result[["CA"]][["eig"]]
variance_explained <- prop.table(eigenvalues)
cumulative_variance <- cumsum(variance_explained)
cat("\nОбъясненная дисперсия по компонентам:\n")
##
## Объясненная дисперсия по компонентам:
# Print per-component and cumulative explained variance, rounded to 4 digits.
# The component index is derived from the data rather than hard-coded as 1:4,
# so the table still builds if the number of components changes.
print(round(
  data.frame(
    PC = seq_along(variance_explained),
    Variance = variance_explained,
    Cumulative = cumulative_variance
  ),
  4
))
## PC Variance Cumulative
## PC1 1 0.7296 0.7296
## PC2 2 0.2285 0.9581
## PC3 3 0.0367 0.9948
## PC4 4 0.0052 1.0000
# Base-graphics biplot of the PCA result, with site points coloured by species.
par(mar = c(5, 4, 4, 2) + 0.1)
plot(
  pca_result,
  scaling = 2,
  main = "PCA Biplot (vegan::rda)",
  xlab = "PC1",
  ylab = "PC2"
)
# Colour index = integer code of the species factor, so it lines up with the
# level order used in the legend below.
points(
  pca_result,
  display = "sites",
  col = as.integer(iris$Species),
  pch = 19,
  cex = 0.8
)
legend(
  "topright",
  legend = levels(iris$Species),
  col = seq_along(levels(iris$Species)),
  pch = 19,
  title = "Species"
)

# Pull site (observation) and species (variable) scores with scaling = 2 and
# turn each into a data frame for ggplot2.
scores_sites <- scores(pca_result, display = "sites", scaling = 2)
scores_species <- scores(pca_result, display = "species", scaling = 2)

# Observations: attach the class label used for point colour.
df_sites <- as.data.frame(scores_sites)
df_sites[["Species"]] <- iris[["Species"]]

# Variables: keep the variable name as a column for the arrow labels.
df_species <- as.data.frame(scores_species)
df_species[["Variable"]] <- rownames(df_species)
# ggplot2 ordination diagram: observations as points coloured by species,
# variables as labelled arrows from the origin, axis titles carrying the
# share of variance explained by each component.
ggplot() +
  geom_point(
    data = df_sites,
    mapping = aes(x = PC1, y = PC2, color = Species),
    size = 3,
    alpha = 0.7
  ) +
  geom_segment(
    data = df_species,
    mapping = aes(x = 0, y = 0, xend = PC1, yend = PC2),
    color = "black",
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  geom_text(
    data = df_species,
    mapping = aes(x = PC1, y = PC2, label = Variable),
    color = "black",
    size = 4,
    hjust = 0.5,
    vjust = 1.5
  ) +
  theme_minimal() +
  labs(
    title = "Ординационная диаграмма PCA (vegan::rda)",
    x = paste0("PC1 (", round(variance_explained[1] * 100, 1), "%)"),
    y = paste0("PC2 (", round(variance_explained[2] * 100, 1), "%)")
  ) +
  theme(legend.title = element_blank())
