# Silence all warnings for the rest of the script (the script's last line
# sets the option back to 0).
# NOTE(review): this also hides model-fitting and recycling warnings;
# consider removing it while debugging.
options(warn = -1)

library(imager)
library(caret)
library(dplyr)
library(nnet) # For multinomial logistic regression
library(factoextra)
library(keras)
library(tensorflow)
library(pROC) # For ROC analysis

# Define the path to the image folders
base_dir <- "C:/Users/raksh/Downloads/natural_images" # Adjust path as necessary

# One sub-directory per class. Full paths are kept so each directory can be
# listed directly; the class name is recovered later with basename().
classes <- list.dirs(base_dir, full.names = TRUE, recursive = FALSE)

# Accumulators for the flattened images and their class labels.
images <- list()
labels <- c()
# Load every image of every class directory, convert it to a 64x64 grayscale
# image and store it, together with four augmented variants, as flattened
# pixel vectors in `images`. `labels` holds the matching class name for each
# entry of `images`.
#
# NOTE(review): the augmented copies are created before the train/test split
# further down, so variants of the same source image can end up on both
# sides of the split and inflate the reported accuracy.

# Return the flattened base image followed by its augmented variants.
augment_image <- function(img_path, size = 64) {
  # Read, convert to grayscale and resize to a uniform size
  img <- resize(grayscale(load.image(img_path)), size, size)
  list(
    as.vector(img),                # original
    as.vector(imrotate(img, 180)), # rotated by 180 degrees
    as.vector(imrotate(img, 90)),  # rotated by 90 degrees
    as.vector(img * 1.2),          # increased brightness
    as.vector(img * 0.8)           # decreased brightness
  )
}
variants_per_image <- 5L # must match the length of augment_image()'s result

# Collect all file paths up front so the result can be preallocated instead
# of being grown (and copied) on every iteration.
files_by_class <- lapply(classes, list.files, full.names = TRUE)
img_paths <- unlist(files_by_class)
path_labels <- rep(basename(classes), lengths(files_by_class))

images <- vector("list", length(img_paths) * variants_per_image)
for (i in seq_along(img_paths)) {
  first_slot <- (i - 1L) * variants_per_image + 1L
  last_slot <- i * variants_per_image
  images[first_slot:last_slot] <- augment_image(img_paths[[i]])
}

# Every variant carries the label of the image it was derived from.
labels <- rep(path_labels, each = variants_per_image)
# Build the feature matrix (one row per image, one column per pixel) and the
# class factor, standardise the pixels and reduce them with PCA.
if (length(images) == 0) {
  stop("No images were loaded; check the image directory.", call. = FALSE)
}
X <- do.call(rbind, images)
y <- factor(labels)

# A pixel that is constant across all images has zero variance; scale()
# would turn that column into NaN and prcomp() would then fail, so such
# columns are removed first.
pixel_sd <- apply(X, 2, sd)
X <- X[, is.finite(pixel_sd) & pixel_sd > 0, drop = FALSE]
X <- scale(X)

# X is already centred and scaled, so prcomp() must not standardise again.
# NOTE(review): the PCA is fitted on all rows, i.e. before the train/test
# split below, so the test rows influence the projection.
pca <- prcomp(X, center = FALSE, scale. = FALSE)

# Row 2 of the importance table is the proportion of variance per component.
explained_variance <- summary(pca)$importance[2, ]
# Retain components for 95% variance
num_components <- which(cumsum(explained_variance) >= 0.95)[1]
# Limit number of components if too high
num_components <- min(num_components, 50)
# drop = FALSE keeps a matrix even when a single component is retained.
X_pca <- pca$x[, seq_len(num_components), drop = FALSE]
# Stratified 80/20 split of the PCA features into training and test sets.
set.seed(42) # For reproducibility
train_index <- createDataPartition(y, p = 0.8, list = FALSE)

# drop = FALSE keeps the feature sets as matrices even if only one PCA
# component was retained.
X_train <- X_pca[train_index, , drop = FALSE]
y_train <- y[train_index]
X_test <- X_pca[-train_index, , drop = FALSE]
y_test <- y[-train_index]
# Fit a multinomial logistic regression on the training features and
# evaluate it on the held-out test set.
# `y_train` is not a column of `data`; the formula picks it up from the
# calling environment, while `.` expands to all PCA component columns.
logistic_model <- multinom(y_train ~ ., data = as.data.frame(X_train))

# Despite its name, `pred_probs` holds predicted class labels
# (type = "class"), not probabilities.
pred_probs <- predict(
  logistic_model,
  newdata = as.data.frame(X_test),
  type = "class"
)

# Rows are predictions, columns are the true classes; the diagonal counts
# the correctly classified test images.
confusion_matrix <- table(Predicted = pred_probs, Actual = y_test)
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)

print(paste("Accuracy:", round(accuracy * 100, 2), "%"))
print("Confusion Matrix:")
print(confusion_matrix)

# Per-class statistics computed by caret from the same table.
report <- confusionMatrix(confusion_matrix)
print(report)
# Cluster the PCA features with k-means and determine, for every cluster,
# the class that occurs most often among its members.
set.seed(42) # For reproducibility
k <- 5 # Choose the number of clusters
kmeans_result <- kmeans(X_pca, centers = k, nstart = 25)

# PCA features plus the assigned cluster as the last column.
X_clustered <- as.data.frame(X_pca)
X_clustered$Cluster <- as.factor(kmeans_result$cluster)

# Original pixel vectors, kept as a list column via I(), with their class
# label and cluster assignment.
original_data <- data.frame(Features = I(images), Class = labels)
original_data$Cluster <- as.factor(kmeans_result$cluster)

# One row per cluster with its most frequent class.
# Adjust if Class column is named differently
cluster_summary <- original_data %>%
  group_by(Cluster) %>%
  summarize(Most_Common_Category = names(which.max(table(Class))))
# Visualise the k-means result twice: once with factoextra and once with a
# base-graphics scatter plot of the first two PCA components.

# The last column of X_clustered is the cluster label, so it is excluded
# from the data handed to fviz_cluster().
p <- fviz_cluster(
  kmeans_result,
  data = X_clustered[, -ncol(X_clustered)],
  geom = "point",
  ellipse.type = "convex",
  ggtheme = theme_minimal()
)
p <- p + labs(color = "Cluster") # This adds a legend for cluster colors
print(p)

# Points are coloured by the integer code of the cluster factor, which
# matches the 1:k colours used for the centres and the legend.
plot(
  X_clustered[, 1:2],
  col = X_clustered$Cluster,
  pch = 19,
  xlab = "PCA 1",
  ylab = "PCA 2",
  main = "K-means Clustering"
)

# Only the first two coordinates of each centre belong in this 2-D plot.
centers_2d <- kmeans_result$centers[, 1:2, drop = FALSE]
points(centers_2d, col = seq_len(k), pch = 8, cex = 2)

# Label every centre with the most common class of its cluster. Clusters are
# matched by id rather than by row position in the summary table.
summary_row <- match(
  rownames(kmeans_result$centers),
  as.character(cluster_summary$Cluster)
)
text(
  centers_2d[, 1],
  centers_2d[, 2],
  labels = cluster_summary$Most_Common_Category[summary_row],
  pos = 3,
  col = "black",
  cex = 0.8
)

legend(
  "topright",
  legend = paste("Cluster", seq_len(k)),
  col = seq_len(k),
  pch = 19,
  title = "Clusters"
)
# Packages used by the second part of the script (FactoMineR-based PCA).
library(imager)
library(dplyr)
library(nnet)
library(FactoMineR)
library(factoextra)
# Location of the image data set; every sub-directory is one class.
image_dir <- "C:/Users/raksh/Downloads/natural_images"

# Class names are the names (not full paths) of the immediate
# sub-directories. Listing directories only keeps stray files in
# `image_dir` from being treated as classes.
classes <- list.dirs(image_dir, full.names = FALSE, recursive = FALSE)

# Target size, in pixels, that every image is resized to.
img_width <- 32
img_height <- 32
# Load one image file, resize it and flatten it to a numeric vector.
#
# file_path:  path of the image file to read.
# img_width:  target width in pixels.
# img_height: target height in pixels.
#
# Returns the pixel values of the resized image as a plain vector.
# NOTE(review): the image is not converted to grayscale here, so the length
# of the result presumably depends on the number of colour channels in the
# file - confirm that all input images share the same channel count.
process_image <- function(file_path, img_width, img_height) {
  img <- load.image(file_path) %>%
    resize(img_width, img_height)
  as.vector(img) # Flatten the image to a vector
}
# Read every .jpg/.png image of every class into `image_data` (a list of
# flattened pixel vectors) and record the class of each image in
# `image_labels`.

# Collect the image files class by class; the order of `classes` and of the
# files within each class is preserved.
files_by_class <- lapply(classes, function(class_name) {
  list.files(
    file.path(image_dir, class_name),
    full.names = TRUE,
    pattern = "\\.jpg$|\\.png$"
  )
})
all_image_files <- unlist(files_by_class)

# Assign class label: one label per file, repeated per class.
image_labels <- rep(classes, lengths(files_by_class))

# Process all files in one pass instead of growing the list with append().
image_data <- lapply(
  all_image_files,
  process_image,
  img_width = img_width,
  img_height = img_height
)
# Combine the flattened images into a data frame (one row per image, one
# column per pixel value, plus a `label` column) and draw an 80/20 split.

# rbind() recycles vectors of unequal length with nothing but a warning,
# and warnings are switched off in this script, so the lengths are checked
# explicitly.
vector_lengths <- unique(lengths(image_data))
if (length(vector_lengths) != 1) {
  stop(
    "Expected at least one image and identical pixel-vector lengths; ",
    "found lengths: ", paste(vector_lengths, collapse = ", "),
    call. = FALSE
  )
}

image_matrix <- do.call(rbind, image_data)
image_df <- as.data.frame(image_matrix)
image_df$label <- factor(image_labels)

set.seed(123) # For reproducibility
# floor() makes the integer sample size explicit.
sample_index <- sample(
  seq_len(nrow(image_df)),
  size = floor(0.8 * nrow(image_df))
)
train_df <- image_df[sample_index, ]
validation_df <- image_df[-sample_index, ]
# Reduce the pixel columns with FactoMineR's PCA and rebuild the training
# and validation sets from the standardised component scores.

# All columns except the trailing `label` column are pixel values.
pixel_df <- image_df[, -ncol(image_df)]

# PCA() keeps only 5 dimensions unless `ncp` is given, which would cap the
# "150 or maximum available" selection below at 5. Request the intended
# number explicitly.
requested_components <- max(1, min(150, ncol(pixel_df), nrow(pixel_df) - 1))
# NOTE(review): the PCA is fitted on all rows, so the validation rows
# influence the projection.
pca_result <- PCA(pixel_df, ncp = requested_components, graph = FALSE)

max_components <- ncol(pca_result$ind$coord)

# Use 150 or the maximum available components
num_components <- min(150, max_components)
cat("Using", num_components, "PCA components...")

# drop = FALSE keeps a matrix even when a single component is selected.
pca_features <- pca_result$ind$coord[, seq_len(num_components), drop = FALSE]

# Standardise the component scores column by column.
pca_features <- scale(pca_features)

pca_df <- as.data.frame(pca_features)
pca_df$label <- image_df$label

# Reuse the row split drawn earlier so both representations share it.
train_df <- pca_df[sample_index, ]
validation_df <- pca_df[-sample_index, ]
# Collect the accuracy of every model in one data frame for plotting.
logistic_accuracy <- accuracy # Accuracy from the logistic regression model

# Accuracies of the neural networks, keyed by hidden-layer size.
# NOTE(review): these values are hard-coded; the code that produced them is
# not part of this section. Names containing spaces must be quoted.
nn_accuracies <- c(
  "5 hidden units" = 0.5416667,
  "10 hidden units" = 0.5,
  "20 hidden units" = 0.5416667,
  "50 hidden units" = 0.5
)

# unname() keeps the vector names from leaking into the row names; the
# model names already live in the `Model` column.
comparison_df <- data.frame(
  Model = c("Logistic Regression", names(nn_accuracies)),
  Accuracy = unname(c(logistic_accuracy, nn_accuracies))
)
library(ggplot2)

# Bar chart comparing the accuracy of all models, with the rounded accuracy
# printed above each bar.
accuracy_plot <- ggplot(
  comparison_df,
  aes(x = Model, y = Accuracy, fill = Model)
) +
  geom_col(position = position_dodge(), width = 0.7) +
  # Keep the bars in the order of comparison_df instead of sorting the
  # model names alphabetically ("10 ..." would come before "5 ...").
  scale_x_discrete(limits = comparison_df$Model) +
  labs(title = "Model Accuracy Comparison", x = "Model", y = "Accuracy") +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal() +
  geom_text(
    aes(label = round(Accuracy, 2)),
    position = position_dodge(0.7),
    vjust = -0.5
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Print explicitly so the plot also appears when the script is run with
# source(), where top-level values are not auto-printed.
print(accuracy_plot)

# Re-enable the default warning behaviour that was switched off at the top.
options(warn = 0)