Load necessary libraries

options(warn = -1) library(imager) library(caret) library(dplyr) library(nnet) # For multinomial logistic regression library(factoextra) library(keras) library(tensorflow) library(pROC) # For ROC analysis options(warn = -1) # Define the path to the image folders base_dir <- “C:/Users/raksh/Downloads/natural_images” # Adjust path as necessary classes <- list.dirs(base_dir, full.names = TRUE, recursive = FALSE)

Prepare lists for images and labels

images <- list() labels <- c()

Load images, convert to grayscale, and store them

for (class in classes) { class_label <- basename(class) img_files <- list.files(class, full.names = TRUE)

for (img_path in img_files) { # Read and convert to grayscale img <- load.image(img_path) img_gray <- grayscale(img) img_resized <- resize(img_gray, 64, 64) # Resize images to a uniform size

# Data Augmentation
img_flipped <- imrotate(img_resized, 180)  # Flip image
img_rotated <- imrotate(img_resized, 90)   # Rotate image
img_brightened <- img_resized * 1.2  # Increase brightness
img_darker <- img_resized * 0.8  # Decrease brightness

# Flatten the images and append to list
images[[length(images) + 1]] <- as.vector(img_resized)
labels <- c(labels, class_label)

# Append augmented images
images[[length(images) + 1]] <- as.vector(img_flipped)
labels <- c(labels, class_label)

images[[length(images) + 1]] <- as.vector(img_rotated)
labels <- c(labels, class_label)

images[[length(images) + 1]] <- as.vector(img_brightened)
labels <- c(labels, class_label)

images[[length(images) + 1]] <- as.vector(img_darker)
labels <- c(labels, class_label)

} }

Convert lists to data frame

X <- do.call(rbind, images) y <- factor(labels)

Normalize the feature set

X <- scale(X)

Dimensionality reduction using PCA

pca <- prcomp(X, center = TRUE, scale. = TRUE)

Determine the number of components to retain based on explained variance

explained_variance <- summary(pca)$importance[2, ] num_components <- which(cumsum(explained_variance) >= 0.95)[1] # Retain components for 95% variance num_components <- min(num_components, 50) # Limit number of components if too high X_pca <- pca$x[, 1:num_components]

Split the data into training and testing sets

set.seed(42) # For reproducibility train_index <- createDataPartition(y, p = 0.8, list = FALSE) X_train <- X_pca[train_index, ] y_train <- y[train_index] X_test <- X_pca[-train_index, ] y_test <- y[-train_index]

Train the Multinomial Logistic Regression model

logistic_model <- multinom(y_train ~ ., data = as.data.frame(X_train))

Make predictions on the test set

pred_probs <- predict(logistic_model, newdata = as.data.frame(X_test), type = “class”)

Evaluate the model

confusion_matrix <- table(Predicted = pred_probs, Actual = y_test) accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)

print(paste(“Accuracy:”, round(accuracy * 100, 2), “%”)) print(“Confusion Matrix:”) print(confusion_matrix)

Optionally, create a classification report

report <- confusionMatrix(confusion_matrix) print(report)

Step 2: Clustering

Using PCA reduced data or scaled data for clustering

set.seed(42) # For reproducibility k <- 5 # Choose the number of clusters kmeans_result <- kmeans(X_pca, centers = k, nstart = 25)

Add cluster assignments to the original data

X_clustered <- as.data.frame(X_pca) X_clustered$Cluster <- as.factor(kmeans_result$cluster)

Add cluster assignments to the original labels for summary

original_data <- data.frame(Features = I(images), Class = labels) original_data$Cluster <- as.factor(kmeans_result$cluster)

Create a summary of the most common category in each cluster

cluster_summary <- original_data %>% group_by(Cluster) %>% summarize(Most_Common_Category = names(which.max(table(Class)))) # Adjust if Class column is named differently

Visualize clustering results with a legend

Create the cluster plot

p <- fviz_cluster(kmeans_result, data = X_clustered[, -ncol(X_clustered)], geom = “point”, ellipse.type = “convex”, ggtheme = theme_minimal())

Add the legend title

p <- p + labs(color = “Cluster”) # This adds a legend for cluster colors

Print the clustering plot

print(p)

Plotting the first two PCA dimensions with cluster assignments

plot(X_clustered[, 1:2], col = X_clustered$Cluster, pch = 19, xlab = “PCA 1”, ylab = “PCA 2”, main = “K-means Clustering”)

Add cluster centers

points(kmeans_result$centers, col = 1:k, pch = 8, cex = 2)

Add labels for the most common category in each cluster

for (i in 1:k) { text(kmeans_result$centers[i, 1], kmeans_result$centers[i, 2], labels = cluster_summary$Most_Common_Category[i], pos = 3, col = “black”, cex = 0.8) }

Create a custom legend

legend(“topright”, legend = paste(“Cluster”, 1:k), col = 1:k, pch = 19, title = “Clusters”)

Step 3: Neural network

Load necessary libraries

library(imager) library(dplyr) library(nnet) library(FactoMineR) library(factoextra)

Set the directory and classes

image_dir <- “C:/Users/raksh/Downloads/natural_images” classes <- list.files(image_dir)

Define image dimensions (Reduced size)

img_width <- 32 img_height <- 32

Helper function to load and preprocess images

process_image <- function(file_path, img_width, img_height) { img <- load.image(file_path) %>% resize(img_width, img_height) as.vector(img) # Flatten the image to a vector }

Create an empty list to store data

image_data <- list() image_labels <- c()

Read images and create a dataframe

for (class_name in classes) { class_dir <- file.path(image_dir, class_name) image_files <- list.files(class_dir, full.names = TRUE, pattern = “\.jpg$|\\.png$”)

for (image_file in image_files) { image_vector <- process_image(image_file, img_width, img_height) image_data <- append(image_data, list(image_vector)) image_labels <- append(image_labels, class_name) # Assign class label } }

Create the data frame with image vectors and labels

image_matrix <- do.call(rbind, image_data) image_df <- as.data.frame(image_matrix) image_df$label <- factor(image_labels)

Split the data into train and validation sets (80-20 split)

set.seed(123) # For reproducibility sample_index <- sample(seq_len(nrow(image_df)), size = 0.8 * nrow(image_df)) train_df <- image_df[sample_index, ] validation_df <- image_df[-sample_index, ]

Apply PCA to reduce dimensionality of the image data (excluding labels)

pca_result <- PCA(image_df[, -ncol(image_df)], graph = FALSE)

Determine the maximum number of components available

max_components <- ncol(pca_result$ind$coord)

Choose a higher number of components (e.g., 150), ensuring it does not exceed the maximum

num_components <- min(150, max_components) # Use 150 or the maximum available components cat(“Using”, num_components, “PCA components…”)

Extract the top components from PCA results

pca_features <- pca_result$ind$coord[, 1:num_components]

Normalize the PCA features

pca_features <- scale(pca_features)

Create a new dataframe with PCA features and labels

pca_df <- as.data.frame(pca_features) pca_df$label <- image_df$label

Split the PCA-transformed data into train and validation sets (80-20 split)

train_df <- pca_df[sample_index, ] validation_df <- pca_df[-sample_index, ]

Define number of neurons in the hidden layer for experiments

hidden_layer_sizes <- c(5, 10, 20, 50) # Increased layer size for experimentation

Train and evaluate models with different hidden layer sizes using nnet

for (units in hidden_layer_sizes) { cat(“Training model with”, units, “hidden units using PCA features…”)

# Train the neural network model nn_model <- nnet( label ~ ., data = train_df, size = units, maxit = 300, # Increase the number of iterations decay = 0.001, # Adjust regularization parameter trace = FALSE # Suppress iteration output )

# Predict on validation data validation_pred <- predict(nn_model, validation_df, type = “class”)

# Calculate accuracy accuracy <- mean(validation_pred == validation_df$label) cat(“Model with”, units, “hidden units - Accuracy with PCA features:”, accuracy, “”) }

Assuming the following accuracy values are already defined

logistic_accuracy <- accuracy # Accuracy from the logistic regression model

Neural network accuracies from previous evaluations

nn_accuracies <- c( 5 hidden units = 0.5416667, 10 hidden units = 0.5, 20 hidden units = 0.5416667, 50 hidden units = 0.5 )

Create a comparison data frame

comparison_df <- data.frame( Model = c(“Logistic Regression”, names(nn_accuracies)), Accuracy = c(logistic_accuracy, nn_accuracies) )

Plot the accuracies using ggplot2

library(ggplot2)

ggplot(comparison_df, aes(x = Model, y = Accuracy, fill = Model)) + geom_bar(stat = “identity”, position = position_dodge(), width = 0.7) + labs(title = “Model Accuracy Comparison”, x = “Model”, y = “Accuracy”) + scale_fill_brewer(palette = “Set1”) + theme_minimal() + geom_text(aes(label = round(Accuracy, 2)), position = position_dodge(0.7), vjust = -0.5) + theme(axis.text.x = element_text(angle = 45, hjust = 1))

options(warn = 0)