Logistic Regression vs. Neural Network
options(warn = -1)
library(imager)
## Loading required package: magrittr
##
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
##
## add
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
## The following object is masked from 'package:graphics':
##
## frame
## The following object is masked from 'package:base':
##
## save.image
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:imager':
##
## where
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(nnet) # For multinomial logistic regression
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(keras)
library(tensorflow)
##
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
##
## train
library(pROC) # For ROC analysis
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:imager':
##
## ci
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
options(warn = -1)
# Load every image under `base_dir` (one sub-directory per class), convert it
# to a 64x64 grayscale image, and keep the flattened pixels of the original
# plus four augmented variants. `images` ends up as a list of numeric vectors
# and `labels` as the matching class names, in the same order.
base_dir <- "C:/Users/raksh/Downloads/natural_images" # Adjust path as necessary
classes <- list.dirs(base_dir, full.names = TRUE, recursive = FALSE)

# Read one image file and return a list of five flattened pixel vectors, in
# this order: resized grayscale image, 180-degree rotation, 90-degree rotation,
# brightened copy (x1.2), darkened copy (x0.8).
augment_image <- function(img_path) {
  img_resized <- resize(grayscale(load.image(img_path)), 64, 64)
  variants <- list(
    img_resized,
    imrotate(img_resized, 180), # Half-turn rotation (not a mirror flip)
    imrotate(img_resized, 90),
    img_resized * 1.2,
    img_resized * 0.8
  )
  lapply(variants, as.vector)
}

# Collect the vectors per class and combine them once at the end instead of
# growing `images` and `labels` one element at a time inside nested loops.
per_class <- lapply(classes, function(class_dir) {
  img_files <- list.files(class_dir, full.names = TRUE)
  vectors <- unlist(lapply(img_files, augment_image), recursive = FALSE)
  list(
    images = vectors,
    labels = rep(basename(class_dir), length(vectors))
  )
})

images <- c(
  list(),
  unlist(lapply(per_class, `[[`, "images"), recursive = FALSE)
)
labels <- unlist(lapply(per_class, `[[`, "labels"))

if (length(images) == 0L) {
  stop("No images found under ", base_dir, call. = FALSE)
}

# NOTE(review): the augmented variants are created before the train/test split
# performed later in this script, so variants of one source image can end up
# on both sides of that split.
# Stack the flattened images into a matrix (one row per image), standardise
# every pixel column, and project the rows onto the leading principal
# components.
X <- do.call(rbind, images)
y <- factor(labels)
X <- scale(X)
# X was standardised on the previous line, so center/scale. below re-apply the
# same transformation.
pca <- prcomp(X, center = TRUE, scale. = TRUE)
# Proportion of variance per component, taken from the unrounded standard
# deviations. summary(pca)$importance rounds each proportion to five decimals,
# and summing many rounded values can shift the 95% cut-off.
explained_variance <- pca$sdev^2 / sum(pca$sdev^2)
names(explained_variance) <- colnames(pca$x)
# Smallest number of components whose cumulative variance reaches 95%
num_components <- which(cumsum(explained_variance) >= 0.95)[1]
num_components <- min(num_components, 50) # Limit number of components if too high
# seq_len() and drop = FALSE keep X_pca a matrix even when only one component
# is retained.
X_pca <- pca$x[, seq_len(num_components), drop = FALSE]
# Split the PCA scores 80/20 with caret's createDataPartition() and fit a
# multinomial logistic regression (nnet::multinom) on the training rows.
set.seed(42) # For reproducibility
train_index <- createDataPartition(y, p = 0.8, list = FALSE)
# drop = FALSE keeps the score matrices two-dimensional, with their column
# names intact, even if X_pca has a single column; otherwise the training and
# test data frames would get different column names.
X_train <- X_pca[train_index, , drop = FALSE]
y_train <- y[train_index]
X_test <- X_pca[-train_index, , drop = FALSE]
y_test <- y[-train_index]
# y_train is not a column of the data frame, so it is picked up from the
# calling environment; `.` expands to every column of `data`.
# NOTE(review): the recorded fit log below ends with "stopped after 100
# iterations", i.e. the optimiser hit its iteration limit; consider a larger
# `maxit` and re-checking the reported accuracy.
logistic_model <- multinom(y_train ~ ., data = as.data.frame(X_train))
## # weights: 416 (357 variable)
## initial value 1996.263880
## iter 10 value 1096.757646
## iter 20 value 990.563068
## iter 30 value 965.026206
## iter 40 value 939.066040
## iter 50 value 924.569493
## iter 60 value 913.577387
## iter 70 value 899.445219
## iter 80 value 890.240756
## iter 90 value 881.468122
## iter 100 value 875.539382
## final value 875.539382
## stopped after 100 iterations
# Score the held-out rows. Despite its name, `pred_probs` holds predicted
# class labels (type = "class"), not probabilities.
test_frame <- as.data.frame(X_test)
pred_probs <- predict(logistic_model, newdata = test_frame, type = "class")
# Rows are predicted classes, columns are actual classes.
confusion_matrix <- table(Predicted = pred_probs, Actual = y_test)
# Correct predictions sit on the diagonal of the table.
n_correct <- sum(diag(confusion_matrix))
n_total <- sum(confusion_matrix)
accuracy <- n_correct / n_total
accuracy_pct <- round(accuracy * 100, 2)
print(paste("Accuracy:", accuracy_pct, "%"))
## [1] "Accuracy: 57.92 %"
print("Confusion Matrix:")
## [1] "Confusion Matrix:"
print(confusion_matrix)
## Actual
## Predicted airplane car cat dog flower fruit motorbike person
## airplane 19 4 2 3 2 0 0 2
## car 1 16 2 1 2 0 0 1
## cat 1 1 9 5 1 0 4 1
## dog 2 5 8 10 4 1 6 1
## flower 2 1 3 6 18 0 2 1
## fruit 1 0 2 0 1 29 0 0
## motorbike 1 1 2 5 1 0 15 1
## person 3 2 2 0 1 0 3 23
# Overall and per-class statistics computed by caret from the
# predicted-by-actual table built above.
report <- caret::confusionMatrix(confusion_matrix)
print(report)
## Confusion Matrix and Statistics
##
## Actual
## Predicted airplane car cat dog flower fruit motorbike person
## airplane 19 4 2 3 2 0 0 2
## car 1 16 2 1 2 0 0 1
## cat 1 1 9 5 1 0 4 1
## dog 2 5 8 10 4 1 6 1
## flower 2 1 3 6 18 0 2 1
## fruit 1 0 2 0 1 29 0 0
## motorbike 1 1 2 5 1 0 15 1
## person 3 2 2 0 1 0 3 23
##
## Overall Statistics
##
## Accuracy : 0.5792
## 95% CI : (0.514, 0.6424)
## No Information Rate : 0.125
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.519
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: airplane Class: car Class: cat Class: dog
## Sensitivity 0.63333 0.53333 0.30000 0.33333
## Specificity 0.93810 0.96667 0.93810 0.87143
## Pos Pred Value 0.59375 0.69565 0.40909 0.27027
## Neg Pred Value 0.94712 0.93548 0.90367 0.90148
## Prevalence 0.12500 0.12500 0.12500 0.12500
## Detection Rate 0.07917 0.06667 0.03750 0.04167
## Detection Prevalence 0.13333 0.09583 0.09167 0.15417
## Balanced Accuracy 0.78571 0.75000 0.61905 0.60238
## Class: flower Class: fruit Class: motorbike Class: person
## Sensitivity 0.6000 0.9667 0.5000 0.76667
## Specificity 0.9286 0.9810 0.9476 0.94762
## Pos Pred Value 0.5455 0.8788 0.5769 0.67647
## Neg Pred Value 0.9420 0.9952 0.9299 0.96602
## Prevalence 0.1250 0.1250 0.1250 0.12500
## Detection Rate 0.0750 0.1208 0.0625 0.09583
## Detection Prevalence 0.1375 0.1375 0.1083 0.14167
## Balanced Accuracy 0.7643 0.9738 0.7238 0.85714
# K-means on the PCA scores, followed by a per-cluster summary of the most
# frequent image class.
k <- 5 # Number of clusters
set.seed(42) # Fixes the random starts used by kmeans()
kmeans_result <- kmeans(X_pca, centers = k, nstart = 25)
cluster_id <- as.factor(kmeans_result$cluster)

# PCA scores with the assigned cluster attached as the last column
X_clustered <- as.data.frame(X_pca)
X_clustered[["Cluster"]] <- cluster_id

# Raw pixel vectors (kept as a list column via I()), class label and cluster
original_data <- data.frame(Features = I(images), Class = labels)
original_data[["Cluster"]] <- cluster_id

# One row per cluster; which.max() picks the first class with the highest
# count in table(Class).
by_cluster <- group_by(original_data, Cluster)
cluster_summary <- summarize(
  by_cluster,
  Most_Common_Category = names(which.max(table(Class)))
)
# factoextra cluster plot of the k-means result. The Cluster column (last
# column of X_clustered) is removed so only the PCA scores are passed as data.
cluster_features <- X_clustered[, -ncol(X_clustered)]
p <- fviz_cluster(
  kmeans_result,
  data = cluster_features,
  geom = "point",
  ellipse.type = "convex",
  ggtheme = theme_minimal()
) +
  labs(color = "Cluster") # Title for the colour legend
# Draw the plot
print(p)
# Alternative plot using base R graphics: first two PCA columns, coloured by
# cluster, with cluster centres and the most common class of each cluster.
plot(X_clustered[, 1:2], col = X_clustered$Cluster, pch = 19,
     xlab = "PCA 1", ylab = "PCA 2", main = "K-means Clustering")
cluster_ids <- seq_len(k)
# Add cluster centers
points(kmeans_result$centers, col = cluster_ids, pch = 8, cex = 2)
# Look up each cluster's label by its cluster id rather than by row position,
# so the labels stay correct whatever the row order of cluster_summary is.
label_rows <- match(
  as.character(cluster_ids),
  as.character(cluster_summary$Cluster)
)
# text() is vectorised, so all centre labels are drawn in one call.
text(kmeans_result$centers[, 1], kmeans_result$centers[, 2],
     labels = cluster_summary$Most_Common_Category[label_rows],
     pos = 3, col = "black", cex = 0.8)
# Create a custom legend
legend("topright", legend = paste("Cluster", cluster_ids),
       col = cluster_ids, pch = 19, title = "Clusters")
library(imager)
library(dplyr)
library(nnet)
library(FactoMineR)
library(factoextra)
# Dataset location and target image size for the second pipeline.
image_dir <- "C:/Users/raksh/Downloads/natural_images"
# Class names are the sub-directories of image_dir. Plain files that happen to
# sit next to the class folders are skipped so they are not treated as classes.
entries <- list.files(image_dir)
classes <- entries[dir.exists(file.path(image_dir, entries))]
img_width <- 32
img_height <- 32
#' Load an image file, resize it, and flatten it to a plain vector.
#'
#' No grayscale conversion happens here; whatever `load.image()` returns is
#' resized and flattened as is.
#'
#' @param file_path Path of the image file, passed to `load.image()`.
#' @param img_width Target width, passed to `resize()`.
#' @param img_height Target height, passed to `resize()`.
#' @return The resized image converted with `as.vector()`.
process_image <- function(file_path, img_width, img_height) {
  loaded <- load.image(file_path)
  resized <- resize(loaded, img_width, img_height)
  as.vector(resized)
}
# Build `image_df`: one row per image file (flattened pixels from
# process_image()) plus a factor column `label` holding the class name.

# List the .jpg/.png files of every class directory up front so labels and
# pixel vectors can be created in one pass each, without growing objects
# inside a loop.
files_by_class <- lapply(classes, function(class_name) {
  list.files(file.path(image_dir, class_name), full.names = TRUE,
             pattern = "\\.jpg$|\\.png$")
})
all_image_files <- unlist(files_by_class, use.names = FALSE)
if (length(all_image_files) == 0L) {
  stop("No .jpg/.png images found under ", image_dir, call. = FALSE)
}

# One label per file, in the same order as all_image_files
image_labels <- rep(classes, times = lengths(files_by_class))
image_data <- lapply(all_image_files, process_image,
                     img_width = img_width, img_height = img_height)

# rbind() recycles shorter vectors (with only a warning, which this script
# suppresses), so refuse to continue if the flattened images differ in length.
vector_lengths <- lengths(image_data)
if (any(vector_lengths != vector_lengths[1])) {
  stop("Flattened images have different lengths: ",
       paste(sort(unique(vector_lengths)), collapse = ", "),
       call. = FALSE)
}

image_matrix <- do.call(rbind, image_data)
image_df <- as.data.frame(image_matrix)
image_df$label <- factor(image_labels)
# Reduce the pixel columns with FactoMineR's PCA and split the standardised
# component scores into training and validation rows (80/20).
set.seed(123) # For reproducibility
sample_index <- sample(seq_len(nrow(image_df)),
                       size = floor(0.8 * nrow(image_df)))

# PCA() keeps only `ncp` dimensions in its result and `ncp` defaults to 5, so
# the requested component count has to be passed explicitly. A previously
# recorded run of this script printed "Using 5 PCA components..." because the
# default was in effect.
target_components <- 150
# The last column of image_df is the label and is excluded from the PCA.
pca_result <- PCA(image_df[, -ncol(image_df)],
                  ncp = target_components, graph = FALSE)
max_components <- ncol(pca_result$ind$coord)
# Use the target or the maximum available components, whichever is smaller
num_components <- min(target_components, max_components)
cat("Using", num_components, "PCA components...\n")

pca_features <- pca_result$ind$coord[, seq_len(num_components), drop = FALSE]
pca_features <- scale(pca_features)
pca_df <- as.data.frame(pca_features)
pca_df$label <- image_df$label

# NOTE(review): the PCA and the scaling above are fitted on all rows, including
# the rows that become validation_df.
train_df <- pca_df[sample_index, ]
validation_df <- pca_df[-sample_index, ]
# Assemble the accuracy table used by the comparison chart.
# Logistic regression accuracy comes from the `accuracy` value computed earlier
# in the script; the neural-network accuracies are hard-coded constants (the
# code that produced them is not visible in this section).
logistic_accuracy <- accuracy
hidden_unit_counts <- c(5, 10, 20, 50)
nn_accuracies <- setNames(
  c(0.5416667, 0.5, 0.5416667, 0.5),
  paste(hidden_unit_counts, "hidden units")
)
model_names <- c("Logistic Regression", names(nn_accuracies))
comparison_df <- data.frame(
  Model = model_names,
  Accuracy = c(logistic_accuracy, nn_accuracies)
)
library(ggplot2)
# Bar chart of accuracy per model, one bar per row of comparison_df, with the
# rounded accuracy printed above each bar.
ggplot(comparison_df, aes(x = Model, y = Accuracy, fill = Model)) +
  # geom_col() draws bar heights straight from the Accuracy column
  geom_col(position = position_dodge(), width = 0.7) +
  geom_text(
    aes(label = round(Accuracy, 2)),
    position = position_dodge(0.7),
    vjust = -0.5
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(title = "Model Accuracy Comparison", x = "Model", y = "Accuracy") +
  theme_minimal() +
  # Tilt the model names so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Turn warning reporting back on; it was switched off with
# options(warn = -1) earlier in this script.
options(warn = 0)