## #LOADING REQUIRED LIBRARIES
library(imager)
## Warning: package 'imager' was built under R version 4.3.3
## Loading required package: magrittr
##
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
##
## add
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
## The following object is masked from 'package:graphics':
##
## frame
## The following object is masked from 'package:base':
##
## save.image
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:imager':
##
## where
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ imager::add() masks magrittr::add()
## ✖ stringr::boundary() masks imager::boundary()
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ tidyr::fill() masks imager::fill()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ dplyr::where() masks imager::where()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
install.packages("BiocManager")
##
## The downloaded binary packages are in
## /var/folders/92/s_f3d4816pz3mzm229sxs9k80000gn/T//RtmpeJdrAE/downloaded_packages
BiocManager::install("EBImage")
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cloud.r-project.org/
## Bioconductor version 3.18 (BiocManager 1.30.25), R 4.3.1 (2023-06-16)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'EBImage'
## Old packages: 'uwot'
# Set the path to your dataset
image_path <- "/Users/saairam/Downloads/project"
# Get a list of all categories (folders)
categories <- list.dirs(image_path, recursive = FALSE)
labels <- basename(categories) # Use folder names as labels
# Function to resize images
resize_image <- function(img_path, img_size = 32) {
img <- load.image(img_path) # Load the image
img <- resize(img, img_size, img_size) # Resize to specified dimensions
as.numeric(img) # Convert to a numeric vector
}
# Function to load all images and create a dataset
load_images <- function(image_path, categories) {
image_data <- data.frame() # Initialize an empty data frame
for (category in categories) {
label <- basename(category) # Get the label from the folder name
image_files <- list.files(category, full.names = TRUE) # List all image files in the category
for (image_file in image_files) {
img_vector <- resize_image(image_file) # Resize the image and convert to a vector
image_data <- rbind(image_data, data.frame(label = label, img_vector = I(list(img_vector)))) # Add to data frame
}
}
return(image_data) # Return the populated data frame
}
# Load the images into a data frame
image_data <- load_images(image_path, categories)
# Check the dimensions of the image data
dim(image_data) # Should show the number of loaded images and columns
## [1] 299 2
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
table(image_data$label)
##
## ani fruits plants
## 82 115 102
# Check the structure of the data
str(image_data)
## 'data.frame': 299 obs. of 2 variables:
## $ label : chr "ani" "ani" "ani" "ani" ...
## $ img_vector:List of 299
## ..$ : num 0.624 0.667 0.482 0.471 0.549 ...
## ..$ : num 0.51 0.494 0.525 0.553 0.553 ...
## ..$ : num 0.475 0.157 0.176 0.141 0.141 ...
## ..$ : num 0.651 0.494 0.624 0.663 0.698 ...
## ..$ : num 0.165 0.145 0.169 0.247 0.2 ...
## ..$ : num 0.282 0.247 0.161 0.212 0.196 ...
## ..$ : num 0.5294 0.4471 0.651 0.6431 0.0392 ...
## ..$ : num 0.325 0.212 0.706 0.718 0.647 ...
## ..$ : num 0.0784 0.0902 0.0863 0.0392 0.1765 ...
## ..$ : num 0.596 0.753 0.765 0.757 0.867 ...
## ..$ : num 0.592 0.624 0.655 0.702 0.706 ...
## ..$ : num 0.369 0.275 0.267 0.208 0.255 ...
## ..$ : num 0.227 0.247 0.204 0.165 0.141 ...
## ..$ : num 0.482 0.506 0.518 0.482 0.467 ...
## ..$ : num 0.643 0.71 0.565 0.467 0.537 ...
## ..$ : num 0.118 0.102 0.137 0.137 0.129 ...
## ..$ : num 0.667 0.667 0.667 0.667 0.667 ...
## ..$ : num 0.667 0.675 0.631 0.616 0.604 ...
## ..$ : num 0.318 0.396 0.204 0.165 0.294 ...
## ..$ : num 0.157 0.18 0.18 0.18 0.18 ...
## ..$ : num 0.639 0.631 0.62 0.624 0.596 ...
## ..$ : num 0.188 0.208 0.173 0.329 0.412 ...
## ..$ : num 0.529 0.71 0.682 0.624 0.804 ...
## ..$ : num 0.604 0.6 0.596 0.592 0.592 ...
## ..$ : num 0.192 0.361 0.478 0.333 0.278 ...
## ..$ : num 0.263 0.435 0.494 0.51 0.49 ...
## ..$ : num 0.098 0.0235 0.3412 0.3608 0.1686 ...
## ..$ : num 0.973 0.973 0.973 0.973 0.973 ...
## ..$ : num 0.314 0.314 0.137 0.204 0.137 ...
## ..$ : num 0.0353 0.0588 0.1059 0.0431 0.0588 ...
## ..$ : num 0.451 0.486 0.616 0.482 0.604 ...
## ..$ : num 0.0941 0.1098 0.1098 0.0745 0.051 ...
## ..$ : num 0.0902 0.1098 0.1804 0.1098 0.1216 ...
## ..$ : num 0.6353 0 0.0784 0.2588 0.349 ...
## ..$ : num 0.0471 0.0667 0.1255 0.3098 0.4392 ...
## ..$ : num 0.149 0.1804 0.1412 0.0824 0.1294 ...
## ..$ : num 0.878 0.969 0.906 0.906 0.89 ...
## ..$ : num 0.675 0.608 0.533 0.529 0.596 ...
## ..$ : num 0.173 0.169 0.176 0.373 0.506 ...
## ..$ : num 0.761 0.71 0.643 0.659 0.659 ...
## ..$ : num 0.314 0.631 0.792 0.616 0.447 ...
## ..$ : num 0.384 0.384 0.243 0.173 0.259 ...
## ..$ : num 0.584 0.588 0.573 0.62 0.651 ...
## ..$ : num 0.3333 0.1725 0.1882 0.1686 0.0863 ...
## ..$ : num 0.118 0.102 0.188 0.165 0.196 ...
## ..$ : num 0.18 0.212 0.247 0.255 0.267 ...
## ..$ : num 0.1882 0.2392 0.302 0.2353 0.0863 ...
## ..$ : num 0.00392 0.01961 0.01961 0.03137 0.03529 ...
## ..$ : num 0.612 0.62 0.627 0.639 0.663 ...
## ..$ : num 0.161 0.38 0.573 0.718 0.588 ...
## ..$ : num 0 0.0314 0.0118 0 0 ...
## ..$ : num 0.129 0.133 0.137 0.149 0.157 ...
## ..$ : num 0.522 0.467 0.478 0.514 0.467 ...
## ..$ : num 0.557 0.576 0.443 0.271 0.337 ...
## ..$ : num 0 0 0.557 0.325 0.624 ...
## ..$ : num 0.114 0.173 0.439 0.42 0.337 ...
## ..$ : num 0.914 0.906 0.925 0.933 0.945 ...
## ..$ : num 0.694 0.678 0.659 0.631 0.596 ...
## ..$ : num 0.584 0.584 0.592 0.6 0.6 ...
## ..$ : num 0.439 0.243 0.2 0.384 0.604 ...
## ..$ : num 0.769 0.71 0.725 0.741 0.757 ...
## ..$ : num 0.255 0.243 0.239 0.239 0.251 ...
## ..$ : num 0.514 0.471 0.502 0.498 0.506 ...
## ..$ : num 0.831 0.808 0.82 0.831 0.827 ...
## ..$ : num 0.447 0.459 0.451 0.482 0.494 ...
## ..$ : num 0.624 0.647 0.659 0.643 0.627 ...
## ..$ : num 0.31 0.302 0.302 0.31 0.298 ...
## ..$ : num 0.769 0.749 0.757 0.753 0.725 ...
## ..$ : num 0.361 0.396 0.196 0.22 0.184 ...
## ..$ : num 0.745 0.757 0.788 0.788 0.816 ...
## ..$ : num 0.345 0.333 0.357 0.31 0.314 ...
## ..$ : num 0.0471 0.1333 0.0863 0.1137 0 ...
## ..$ : num 0.698 0.424 0.345 0.404 0.369 ...
## ..$ : num 0.553 0.51 0.541 0.647 0.518 ...
## ..$ : num 0.0902 0.2118 0.2706 0.1294 0.0353 ...
## ..$ : num 0.416 0.439 0.494 0.506 0.455 ...
## ..$ : num 0.224 0.22 0.267 0.31 0.333 ...
## ..$ : num 0.475 0.467 0.439 0.286 0.529 ...
## ..$ : num 0 0 0 0 0.267 ...
## ..$ : num 0.651 0.706 0.698 0.647 0.537 ...
## ..$ : num 0.322 0.369 0.384 0.392 0.42 ...
## ..$ : num 0.596 0.608 0.608 0.616 0.62 ...
## ..$ : num 1 1 1 1 1 ...
## ..$ : num 1 1 1 1 1 1 1 1 1 1 ...
## ..$ : num 1 1 1 1 1 1 1 1 1 1 ...
## ..$ : num 0.992 0.988 0.973 0.996 0.922 ...
## ..$ : num 1 1 1 1 1 1 1 1 1 1 ...
## ..$ : num 0.992 1 0.996 0.996 0.992 ...
## ..$ : num 0.282 0.325 0.224 0.953 1 ...
## ..$ : num 1 1 1 1 1 1 1 1 1 1 ...
## ..$ : num 0.01176 0.00784 0.00784 0.00784 0.01176 ...
## ..$ : num 0.145 0.18 0.169 0.137 0.408 ...
## ..$ : num 1 1 1 1 1 1 1 1 1 1 ...
## ..$ : num 0.353 0.349 0.353 0.38 0.392 ...
## ..$ : num 0.988 0.988 0.988 0.988 0.988 ...
## ..$ : num 1 1 1 1 1 1 1 1 1 1 ...
## ..$ : num 1 1 1 1 1 ...
## ..$ : num 0.851 0.855 0.863 0.859 0.859 ...
## ..$ : num 0.796 0.78 0.737 0.329 0.286 ...
## .. [list output truncated]
## ..- attr(*, "class")= chr "AsIs"
# Display the names of the columns
colnames(image_data)
## [1] "label" "img_vector"
# Print the first few rows of the dataset
head(image_data)
# Load necessary libraries
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
install.packages("nnet")
##
## The downloaded binary packages are in
## /var/folders/92/s_f3d4816pz3mzm229sxs9k80000gn/T//RtmpeJdrAE/downloaded_packages
library(nnet)
# Flatten the img_vector column into multiple pixel columns
img_matrix <- do.call(rbind, lapply(image_data$img_vector, as.vector))
# Create a new data frame with flattened image data
image_data_flat <- data.frame(label = image_data$label, img_matrix)
# Check the new dimensions
print(dim(image_data_flat)) # Should show the number of loaded images and the number of pixel columns
## [1] 299 3073
# Convert labels to a factor if they are not already
image_data_flat$label <- as.factor(image_data_flat$label)
# Normalize the features
image_data_scaled <- scale(image_data_flat[, -which(names(image_data_flat) == "label")])
# Perform PCA
pca_result <- prcomp(image_data_scaled, center = TRUE, scale. = TRUE)
# Decide the number of components to keep (e.g., 95% variance)
explained_variance <- summary(pca_result)$importance[3,]
num_components <- min(which(cumsum(explained_variance) >= 0.95))
# Create a new dataset with the PCA components
image_data_pca <- data.frame(pca_result$x[, 1:num_components])
image_data_pca$label <- image_data_flat$label
# Split the data into training and testing sets
set.seed(123) # For reproducibility
train_index <- createDataPartition(image_data_pca$label, p = 0.7, list = FALSE)
train_data <- image_data_pca[train_index, ]
test_data <- image_data_pca[-train_index, ]
# Fit the logistic regression model on training data
logistic_model <- multinom(label ~ ., data = train_data) # Using multinom for logistic regression in multi-class case
## # weights: 15 (8 variable)
## initial value 231.807193
## iter 10 value 171.384719
## final value 170.351731
## converged
# Check the summary of the model
summary(logistic_model)
## Call:
## multinom(formula = label ~ ., data = train_data)
##
## Coefficients:
## (Intercept) PC1 PC2 PC3
## fruits 0.03502584 0.03595141 -0.06328572 -0.03084746
## plants -0.22233688 -0.01989788 0.06386126 -0.03402685
##
## Std. Errors:
## (Intercept) PC1 PC2 PC3
## fruits 0.2117822 0.007271005 0.01818642 0.01502432
## plants 0.2385523 0.008490523 0.01881783 0.01741396
##
## Residual Deviance: 340.7035
## AIC: 356.7035
# Predict the probabilities for the test dataset
predicted_probs <- predict(logistic_model, newdata = test_data, type = "prob")
# Predict the class labels for the test dataset
predicted_classes <- predict(logistic_model, newdata = test_data)
# View the predicted probabilities and class labels
head(predicted_probs)
## ani fruits plants
## 1 0.2077034 0.2601034 0.53219323
## 2 0.4155087 0.2920042 0.29248707
## 3 0.4195379 0.3261922 0.25426986
## 4 0.3640130 0.5448666 0.09112042
## 11 0.2057989 0.6897261 0.10447499
## 19 0.4081748 0.1428221 0.44900311
head(predicted_classes)
## [1] plants ani ani fruits fruits plants
## Levels: ani fruits plants
# Create a confusion matrix for the test set
confusion_matrix <- table(Actual = test_data$label, Predicted = predicted_classes)
# View the confusion matrix
print(confusion_matrix)
## Predicted
## Actual ani fruits plants
## ani 5 9 10
## fruits 6 23 5
## plants 8 3 19
# Calculate accuracy
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
print(paste("Test Accuracy:", accuracy))
## [1] "Test Accuracy: 0.534090909090909"
# Install and load the 'clustertend' package if you haven't already
install.packages("clustertend")
##
## The downloaded binary packages are in
## /var/folders/92/s_f3d4816pz3mzm229sxs9k80000gn/T//RtmpeJdrAE/downloaded_packages
library(clustertend)
## Package `clustertend` is deprecated. Use package `hopkins` instead.
# Compute Hopkins statistic
set.seed(123) # Set a seed for reproducibility
hopkins_stat <- hopkins(image_data_pca[, -which(names(image_data_pca) == "label")], n = nrow(image_data_pca) - 1)
## Warning: Package `clustertend` is deprecated. Use package `hopkins` instead.
print(paste("Hopkins Statistic: ", hopkins_stat))
## [1] "Hopkins Statistic: 0.285333384588006"
##HEIRARCHIAL CLUSTERING WITH DENDROGRAM:
# Compute the distance matrix
dist_matrix <- dist(image_data_pca[, -which(names(image_data_pca) == "label")])
# Perform hierarchical clustering
hclust_result <- hclust(dist_matrix)
# Plot the dendrogram
plot(hclust_result, labels = FALSE, main = "Hierarchical Clustering Dendrogram")
# Cut the dendrogram to create clusters
image_data_pca$hclust_cluster <- cutree(hclust_result, k = 3) # Use the same number of clusters as above
# View the first few rows with cluster assignments
head(image_data_pca)
# Set the number of clusters (k)
set.seed(123) # For reproducibility
k <- 4 # You can also specify a different number of clusters
str(image_data_pca)# Assuming kmeans has been applied, e.g., using kmeans_result$cluster
## 'data.frame': 299 obs. of 5 variables:
## $ PC1 : num -8.37 -19.32 -6.19 -5.18 -44.64 ...
## $ PC2 : num 3.406 -6.37 -2.475 -14.046 0.155 ...
## $ PC3 : num -22.9 3.13 7.16 10.84 6.27 ...
## $ label : Factor w/ 3 levels "ani","fruits",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ hclust_cluster: int 1 2 2 2 2 2 1 2 2 1 ...
kmeans_result <- kmeans(image_data_pca[,1:2], centers = 3) # Example with 3 clusters
image_data_pca$cluster <- as.factor(kmeans_result$cluster) # Add cluster as a factor
# View the first few rows with cluster assignments
head(image_data_pca)
# Optional: Visualize the clusters in a 2D plot (using the first two PCA components)
library(ggplot2)
ggplot(image_data_pca, aes(x = PC1, y = PC2, color = cluster)) +
geom_point(alpha = 0.6) +
labs(title = "K-means Clustering of Image Data",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_minimal()
# Check the structure of the dataset
# Load necessary libraries
library(nnet)
library(caret)
# Set seed for reproducibility
set.seed(123)
# Function to fit a neural network model and return the accuracy
fit_nn_model <- function(hidden_neurons, train_data) {
# Fit the neural network model
nn_model <- nnet(label ~ ., data = train_data, size = hidden_neurons, maxit = 100, trace = FALSE)
# Predict the class labels
predicted_classes <- predict(nn_model, train_data, type = "class")
# Create a confusion matrix
confusion_matrix <- table(Actual = train_data$label, Predicted = predicted_classes)
# Calculate accuracy
accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
return(accuracy)
}
# Fit models with different hidden layer sizes
neurons_list <- c(5, 10, 20)
accuracy_results <- data.frame(Hidden_Neurons = neurons_list, Accuracy = NA)
for (neurons in neurons_list) {
accuracy <- fit_nn_model(neurons, image_data_pca)
accuracy_results[accuracy_results$Hidden_Neurons == neurons, "Accuracy"] <- accuracy
}
# Print the results
print(accuracy_results)
## Hidden_Neurons Accuracy
## 1 5 0.7157191
## 2 10 0.7491639
## 3 20 0.8193980
# Printing the neurons list
print(neurons_list)
## [1] 5 10 20