options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("magick")
## Installing package into 'C:/Users/Naman/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'magick' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'magick'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\Naman\AppData\Local\R\win-library\4.4\00LOCK\magick\libs\x64\magick.dll
## to C:\Users\Naman\AppData\Local\R\win-library\4.4\magick\libs\x64\magick.dll:
## Permission denied
## Warning: restored 'magick'
##
## The downloaded binary packages are in
## C:\Users\Naman\AppData\Local\Temp\RtmpIxKL9K\downloaded_packages
library(magick)
## Linking to ImageMagick 6.9.12.98
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
# Define the path to your images
image_folder<-"C:\\Users\\Naman\\Downloads\\Namana Herle G"
# Get a list of all categories (folders)
categories <- list.dirs(image_folder, recursive = FALSE)
labels <- basename(categories) # Use folder names as labels
# Function to resize and convert images
resize_image <- function(img_path, img_size = 32) {
tryCatch({
img <- image_read(img_path) # Load the image using magick
img <- image_scale(img, paste0(img_size, "x", img_size)) # Resize to specified dimensions
return(as.numeric(as.vector(image_data(img)))) # Convert to a numeric vector
}, error = function(e) {
cat("Error loading image:", img_path, "\n", e$message, "\n")
return(NULL) # Return NULL if there's an error
})
}
# Function to load images from categories
load_images <- function(image_folder, categories) {
image_data <- data.frame() # Initialize an empty data frame
for (category in categories) {
label <- basename(category) # Get the label from the folder name
# List all image files in the category (support JPG, PNG, BMP)
image_files <- list.files(category, pattern = "\\.(jpg|jpeg|png|bmp)$", full.names = TRUE, ignore.case = TRUE)
for (image_file in image_files) {
img_vector <- resize_image(image_file) # Resize the image and convert to a vector
if (!is.null(img_vector)) { # Only add if img_vector is not NULL
image_data <- rbind(image_data, data.frame(label = label, img_vector = I(list(img_vector))))
}
}
}
return(image_data) # Return the populated data frame
}
# Load the images into a data frame
image_data <- load_images(image_folder, categories)
# Check the dimensions of the image data
dim(image_data) # Should show the number of loaded images and columns
## [1] 251 2
# Unlist the image vectors into a matrix
image_matrix <- do.call(rbind, lapply(image_data$img_vector, unlist))
## Warning in (function (..., deparse.level = 1) : number of columns of result is
## not a multiple of vector length (arg 1)
# Check the dimensions of the matrix (each row corresponds to an image)
dim(image_matrix)
## [1] 251 3072
LOGISTIC REGRESSION
# Load necessary libraries
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(nnet)
library(ggplot2)
# Step 1: Flatten Image Data and Prepare Dataset
# Assuming image_data contains img_vector for images and label for image categories
# Convert image vectors into matrix form
img_matrix <- do.call(rbind, lapply(image_data$img_vector, as.vector))
## Warning in (function (..., deparse.level = 1) : number of columns of result is
## not a multiple of vector length (arg 1)
# Create a data frame with the flattened image data and labels
image_data_flat <- data.frame(label = image_data$label, img_matrix)
# Ensure labels are treated as factors (for classification)
image_data_flat$label <- as.factor(image_data_flat$label)
# Step 2: Normalize Features and Perform PCA
# Normalize the pixel data (excluding the label column)
image_data_scaled <- scale(image_data_flat[, -1]) # Exclude the label column for normalization
# Perform Principal Component Analysis (PCA)
pca_result <- prcomp(image_data_scaled, center = TRUE, scale. = TRUE)
# Calculate the explained variance and select components for 95% variance retention
explained_variance <- summary(pca_result)$importance[3,]
num_components <- which(cumsum(explained_variance) >= 0.95)[1]
# Create a new dataset using the selected PCA components along with the labels
image_data_pca <- data.frame(pca_result$x[, 1:num_components])
image_data_pca$label <- image_data_flat$label
# Step 3: Train Logistic Regression Model (Multinomial)
# Fit a multinomial logistic regression model using the PCA components
multinom_model <- multinom(label ~ ., data = image_data_pca)
## # weights: 20 (12 variable)
## initial value 403.968916
## iter 10 value 355.326246
## final value 348.220079
## converged
# Step 4: Make Predictions
# Get predicted class probabilities and predicted class labels
predicted_classes <- predict(multinom_model)
# Step 5: Evaluate Model Performance
# Generate a confusion matrix and calculate accuracy
confusion_matrix <- confusionMatrix(as.factor(predicted_classes), image_data_pca$label)
accuracy <- confusion_matrix$overall['Accuracy']
# Step 6: Display Results
# Output accuracy and the confusion matrix
cat("Logistic Regression Accuracy:", round(accuracy, 4), "\n")
## Logistic Regression Accuracy: 0.3944
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(confusion_matrix$table)
## Reference
## Prediction Animals Birds Flowers Fruits Trees
## Animals 7 11 3 9 12
## Birds 11 16 9 1 5
## Flowers 13 14 32 4 5
## Fruits 12 7 4 31 15
## Trees 8 2 2 5 13
# Step 7: Visualize PCA Components (Optional)
# Plot the first two principal components with labels
ggplot(data = image_data_pca, aes(x = PC1, y = PC2, color = label)) +
geom_point(size = 2, alpha = 0.6) +
labs(title = "PCA of Image Data", x = "Principal Component 1", y = "Principal Component 2") + theme_minimal()

# Step 7: Visualize PCA Components (Optional)
# Plot the first two principal components with labels
ggplot(data = image_data_pca, aes(x = PC1, y = PC2, color = label)) +
geom_point(size = 2, alpha = 0.6) + labs(title = "PCA of Image Data",
x = "Principal Component 1",
y = "Principal Component 2") + theme_minimal()

# Load necessary libraries
library(nnet)
library(caret)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Set seed for reproducibility
set.seed(123)
# Step 1: Function to Fit Neural Network with Cross-Validation
fit_nn_model_cv <- function(hidden_neurons, train_data, folds = 5) {
# Define control method for cross-validation
train_control <- trainControl(method = "cv", number = folds)
# Define the model formula (label as dependent, rest as independent variables)
formula <- label ~ .
# Train the neural network model with cross-validation
nn_model_cv <- train(
formula,
data = train_data,
method = "nnet",
trace = FALSE,
linout = FALSE,
maxit = 100, # Maximum iterations
tuneGrid = expand.grid(size = hidden_neurons, decay = 0), # Tune the hidden layer size
trControl = train_control # Cross-validation control
)
# Extract and return the best accuracy from cross-validation results
best_accuracy <- max(nn_model_cv$results$Accuracy)
return(best_accuracy)
}
# Step 2: Data Preparation
# Assuming 'image_data_pca' is already PCA-processed image data with labels
# Example: image_data_pca <- your_pca_data
# Step 3: Evaluate Neural Network with Different Hidden Layer Sizes
# Define a list of hidden neuron sizes to evaluate
neurons_list <- c(5, 10, 20)
# Create an empty data frame to store the results
accuracy_results <- data.frame(Hidden_Neurons = neurons_list, Accuracy = NA)
# Loop through each hidden neuron size and compute accuracy
for (neurons in neurons_list) {
# Fit the neural network and compute accuracy
accuracy <- fit_nn_model_cv(neurons, image_data_pca)
# Store the accuracy in the results data frame
accuracy_results[accuracy_results$Hidden_Neurons == neurons, "Accuracy"] <- accuracy
}
# Step 4: Display the Results
print("Cross-Validated Accuracy for Different Hidden Neuron Sizes:")
## [1] "Cross-Validated Accuracy for Different Hidden Neuron Sizes:"
print(accuracy_results)
## Hidden_Neurons Accuracy
## 1 5 0.3187451
## 2 10 0.3263529
## 3 20 0.3585882
# Optional: Visualize the Accuracy Results
ggplot(accuracy_results, aes(x = Hidden_Neurons, y = Accuracy)) +
geom_line() +
geom_point(size = 3) +
labs(title = "Neural Network Accuracy with Varying Hidden Neurons",
x = "Number of Hidden Neurons",
y = "Cross-Validated Accuracy") +
theme_minimal()

## Hierarchical Clustering with Dendrogram
# Compute distance matrix and perform hierarchical clustering
dist_matrix <- dist(image_data_pca[, -which(names(image_data_pca) == "label")])
hclust_result <- hclust(dist_matrix)
# Plot the dendrogram
plot(hclust_result, labels = FALSE, main = "Hierarchical Clustering Dendrogram")

# Cut the dendrogram to assign clusters
image_data_pca$hclust_cluster <- cutree(hclust_result, k = 3) # Define number of clusters
# View the first few rows with cluster assignments
head(image_data_pca)
## PC1 PC2 label hclust_cluster
## 1 -45.57601 -10.815621 Animals 1
## 2 -51.72867 9.364695 Animals 1
## 3 10.01604 12.084211 Animals 2
## 4 -62.18010 3.390804 Animals 1
## 5 1.88313 10.511209 Animals 2
## 6 44.95109 -11.087802 Animals 3
install.packages("pheatmap")
## Installing package into 'C:/Users/Naman/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'pheatmap' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Naman\AppData\Local\Temp\RtmpIxKL9K\downloaded_packages
library(pheatmap)
library(ggplot2)
K-means Clustering
# Set seed and perform K-means clustering
set.seed(123)
k <- 4 # Define number of clusters for K-means
kmeans_result <- kmeans(image_data_pca[, -which(names(image_data_pca) == "label")], centers = k)
# Assign clusters to the dataset
image_data_pca$kmeans_cluster <- as.factor(kmeans_result$cluster)
# Step 6: Visualization
# Plot K-means clustering results with the first two principal components
ggplot(image_data_pca, aes(x = PC1, y = PC2, color = kmeans_cluster)) +
geom_point(alpha = 0.6) +
labs(title = "K-means Clustering of Image Data",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_minimal()

# Apply 10-fold cross-validation
set.seed(123)
train_control <- trainControl(method = "cv", number = 10)
cv_model <- train(label ~ ., data = image_data_pca, method = "multinom", trControl = train_control)
## # weights: 40 (28 variable)
## initial value 362.123530
## iter 10 value 316.060857
## iter 20 value 299.989193
## iter 30 value 299.531757
## iter 40 value 299.512328
## iter 50 value 299.511736
## final value 299.511731
## converged
## # weights: 40 (28 variable)
## initial value 362.123530
## iter 10 value 316.063570
## iter 20 value 301.731437
## final value 301.518255
## converged
## # weights: 40 (28 variable)
## initial value 362.123530
## iter 10 value 316.060860
## iter 20 value 299.991796
## iter 30 value 299.535922
## iter 40 value 299.519144
## final value 299.519038
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.590670
## iter 20 value 305.571376
## iter 30 value 304.650243
## iter 40 value 304.622313
## iter 50 value 304.620797
## final value 304.620781
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.593416
## iter 20 value 307.913754
## final value 307.593004
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.590673
## iter 20 value 305.575401
## iter 30 value 304.656878
## iter 40 value 304.631641
## final value 304.631153
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 321.234630
## iter 20 value 302.555716
## iter 30 value 301.829782
## iter 40 value 301.789714
## iter 50 value 301.788457
## iter 50 value 301.788454
## iter 50 value 301.788454
## final value 301.788454
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 321.240221
## iter 20 value 305.020400
## final value 304.801919
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 321.234636
## iter 20 value 302.560062
## iter 30 value 301.838110
## iter 40 value 301.804617
## final value 301.804561
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 324.684150
## iter 20 value 308.481379
## iter 30 value 308.162682
## iter 40 value 308.144567
## iter 50 value 308.141978
## final value 308.141963
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 324.686289
## iter 20 value 310.912274
## final value 310.878824
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 324.684152
## iter 20 value 308.485409
## iter 30 value 308.168565
## iter 40 value 308.152405
## final value 308.151312
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 322.612635
## iter 20 value 305.624309
## iter 30 value 305.148674
## iter 40 value 305.131230
## iter 50 value 305.130710
## final value 305.130705
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 322.615339
## iter 20 value 307.605912
## final value 307.423483
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 322.612637
## iter 20 value 305.627448
## iter 30 value 305.153434
## iter 40 value 305.138526
## final value 305.138471
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.914664
## iter 20 value 310.342372
## iter 30 value 309.970113
## iter 40 value 309.954599
## iter 50 value 309.954199
## iter 50 value 309.954197
## iter 50 value 309.954197
## final value 309.954197
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.917037
## iter 20 value 312.145823
## final value 312.079031
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.914667
## iter 20 value 310.345412
## iter 30 value 309.975141
## iter 40 value 309.962688
## final value 309.962642
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 319.807747
## iter 20 value 303.625568
## iter 30 value 303.180657
## iter 40 value 303.161144
## iter 50 value 303.160102
## final value 303.160096
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 319.810609
## iter 20 value 305.844262
## final value 305.677404
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 319.807750
## iter 20 value 303.629189
## iter 30 value 303.186013
## iter 40 value 303.169199
## final value 303.168984
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.763026
## iter 20 value 308.626785
## iter 30 value 307.755843
## iter 40 value 307.737155
## iter 50 value 307.735272
## final value 307.735266
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.765323
## iter 20 value 310.387373
## final value 310.029463
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.763028
## iter 20 value 308.629766
## iter 30 value 307.761478
## iter 40 value 307.744906
## final value 307.744318
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.315693
## iter 20 value 309.039660
## iter 30 value 308.694379
## iter 40 value 308.684256
## iter 50 value 308.683974
## iter 50 value 308.683971
## iter 50 value 308.683971
## final value 308.683971
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.317305
## iter 20 value 310.916096
## final value 310.795246
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 323.315694
## iter 20 value 309.042820
## iter 30 value 308.699441
## iter 40 value 308.691736
## final value 308.691712
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 320.097757
## iter 20 value 302.785069
## iter 30 value 302.358744
## iter 40 value 302.338744
## iter 50 value 302.337749
## final value 302.337739
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 320.100294
## iter 20 value 304.494980
## final value 304.348439
## converged
## # weights: 40 (28 variable)
## initial value 363.732968
## iter 10 value 320.097760
## iter 20 value 302.787686
## iter 30 value 302.362943
## iter 40 value 302.345502
## final value 302.345292
## converged
## # weights: 40 (28 variable)
## initial value 403.968916
## iter 10 value 355.215005
## iter 20 value 341.069368
## iter 30 value 340.502608
## iter 40 value 340.485752
## final value 340.485742
## converged
# View cross-validation results
print(cv_model)
## Penalized Multinomial Regression
##
## 251 samples
## 4 predictor
## 5 classes: 'Animals', 'Birds', 'Flowers', 'Fruits', 'Trees'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 225, 226, 226, 226, 226, 226, ...
## Resampling results across tuning parameters:
##
## decay Accuracy Kappa
## 0e+00 0.3544615 0.1931054
## 1e-04 0.3544615 0.1931054
## 1e-01 0.3506154 0.1884502
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was decay = 1e-04.
# Try different values for k (number of clusters) in K-means
set.seed(123)
wss <- sapply(1:10, function(k){
kmeans(image_data_pca[, -which(names(image_data_pca) == "label")], centers = k, nstart = 10)$tot.withinss
})
# Plot the total within-cluster sum of squares to determine the optimal k
plot(1:10, wss, type = "b", pch = 19, frame = FALSE,
xlab = "Number of Clusters", ylab = "Total Within-cluster SS",
main = "Elbow Method for Optimal K")
