# Load required libraries
# (one call per line: on a single commented line none of them were executed)
library(imager)
library(caret)
library(magick)
library(tidyverse)
library(skimr)
library(DataExplorer)
library(ggplot2)
library(neuralnet)
library(dplyr)
library(nnet)

# Step 1: Load images, convert to grayscale, and create dataset ----

# Folder containing the source images.
image_folder <- "C:/Users/Kavyadinesh/Downloads/New"

#' Convert an image file to grayscale and write the result to disk.
#'
#' @param image_path Path of the image file to read.
#' @param save_path Path the grayscale copy is written to.
#' @return The result of `image_write()`; the function is called for its
#'   side effect of creating `save_path`.
convert_to_grayscale <- function(image_path, save_path) {
  # Read the image
  img <- image_read(image_path)
  # Convert to grayscale
  grayscale_img <- image_convert(img, colorspace = "gray")
  # Save the grayscale image
  image_write(grayscale_img, save_path)
}

# Loop through each subfolder and each image in the main directory
subdirs <- list.dirs(image_folder, full.names = TRUE, recursive = TRUE)

for (subdir in subdirs) {
  # The dot is escaped twice: once for the R string literal, once for the
  # regular expression ("\." alone is an invalid escape in an R string).
  image_files <- list.files(
    subdir,
    pattern = "\\.(jpg|png)$",
    full.names = TRUE
  )

  # Outputs are written next to their sources, so skip files produced by an
  # earlier run; otherwise a rerun would create "grayscale_grayscale_..."
  # copies.
  already_converted <- startsWith(basename(image_files), "grayscale_")
  image_files <- image_files[!already_converted]

  for (file in image_files) {
    # Define where to save the grayscale image
    save_path <- file.path(subdir, paste0("grayscale_", basename(file)))

    # Convert the image to grayscale and save it
    convert_to_grayscale(file, save_path)

    # Print a message indicating the conversion
    cat(
      "Converted", basename(file),
      "to grayscale and saved as", save_path, "\n"
    )
  }
}

# Define the directory containing your grayscale images
# NOTE(review): this differs from the folder the conversion loop writes into;
# confirm the grayscale files are moved here before the dataset is built.
image_folder <- "C:/Users/Kavyadinesh/Downloads/Grayscale"

#' Convert an image file to a flat vector of grayscale pixel values.
#'
#' The image is converted to grayscale and resized to 100x100 pixels so that
#' every image yields a vector of the same length.
#'
#' @param image_path Path of the image file to read.
#' @return An integer vector with one entry per pixel of the resized image.
image_to_vector <- function(image_path) {
  # Read the image
  img <- image_read(image_path)
  # Convert image to grayscale (if not already grayscale)
  grayscale_img <- image_convert(img, colorspace = "gray")
  # Resize to a fixed size; the trailing "!" in the geometry string forces
  # exactly 100x100 instead of preserving the aspect ratio.
  resized_img <- image_resize(grayscale_img, "100x100!")
  # Take the first channel of the raw pixel array and convert it to integers
  # (as.integer() also drops the matrix dimensions).
  img_matrix <- as.integer(image_data(resized_img)[1, , ])
  # Flatten into a plain vector
  as.vector(img_matrix)
}

# Initialize empty containers for the pixel vectors and their labels
image_data_list <- list()
image_labels <- character(0)

# Loop through each subfolder and each image in the main directory
subdirs <- list.dirs(image_folder, full.names = TRUE, recursive = TRUE)

for (subdir in subdirs) {
  # The dot is escaped twice: once for the R string, once for the regex.
  image_files <- list.files(
    subdir,
    pattern = "\\.(jpg|png)$",
    full.names = TRUE
  )

  # Get the label (subfolder name)
  label <- basename(subdir)

  # Convert every image in this folder to a vector of pixel values, then
  # append the whole folder at once instead of growing element by element.
  subdir_vectors <- lapply(image_files, image_to_vector)
  image_data_list <- c(image_data_list, subdir_vectors)

  # Store one label per image
  image_labels <- c(image_labels, rep(label, length(image_files)))
}

# Fail early with a clear message instead of writing an empty CSV that
# read.csv() cannot load later.
if (length(image_data_list) == 0) {
  stop("No .jpg or .png images found under ", image_folder, call. = FALSE)
}

# Combine the image vectors into a data frame (one row per image; columns
# are auto-named V1, V2, ...)
image_data_df <- as.data.frame(do.call(rbind, image_data_list))

# Add the labels as a new column
image_data_df$label <- image_labels

# Define the path where you want to save the CSV file
csv_file_path <- "C:/Users/Kavyadinesh/Downloads/Grayscale/data.csv"

# Save the data frame as a CSV file
write.csv(image_data_df, csv_file_path, row.names = FALSE)

# Confirmation message (newline added so later console output starts on its
# own line)
cat("Dataset has been saved as a CSV at", csv_file_path, "\n")

# PREPROCESSING AND EDA ----

# Load the CSV file
data <- read.csv("C:/Users/Kavyadinesh/Downloads/Grayscale/data.csv")

# View the first few rows of the dataset
head(data)

# Get the structure of the dataset
str(data)

# Get a summary of the dataset
summary(data)

# Check for missing values
sum(is.na(data)) # Total missing values

# Count of missing values by column
colSums(is.na(data))

# Remove rows with missing values
data_clean <- na.omit(data)
data_clean

# Check for duplicates
duplicated_rows <- data[duplicated(data), ]

# Remove duplicates. Start from data_clean, not data, so the rows dropped by
# na.omit() above stay dropped.
data_clean <- data_clean %>%
  distinct()

# Convert character variables to factors
data_clean <- data_clean %>%
  mutate(across(where(is.character), as.factor))

# View the distribution of the categorical variable (the image label column
# of the CSV)
table(data_clean$label)

# PERFORMING LOGISTIC REGRESSION ----

data$label <- as.factor(data$label)
data$label

# Split the data into training and test sets (70% training, 30% testing)
set.seed(123) # Set seed for reproducibility
# Named train_mask rather than sample so base::sample() is not shadowed.
train_mask <- sample(
  c(TRUE, FALSE),
  nrow(data),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train_data <- data[train_mask, ]
test_data <- data[!train_mask, ]

str(train_data)

# Fit logistic regression model
# NOTE(review): with family = binomial a factor response is treated as
# "first level" vs "all other levels"; confirm label has exactly two classes.
logistic_model <- glm(label ~ ., data = train_data, family = binomial)

# Summary of the logistic regression model
summary(logistic_model)

# K-MEANS CLUSTERING ----

library(tidyverse)

# Show the available column names
names(data)

# Cluster on the first three pixel columns only
clustering_data <- data %>%
  select(V1, V2, V3)

# Set seed for reproducibility
set.seed(123)

# Perform K-means clustering
kmeans_result <- kmeans(clustering_data, centers = 3)

# View the clustering result
print(kmeans_result)

# Add cluster assignments to the original dataset
data$cluster <- as.factor(kmeans_result$cluster)

# View first few rows with cluster labels
head(data)

# NEURAL NETWORK ----
# NEURAL NETWORK WITH 1 HIDDEN LAYER 5 NEURONS

# Set seed for reproducibility
set.seed(123)

# Create a sample dataset
# NOTE(review): this overwrites `data`, replacing the image dataset with
# synthetic data from here on.
data <- data.frame(
  feature1 = rnorm(100),
  feature2 = rnorm(100),
  label = as.factor(sample(c(0, 1), 100, replace = TRUE)) # Binary target
)

# Split data into training and testing sets
train_index <- createDataPartition(data$label, p = 0.8, list = FALSE)
train_data <- data[train_index, ]
test_data <- data[-train_index, ]

# Scale the predictor variables (excluding the target variable)
feature_cols <- setdiff(names(train_data), "label")
train_scaled_matrix <- scale(train_data[, feature_cols])
train_data_scaled <- as.data.frame(train_scaled_matrix)
train_data_scaled$label <- train_data$label # Add label back

# Scale the test set with the training centers/scales so it can be fed to
# the fitted model without leaking test-set statistics.
test_data_scaled <- as.data.frame(scale(
  test_data[, feature_cols],
  center = attr(train_scaled_matrix, "scaled:center"),
  scale = attr(train_scaled_matrix, "scaled:scale")
))
test_data_scaled$label <- test_data$label

# Fit the neural network model with 1 hidden layer and 5 neurons
nn_model <- neuralnet(
  label ~ .,
  data = train_data_scaled,
  hidden = 5,
  linear.output = FALSE
)

# NEURAL NETWORK WITH 1 HIDDEN LAYER 10 NEURONS

# Set seed for reproducibility
set.seed(123)

# Generate data
data <- data.frame(
  feature1 = rnorm(100),
  feature2 = rnorm(100),
  label = as.factor(sample(c(0, 1), 100, replace = TRUE)) # Binary target
)

# Split data into training and testing sets
train_index <- createDataPartition(data$label, p = 0.8, list = FALSE)
train_data <- data[train_index, ]
test_data <- data[-train_index, ]

# Scale the predictor variables (excluding the target variable)
feature_cols <- setdiff(names(train_data), "label")
train_scaled_matrix <- scale(train_data[, feature_cols])
train_data_scaled <- as.data.frame(train_scaled_matrix)
train_data_scaled$label <- train_data$label # Add label back

# Scale the test set with the training centers/scales so it can be fed to
# the fitted model without leaking test-set statistics.
test_data_scaled <- as.data.frame(scale(
  test_data[, feature_cols],
  center = attr(train_scaled_matrix, "scaled:center"),
  scale = attr(train_scaled_matrix, "scaled:scale")
))
test_data_scaled$label <- test_data$label

# Fit the neural network with 10 neurons in 1 hidden layer
nn_model <- neuralnet(
  label ~ .,
  data = train_data_scaled,
  hidden = 10,
  linear.output = FALSE
)

# NEURAL NETWORK WITH 1 HIDDEN LAYER 20 NEURONS

# Set seed for reproducibility
set.seed(123)

# Generate data
data <- data.frame(
  feature1 = rnorm(100),
  feature2 = rnorm(100),
  label = as.factor(sample(c(0, 1), 100, replace = TRUE)) # Binary target
)

# Split data into training and testing sets
train_index <- createDataPartition(data$label, p = 0.8, list = FALSE)
train_data <- data[train_index, ]
test_data <- data[-train_index, ]

# Scale the predictor variables (excluding the target variable)
feature_cols <- setdiff(names(train_data), "label")
train_scaled_matrix <- scale(train_data[, feature_cols])
train_data_scaled <- as.data.frame(train_scaled_matrix)
train_data_scaled$label <- train_data$label # Add label back

# Scale the test set with the training centers/scales so it can be fed to
# the fitted model without leaking test-set statistics.
test_data_scaled <- as.data.frame(scale(
  test_data[, feature_cols],
  center = attr(train_scaled_matrix, "scaled:center"),
  scale = attr(train_scaled_matrix, "scaled:scale")
))
test_data_scaled$label <- test_data$label

# Fit the neural network with 1 hidden layer and 20 neurons
nn_model <- neuralnet(
  label ~ .,
  data = train_data_scaled,
  hidden = 20,
  linear.output = FALSE
)

# Assignment

# Kavyashree

# 2024-10-24

# Step 1: Load images, convert to grayscale, and create dataset

# Loop through each subfolder and each image in the main directory

# Convert the image to grayscale and save it

# Print a message indicating the conversion

# Define the directory containing your grayscale images

# Function to convert an image to a vector of pixel values

# Initialize an empty list to store image data

# Loop through each subfolder and each image in the main directory

# Combine the image vectors into a data frame

# Define the path where you want to save the CSV file

# Save the data frame as a CSV file

# Confirmation message

# View the first few rows of the dataset

# Get the structure of the dataset

# Get a summary of the dataset

# Check for missing values

# Count of missing values by column

# Remove rows with missing values

# Check for duplicates

# Remove duplicates

# Convert character variables to factors

# View the distribution of categorical variables

# Split the data into training and test sets (70% training, 30% testing)

# Fit logistic regression model

# Summary of the logistic regression model

# Set seed for reproducibility

# Perform K-means clustering

# View the clustering result

# Add cluster assignments to the original dataset

# View first few rows with cluster labels

# Create a sample dataset

# Split data into training and testing sets

# Scale the predictor variables (excluding the target variable)

# Fit the neural network model with 1 hidden layer and 5 neurons

# NEURAL NETWORK WITH 1 HIDDEN LAYER 10 NEURONS

# Generate data

# Split data into training and testing sets

# Scale the predictor variables (excluding the target variable)

# Fit the neural network with 10 neurons in 1 hidden layer

# NEURAL NETWORK WITH 1 HIDDEN LAYER 20 NEURONS

# Generate data

# Split data into training and testing sets

# Scale the predictor variables (excluding the target variable)

# Fit the neural network with 1 hidden layer and 20 neurons