# Load necessary libraries
library(ggplot2)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.3.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the wine data set into a data frame.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact drive and folder layout exist.
df <- read.csv(file = "E:/Alliance University/Sem 3/ML2/wine.csv")
# Echo the full table so the raw values can be inspected
df
## X Wine Hedonic Meat Dessert Price Sugar Alcohol Acidity
## 1 1 Wine_1 14 7 8 7 7 13 7
## 2 2 Wine_2 10 7 6 4 3 14 7
## 3 3 Wine_3 8 5 5 10 5 12 5
## 4 4 Wine_4 2 4 7 16 7 11 3
## 5 5 Wine_5 6 2 4 13 3 10 3
# Keep only the measurement columns for PCA. Columns 1-2 ('X' and 'Wine') are
# treated as identifiers; columns 3-9 ('Hedonic' ... 'Acidity') are analysed.
numeric_data <- df[, 3:9]
# Ensure that the data is numeric. A factor has to be converted through its
# labels: calling as.numeric() directly on a factor returns the internal level
# codes (1, 2, 3, ...) instead of the values read from the file, which would
# silently feed wrong numbers into the PCA.
numeric_data_cleaned <- numeric_data %>%
  mutate(across(where(is.factor), function(col) as.numeric(as.character(col))))
# Run the principal component analysis. Variables are centred (the prcomp
# default, spelled out here) and scaled to unit variance, so each one
# contributes on the same scale.
pca_result <- prcomp(
  x = numeric_data_cleaned,
  center = TRUE,
  scale. = TRUE
)
# Report standard deviation and (cumulative) proportion of variance per component
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 2.1824 1.3454 0.59393 0.27276 5.09e-17
## Proportion of Variance 0.6804 0.2586 0.05039 0.01063 0.00e+00
## Cumulative Proportion 0.6804 0.9390 0.98937 1.00000 1.00e+00
# PCA biplot drawn with ggfortify's autoplot() method for prcomp objects.
# The observations are plotted with data labels, and the loadings of the
# original variables are overlaid with their own labels.
autoplot(
  pca_result,
  data = df,
  label = TRUE,
  label.size = 3,
  loadings = TRUE,
  loadings.label = TRUE,
  loadings.label.size = 3
) +
  labs(title = "PCA Biplot")

# Scree plot to show variance explained by each principal component
# Proportion of variance = squared standard deviation of each component
# divided by the total of the squared standard deviations.
variance_explained <- pca_result$sdev^2 / sum(pca_result$sdev^2)
# Create a data frame for plotting. PC is a factor whose levels follow the
# component index: a plain character column would be ordered alphabetically on
# the x-axis, putting "PC10" before "PC2" once there are ten or more
# components. seq_along() is also safe for a zero-length vector, unlike
# 1:length().
pc_labels <- paste0("PC", seq_along(variance_explained))
pc_df <- data.frame(
  PC = factor(pc_labels, levels = pc_labels),
  Variance = variance_explained
)
# Plot the scree plot (dotplot) using ggplot2
scree_plot <- ggplot(pc_df, aes(x = PC, y = Variance)) +
  geom_point() +
  # group = 1 puts all points in one group so the line connects them across
  # the discrete x-axis
  geom_line(aes(group = 1)) +
  labs(
    title = "Scree Plot",
    x = "Principal Component",
    y = "Proportion of Variance Explained"
  ) +
  theme_minimal() +
  # Tilt the x-axis labels for readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Print the scree plot
print(scree_plot)
