# Load necessary libraries
library(ggplot2)   # ggplot(), geom_*(), labs(), theme*()
library(ggfortify) # autoplot() support for prcomp objects
library(dplyr)     # %>% and column-wise mutate()

# Load the data
# NOTE(review): the path is absolute and machine-specific; the script will only
# run where this exact file exists.
df <- read.csv("C:/Users/Kavyadinesh/Downloads/wine.csv")

# Print the data frame for a quick visual check.
df

# Remove non-numeric or unnecessary columns (if needed).
# Here, 'X' is assumed to be an identifier column, and PCA is applied to the
# numeric columns only.
# NOTE(review): columns are chosen by position (3 through 9); confirm these are
# the intended numeric columns of wine.csv.
numeric_data <- df[, 3:9] # Selecting numeric columns only

# Ensure that the data is numeric: every factor column is passed through
# as.numeric(); all other columns are left untouched.
# NOTE(review): as.numeric() on a factor yields its integer level codes, not
# the numbers shown in its labels. If a column holds numbers stored as a
# factor, as.numeric(as.character(x)) is the value-preserving conversion.
# Character columns are not converted here and would make prcomp() fail.
numeric_data_cleaned <- numeric_data %>%
  mutate(across(where(is.factor), as.numeric))

# Perform PCA.
# scale. = TRUE rescales each variable to unit variance before the
# decomposition, so variables measured on larger scales do not dominate.
pca_result <- prcomp(numeric_data_cleaned, scale. = TRUE)

# View the PCA result summary
summary(pca_result)

# Biplot visualization using ggfortify.
# This will visualize both the principal components and the loadings
# (contributions of the original variables).
# The full data frame is passed as `data`; its rows must line up with the rows
# that were given to prcomp().
autoplot(
  pca_result,
  data = df,
  label = TRUE,
  label.size = 3,
  loadings = TRUE,
  loadings.label = TRUE,
  loadings.label.size = 3
) +
  labs(title = "PCA Biplot")

# Scree plot to show variance explained by each principal component.

# Calculate the proportion of variance explained: each component's variance
# (squared standard deviation) divided by the total variance.
variance_explained <- pca_result$sdev^2 / sum(pca_result$sdev^2)

# Create a data frame for plotting: one row per principal component.
pc_labels <- paste0("PC", seq_along(variance_explained))
pc_df <- data.frame(
  # Factor levels follow component order, so the x-axis is not sorted
  # alphabetically (which would put "PC10" before "PC2").
  PC = factor(pc_labels, levels = pc_labels),
  Variance = variance_explained
)

# Plot the scree plot (dotplot) using ggplot2.
# The plot object is stored in scree_plot; it is not printed here.
scree_plot <- ggplot(pc_df, aes(x = PC, y = Variance)) +
  geom_point() +
  geom_line(aes(group = 1)) + # Ensure the line connects points
  labs(
    title = "Scree Plot",
    x = "Principal Component",
    y = "Proportion of Variance Explained"
  ) +
  theme_minimal() +
  # Adjust x-axis text for readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))