library(factoextra)                                       # Load factoextra package
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(plotly)                                           # Load plotly package
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
data(airquality)                                          # Load airquality data
head(airquality)   
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
my_aq <- na.omit(airquality)            
map_to_season <- function(month, day) {                   # Convert month and day to seasons
 
  if((month == 3 && day >= 20) || month == 4 || month == 5 || (month == 6 && day < 21)) {
 
    "Spring"
 
  } else if((month == 6 && day >= 21) || month == 7 || month == 8 || (month == 9 && day < 23)) {
 
    "Summer"
 
  } else if((month == 9 && day >= 23) || month == 10 || month == 11 || (month == 12 && day < 21)) {
 
    "Fall"
 
  } else {
 
    "Winter"                                              # Not applicable in this data set
  }
}
my_aq$Season <- mapply(map_to_season,                     # Apply function to create Season column
                       my_aq$Month,
                       my_aq$Day)
 
my_aq$Season <- factor(my_aq$Season,                      # Convert Season to ordered factor
                       levels = c("Spring", "Summer", "Fall"))
 
 
my_aq <- my_aq[ , !colnames(my_aq) %in% c("Month", "Day")]  # Remove Month and Day columns
head(my_aq)
##   Ozone Solar.R Wind Temp Season
## 1    41     190  7.4   67 Spring
## 2    36     118  8.0   72 Spring
## 3    12     149 12.6   74 Spring
## 4    18     313 11.5   62 Spring
## 7    23     299  8.6   65 Spring
## 8    19      99 13.8   59 Spring
my_aq_pca <- prcomp(my_aq[, !colnames(my_aq) %in% "Season"], # Apply PCA
                    scale = TRUE)
 
my_aq_pca_scores <- as.data.frame(my_aq_pca$x)            # Data with scores & Season
my_aq_pca_scores$Season <- my_aq$Season
head(my_aq_pca_scores)
##          PC1         PC2        PC3         PC4 Season
## 1 -0.2726394 -0.18433164 -1.2576791 -0.32622834 Spring
## 2 -0.4052513 -0.84419442 -0.5938775 -0.08837888 Spring
## 3 -1.2489240  0.05003039  0.2599028  0.24522285 Spring
## 4 -1.1149542  1.59939991 -1.2610867 -0.22256616 Spring
## 7 -0.4959531  1.08198111 -1.5569659  0.04440281 Spring
## 8 -2.3366141 -0.21435233 -0.3663025 -0.89109860 Spring
ggp_scatt <- ggplot(my_aq_pca_scores,                     # Scatterplot of two principal components
                    aes(x = PC1,
                        y = PC2,
                        color = Season)) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Spring" = "#3fdf05", 
                                "Summer" = "#f25c10", 
                                "Fall" = "#1b98e0")) +
  theme_minimal()
ggp_scatt

ggp_scatt +                                               # Scatterplot with ellipses by Season
  stat_ellipse(type = "t", level = 0.95)

fviz_pca_var(my_aq_pca,                                   # Loading plot
             col.var = "contrib",                         # Coloring variables by contribution
             gradient.cols = c("#3fdf05",
                               "#f25c10")) +
  labs(x = "PC1",                                         # Customize axis labels
       y = "PC2")

round(cor(my_aq[, 1:4]), 2)                               # Correlation matrix
##         Ozone Solar.R  Wind  Temp
## Ozone    1.00    0.35 -0.61  0.70
## Solar.R  0.35    1.00 -0.13  0.29
## Wind    -0.61   -0.13  1.00 -0.50
## Temp     0.70    0.29 -0.50  1.00
fviz_pca_biplot(my_aq_pca,                                # Biplot
                label = "var",                            # Avoid numbering of points
                col.var = "#353436",                      # Color of variable arrows
                habillage = my_aq$Season,                 # Color of individuals
                palette = c("#3fdf05",
                            "#f25c10",
                            "#1b98e0")) +
  labs(x = "PC1",                                         # Customize axis labels
       y = "PC2")

plot_ly(my_aq_pca_scores,                                 # Interactive 3D plot
        x = ~ PC1,
        y = ~ PC2,
        z = ~ PC3,
        color = ~ Season,
        type = "scatter3d",
        mode = "markers")
set.seed(2831167)                                         # Set seed for reproducibility
my_kmeans_aq <- kmeans(my_aq_pca_scores[ , 1:2],          # Perform k-means clustering
                       centers = length(unique(my_aq$Season)))
fviz_pca_ind(my_aq_pca,                                   # Visualize PCA results with clusters
             habillage = my_kmeans_aq$cluster,            # Use cluster assignments for coloring
             repel = TRUE,                                # Avoid text overlapping
             addEllipses = TRUE,                          # Add ellipses around clusters
             ellipse.type = "convex") +                   # Use convex type for ellipses
  guides(color = guide_legend(override.aes = list(label = ""))) + # Customize legend
  labs(x = "PC1",                                         # Customize axis labels
       y = "PC2")

### What are your realizations of the provided type of analysis? With this type of analysis which is PCA, it was performed through visualization on variables like ozone, solar radiation, wind, and temperature by season which is very helpful for weather forecast in our country and for the whole world to see the behavior of the season and whether there are climate change.