library(factoextra) # Load factoextra package
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(plotly) # Load plotly package
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
data(airquality) # Load airquality data
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
my_aq <- na.omit(airquality)
map_to_season <- function(month, day) { # Convert month and day to seasons
if((month == 3 && day >= 20) || month == 4 || month == 5 || (month == 6 && day < 21)) {
"Spring"
} else if((month == 6 && day >= 21) || month == 7 || month == 8 || (month == 9 && day < 23)) {
"Summer"
} else if((month == 9 && day >= 23) || month == 10 || month == 11 || (month == 12 && day < 21)) {
"Fall"
} else {
"Winter" # Not applicable in this data set
}
}
my_aq$Season <- mapply(map_to_season, # Apply function to create Season column
my_aq$Month,
my_aq$Day)
my_aq$Season <- factor(my_aq$Season, # Convert Season to ordered factor
levels = c("Spring", "Summer", "Fall"))
my_aq <- my_aq[ , !colnames(my_aq) %in% c("Month", "Day")] # Remove Month and Day columns
head(my_aq)
## Ozone Solar.R Wind Temp Season
## 1 41 190 7.4 67 Spring
## 2 36 118 8.0 72 Spring
## 3 12 149 12.6 74 Spring
## 4 18 313 11.5 62 Spring
## 7 23 299 8.6 65 Spring
## 8 19 99 13.8 59 Spring
my_aq_pca <- prcomp(my_aq[, !colnames(my_aq) %in% "Season"], # Apply PCA
scale = TRUE)
my_aq_pca_scores <- as.data.frame(my_aq_pca$x) # Data with scores & Season
my_aq_pca_scores$Season <- my_aq$Season
head(my_aq_pca_scores)
## PC1 PC2 PC3 PC4 Season
## 1 -0.2726394 -0.18433164 -1.2576791 -0.32622834 Spring
## 2 -0.4052513 -0.84419442 -0.5938775 -0.08837888 Spring
## 3 -1.2489240 0.05003039 0.2599028 0.24522285 Spring
## 4 -1.1149542 1.59939991 -1.2610867 -0.22256616 Spring
## 7 -0.4959531 1.08198111 -1.5569659 0.04440281 Spring
## 8 -2.3366141 -0.21435233 -0.3663025 -0.89109860 Spring
ggp_scatt <- ggplot(my_aq_pca_scores, # Scatterplot of two principal components
aes(x = PC1,
y = PC2,
color = Season)) +
geom_point(size = 2) +
scale_color_manual(values = c("Spring" = "#3fdf05",
"Summer" = "#f25c10",
"Fall" = "#1b98e0")) +
theme_minimal()
ggp_scatt
ggp_scatt + # Scatterplot with ellipses by Season
stat_ellipse(type = "t", level = 0.95)
fviz_pca_var(my_aq_pca, # Loading plot
col.var = "contrib", # Coloring variables by contribution
gradient.cols = c("#3fdf05",
"#f25c10")) +
labs(x = "PC1", # Customize axis labels
y = "PC2")
round(cor(my_aq[, 1:4]), 2) # Correlation matrix
## Ozone Solar.R Wind Temp
## Ozone 1.00 0.35 -0.61 0.70
## Solar.R 0.35 1.00 -0.13 0.29
## Wind -0.61 -0.13 1.00 -0.50
## Temp 0.70 0.29 -0.50 1.00
fviz_pca_biplot(my_aq_pca, # Biplot
label = "var", # Avoid numbering of points
col.var = "#353436", # Color of variable arrows
habillage = my_aq$Season, # Color of individuals
palette = c("#3fdf05",
"#f25c10",
"#1b98e0")) +
labs(x = "PC1", # Customize axis labels
y = "PC2")
plot_ly(my_aq_pca_scores, # Interactive 3D plot
x = ~ PC1,
y = ~ PC2,
z = ~ PC3,
color = ~ Season,
type = "scatter3d",
mode = "markers")
set.seed(2831167) # Set seed for reproducibility
my_kmeans_aq <- kmeans(my_aq_pca_scores[ , 1:2], # Perform k-means clustering
centers = length(unique(my_aq$Season)))
fviz_pca_ind(my_aq_pca, # Visualize PCA results with clusters
habillage = my_kmeans_aq$cluster, # Use cluster assignments for coloring
repel = TRUE, # Avoid text overlapping
addEllipses = TRUE, # Add ellipses around clusters
ellipse.type = "convex") + # Use convex type for ellipses
guides(color = guide_legend(override.aes = list(label = ""))) + # Customize legend
labs(x = "PC1", # Customize axis labels
y = "PC2")
### What are your realizations of the provided type of analysis? With
this type of analysis which is PCA, it was performed through
visualization on variables like ozone, solar radiation, wind, and
temperature by season which is very helpful for weather forecast in our
country and for the whole world to see the behavior of the season and
whether there are climate change.