# Use a fixed CRAN mirror so installs work in non-interactive sessions
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install only the packages that are not available yet.
# Unconditionally reinstalling on every run is slow, and on Windows it can
# fail with "cannot remove prior installation" / "Permission denied" when the
# package's DLL is locked (this happened for 'cluster' in an earlier run).
required_pkgs <- c("factoextra", "cluster", "fpc")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Load libraries
library(factoextra)  # also attaches ggplot2, which provides labs()
library(cluster)
library(fpc)

# USArrests: choosing the number of k-means clusters ----

# Bring the built-in USArrests data into the workspace and preview it
data("USArrests")
head(USArrests)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

# Standardise every column (centre and scale) before clustering
df_scaled <- scale(USArrests, center = TRUE, scale = TRUE)

# Elbow method (method = "wss"); seed fixed because k-means starts are random
set.seed(123)
elbow_plot <- fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
elbow_plot <- elbow_plot + labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette method, re-seeded so it does not depend on the step above
set.seed(123)
silhouette_plot <- fviz_nbclust(
  x = df_scaled,
  FUNcluster = kmeans,
  method = "silhouette"
)
silhouette_plot <- silhouette_plot + labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap statistic for k = 1..10 using B = 50 reference samples
set.seed(123)
gap_stat <- clusGap(x = df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat)
gap_plot <- gap_plot + labs(subtitle = "Gap Statistic")
print(gap_plot)

# iris: choosing the number of k-means clusters ----

# Bring the built-in iris data into the workspace and preview it
data("iris")
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Standardise the measurements; column 5 (Species) is left out
df_scaled <- scale(iris[, -5], center = TRUE, scale = TRUE)

# Elbow method (method = "wss"); seed fixed because k-means starts are random
set.seed(123)
elbow_plot <- fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
elbow_plot <- elbow_plot + labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette method, re-seeded so it does not depend on the step above
set.seed(123)
silhouette_plot <- fviz_nbclust(
  x = df_scaled,
  FUNcluster = kmeans,
  method = "silhouette"
)
silhouette_plot <- silhouette_plot + labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap statistic for k = 1..10 using B = 50 reference samples
set.seed(123)
gap_stat <- clusGap(x = df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat)
gap_plot <- gap_plot + labs(subtitle = "Gap Statistic")
print(gap_plot)

# Load the iris dataset
data("iris")

# Inspect the dataset
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Recode Species as integer codes (setosa = 1, versicolor = 2, virginica = 3).
# This overwrites the workspace copy of iris; data("iris") restores it.
iris$Species <- as.numeric(
  factor(iris$Species, levels = c("setosa", "versicolor", "virginica"))
)

# Inspect the modified dataset
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2       1
## 2          4.9         3.0          1.4         0.2       1
## 3          4.7         3.2          1.3         0.2       1
## 4          4.6         3.1          1.5         0.2       1
## 5          5.0         3.6          1.4         0.2       1
## 6          5.4         3.9          1.7         0.4       1

# Scale only the four measurement columns; the recoded Species column
# (column 5) is excluded from clustering
df_scaled <- scale(iris[, -5])

# Elbow Method
# Seed added: k-means uses random starts, so without it this plot depends on
# whatever RNG state earlier code left behind.
set.seed(123)
elbow_plot <- fviz_nbclust(df_scaled, kmeans, method = "wss") +
  labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette Method
# Seed added for the same reason as above.
set.seed(123)
silhouette_plot <- fviz_nbclust(df_scaled, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap Statistic (k = 1..10, B = 50 reference samples)
set.seed(123)
gap_stat <- clusGap(df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat) +
  labs(subtitle = "Gap Statistic")
print(gap_plot)

# Install only the packages that are not available yet.
# Calling install.packages() on a package that is already attached does
# nothing except warn "package ... is in use and will not be installed".
required_pkgs <- c("factoextra", "cluster", "fpc")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Load libraries (a no-op for packages that are already attached)
library(factoextra)
library(cluster)
library(fpc)

# iris (fresh copy): choosing the number of k-means clusters ----

# Reload iris so Species is back to its original labels, then preview it
data("iris")
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Standardise the measurements; column 5 (Species) is left out
df_scaled <- scale(iris[, -5], center = TRUE, scale = TRUE)

# Elbow method (method = "wss"); seed fixed because k-means starts are random
set.seed(123)
elbow_plot <- fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
elbow_plot <- elbow_plot + labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette method, re-seeded so it does not depend on the step above
set.seed(123)
silhouette_plot <- fviz_nbclust(
  x = df_scaled,
  FUNcluster = kmeans,
  method = "silhouette"
)
silhouette_plot <- silhouette_plot + labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap statistic for k = 1..10 using B = 50 reference samples
set.seed(123)
gap_stat <- clusGap(x = df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat)
gap_plot <- gap_plot + labs(subtitle = "Gap Statistic")
print(gap_plot)

# Load the iris3 dataset: the iris measurements stored as a 3-d array with
# one slice per species (Setosa, Versicolor, Virginica)
data("iris3")

# Inspect iris3 (first rows of every species slice)
head(iris3)
## , , Setosa
## 
##      Sepal L. Sepal W. Petal L. Petal W.
## [1,]      5.1      3.5      1.4      0.2
## [2,]      4.9      3.0      1.4      0.2
## [3,]      4.7      3.2      1.3      0.2
## [4,]      4.6      3.1      1.5      0.2
## [5,]      5.0      3.6      1.4      0.2
## [6,]      5.4      3.9      1.7      0.4
## 
## , , Versicolor
## 
##      Sepal L. Sepal W. Petal L. Petal W.
## [1,]      7.0      3.2      4.7      1.4
## [2,]      6.4      3.2      4.5      1.5
## [3,]      6.9      3.1      4.9      1.5
## [4,]      5.5      2.3      4.0      1.3
## [5,]      6.5      2.8      4.6      1.5
## [6,]      5.7      2.8      4.5      1.3
## 
## , , Virginica
## 
##      Sepal L. Sepal W. Petal L. Petal W.
## [1,]      6.3      3.3      6.0      2.5
## [2,]      5.8      2.7      5.1      1.9
## [3,]      7.1      3.0      5.9      2.1
## [4,]      6.3      2.9      5.6      1.8
## [5,]      6.5      3.0      5.8      2.2
## [6,]      7.6      3.0      6.6      2.1

# The clustering below runs on the iris data frame, not on iris3.
# Reload iris first so Species holds its original labels; recoding a column
# that is already numeric against these levels would yield NA.
data("iris")

# Recode Species as integer codes (setosa = 1, versicolor = 2, virginica = 3)
iris$Species <- as.numeric(
  factor(iris$Species, levels = c("setosa", "versicolor", "virginica"))
)

# Inspect the modified dataset (iris is the object that was changed;
# iris3 is untouched)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2       1
## 2          4.9         3.0          1.4         0.2       1
## 3          4.7         3.2          1.3         0.2       1
## 4          4.6         3.1          1.5         0.2       1
## 5          5.0         3.6          1.4         0.2       1
## 6          5.4         3.9          1.7         0.4       1

# Scale only the four measurement columns; the recoded Species column
# (column 5) is excluded from clustering
df_scaled <- scale(iris[, -5])

# Elbow Method
# Seed added: k-means uses random starts, so without it this plot depends on
# whatever RNG state earlier code left behind.
set.seed(123)
elbow_plot <- fviz_nbclust(df_scaled, kmeans, method = "wss") +
  labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette Method
# Seed added for the same reason as above.
set.seed(123)
silhouette_plot <- fviz_nbclust(df_scaled, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap Statistic (k = 1..10, B = 50 reference samples)
set.seed(123)
gap_stat <- clusGap(df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat) +
  labs(subtitle = "Gap Statistic")
print(gap_plot)