# Setup ----
# Use a fixed CRAN mirror so any installation below works non-interactively.
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install only the packages that are missing. Reinstalling on every run needs
# network access, and on Windows it cannot replace the DLL of a package that
# is already loaded (this is what produced the "cannot remove prior
# installation of package 'cluster'" / "Permission denied" warnings).
required_pkgs <- c("factoextra", "cluster", "fpc")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Load libraries
library(factoextra) # fviz_nbclust(), fviz_gap_stat(); also attaches ggplot2
library(cluster) # clusGap()
library(fpc)
# USArrests: how many k-means clusters? ----
data("USArrests")

# Peek at the first rows
head(USArrests)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

# Centre and scale every column so all variables are on a comparable scale
df_scaled <- scale(USArrests)

# Elbow method (method = "wss")
set.seed(123)
elbow_plot <- fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
elbow_plot <- elbow_plot + labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette method
set.seed(123)
silhouette_plot <- fviz_nbclust(
  x = df_scaled,
  FUNcluster = kmeans,
  method = "silhouette"
)
silhouette_plot <- silhouette_plot + labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap statistic: k = 1..10, B = 50 reference samples
set.seed(123)
gap_stat <- clusGap(x = df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat)
gap_plot <- gap_plot + labs(subtitle = "Gap Statistic")
print(gap_plot)

# iris: how many k-means clusters? ----
data("iris")

# Peek at the first rows
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Centre and scale the four measurements; column 5 (Species) is left out
df_scaled <- scale(iris[, 1:4])

# Elbow method (method = "wss")
set.seed(123)
elbow_plot <- fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
elbow_plot <- elbow_plot + labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette method
set.seed(123)
silhouette_plot <- fviz_nbclust(
  x = df_scaled,
  FUNcluster = kmeans,
  method = "silhouette"
)
silhouette_plot <- silhouette_plot + labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap statistic: k = 1..10, B = 50 reference samples
set.seed(123)
gap_stat <- clusGap(x = df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat)
gap_plot <- gap_plot + labs(subtitle = "Gap Statistic")
print(gap_plot)

# Load the iris dataset
data("iris")
# Inspect the dataset
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Change the species column to numerical values
# (1 = setosa, 2 = versicolor, 3 = virginica, following the level order below)
iris$Species <- as.numeric(
  factor(iris$Species, levels = c("setosa", "versicolor", "virginica"))
)
# Inspect the modified dataset
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2       1
## 2          4.9         3.0          1.4         0.2       1
## 3          4.7         3.2          1.3         0.2       1
## 4          4.6         3.1          1.5         0.2       1
## 5          5.0         3.6          1.4         0.2       1
## 6          5.4         3.9          1.7         0.4       1
# Scale only the measurement columns (first four columns); the recoded species
# column is a label, not a feature, so it is excluded from clustering.
df_scaled <- scale(iris[, -5])
# Elbow Method
# kmeans() starts from random centres, so seed the RNG before every method to
# make the plots reproducible.
set.seed(123)
elbow_plot <- fviz_nbclust(df_scaled, kmeans, method = "wss") +
  labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette Method
set.seed(123)
silhouette_plot <- fviz_nbclust(df_scaled, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap Statistic
set.seed(123)
# FUNcluster is spelled out in full rather than relying on partial matching.
gap_stat <- clusGap(df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat) +
  labs(subtitle = "Gap Statistic")
print(gap_plot)

# Load libraries
# install.packages() will not reinstall a package that is currently in use (it
# only emits a warning), so install a package only when it is genuinely missing.
for (pkg in c("factoextra", "cluster", "fpc")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(factoextra)
library(cluster)
library(fpc)
# iris (fresh copy): how many k-means clusters? ----
data("iris")

# First six observations
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Drop the fifth column (Species), then centre and scale what remains
iris_measurements <- iris[-5]
df_scaled <- scale(iris_measurements)

# Elbow method (method = "wss")
set.seed(123)
elbow_base <- fviz_nbclust(df_scaled, FUNcluster = kmeans, method = "wss")
elbow_plot <- elbow_base + labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette method
set.seed(123)
silhouette_base <- fviz_nbclust(
  df_scaled,
  FUNcluster = kmeans,
  method = "silhouette"
)
silhouette_plot <- silhouette_base + labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap statistic: k = 1..10, B = 50 reference samples
set.seed(123)
gap_stat <- clusGap(df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat) + labs(subtitle = "Gap Statistic")
print(gap_plot)

# Load the iris3 dataset
data("iris3")
# Inspect the dataset. iris3 is a 3-d array (flowers x measurements x species),
# so head() prints the first rows of the Setosa, Versicolor and Virginica
# slices separately.
head(iris3)
# Stack the three species slices into one matrix (one row per flower, one
# column per measurement) so it can be scaled and clustered. The original code
# scaled `iris` here, so iris3 was loaded but never actually analysed.
iris3_flat <- rbind(iris3[, , 1], iris3[, , 2], iris3[, , 3])
# iris3 has no species column; species is the third array dimension. Build the
# numeric species codes (1 = Setosa, 2 = Versicolor, 3 = Virginica) in the
# same row order as iris3_flat. They are labels only and are kept out of the
# clustering input.
species_id <- rep(seq_len(dim(iris3)[3]), each = dim(iris3)[1])
# Inspect the flattened dataset
head(iris3_flat)
# Scale the four measurement columns
df_scaled <- scale(iris3_flat)
# Elbow Method
# kmeans() starts from random centres, so seed the RNG before every method to
# make the plots reproducible.
set.seed(123)
elbow_plot <- fviz_nbclust(df_scaled, kmeans, method = "wss") +
  labs(subtitle = "Elbow Method")
print(elbow_plot)

# Silhouette Method
set.seed(123)
silhouette_plot <- fviz_nbclust(df_scaled, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette Method")
print(silhouette_plot)

# Gap Statistic
set.seed(123)
# FUNcluster is spelled out in full rather than relying on partial matching.
gap_stat <- clusGap(df_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
gap_plot <- fviz_gap_stat(gap_stat) +
  labs(subtitle = "Gap Statistic")
print(gap_plot)
