# Cluster analysis of the red-wine quality data.
#
# Loads the CSV, standardises the feature columns, and compares five
# clustering methods (k-means, hierarchical, DBSCAN, PAM, CLARA) on them.
# The k-means labels are finally attached to the raw data to profile each
# cluster.

# ---- Packages ----

# Install only the packages that are missing, so re-running the script does
# not re-download everything each time.
required_packages <- c("cluster", "factoextra", "corrplot", "dbscan", "NbClust")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

library(cluster)
library(factoextra)
library(corrplot)
library(dbscan)
library(NbClust)

# ---- Load and inspect the data ----

winequality.red <- read.csv(
  "~/Downloads/wine+quality/winequality-red.csv",
  sep = ";"
)
head(winequality.red)
str(winequality.red)
dim(winequality.red)

# Cluster on the first 11 columns only.
# NOTE(review): presumably these are the measured features and column 12 is
# the quality score -- confirm against the CSV header.
data <- winequality.red[, 1:11]
head(data)

# ---- Correlation overview ----

cor_matrix <- cor(data)
corrplot(cor_matrix, method = "color", type = "upper", tl.cex = 0.7)

# ---- Standardise ----

# Every method below is distance based, so centre and scale each column;
# otherwise the columns with the largest numeric range dominate.
scaled_data <- scale(data)

# ---- K-means ----

# Elbow plot (within-cluster sum of squares) to pick the number of clusters.
fviz_nbclust(scaled_data, kmeans, method = "wss")

# Seeded because k-means uses random starting centres.
set.seed(123)
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)
kmeans_result
fviz_cluster(kmeans_result, data = scaled_data)

# ---- Hierarchical clustering (Ward linkage) ----

dist_matrix <- dist(scaled_data)
hc <- hclust(dist_matrix, method = "ward.D2")
plot(hc)
rect.hclust(hc, k = 3, border = "red")

# ---- DBSCAN ----

db <- dbscan(scaled_data, eps = 1.5, minPts = 5)
db
fviz_cluster(db, scaled_data)

# ---- PAM (k-medoids) ----

pam_result <- pam(scaled_data, k = 3)
pam_result
fviz_cluster(pam_result, data = scaled_data)

# ---- CLARA ----

clara_result <- clara(scaled_data, k = 3)
clara_result
fviz_cluster(clara_result, data = scaled_data)

# ---- Compare the assignments ----

# One row per wine, one column per method.
comparison <- data.frame(
  KMeans = kmeans_result$cluster,
  Hierarchical = cutree(hc, k = 3),
  DBSCAN = db$cluster,
  PAM = pam_result$clustering,
  CLARA = clara_result$clustering
)
head(comparison)

# Cluster sizes per method.
table(kmeans_result$cluster)
table(cutree(hc, k = 3))
table(db$cluster)
table(pam_result$clustering)
table(clara_result$clustering)

# ---- Silhouette for the k-means solution ----

sil_kmeans <- silhouette(kmeans_result$cluster, dist(scaled_data))
fviz_silhouette(sil_kmeans)

# ---- Second pass: condensed re-run ----

# The statements below overwrite kmeans_result, hc, db, pam_result and
# clara_result from the first pass.
summary(winequality.red)

fviz_nbclust(scaled_data, kmeans, method = "wss")

# Same seed as the first fit, so the labels attached to the data below match
# the ones tabulated in `comparison`.
set.seed(123)
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)
fviz_cluster(kmeans_result, data = scaled_data)

# hclust() default linkage this time (no `method` given).
hc <- hclust(dist(scaled_data))
plot(hc)

# Smaller neighbourhood radius than the first DBSCAN run (0.5 vs 1.5).
db <- dbscan(scaled_data, eps = 0.5, minPts = 5)
fviz_cluster(db, scaled_data)

pam_result <- pam(scaled_data, 3)
fviz_cluster(pam_result, scaled_data)

clara_result <- clara(scaled_data, 3)
fviz_cluster(clara_result, scaled_data)

# ---- Profile the k-means clusters ----

# Attach the labels to the raw data and average every column per cluster.
winequality.red$cluster <- kmeans_result$cluster
aggregate(
  winequality.red,
  by = list(Cluster = kmeans_result$cluster),
  FUN = mean
)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Summarise the `cars` data set. The `##` lines below are the printed result
# as captured in the rendered document (columns: speed, dist).
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.