# Example data and code for the 2019-11-30 post from the WeChat public account
# "小明的数据分析笔记本" (Xiaoming's Data Analysis Notebook).
# Load factoextra, which supplies the fviz_* helpers used in this script.
# The recorded startup messages below show that it also attaches ggplot2.
library(factoextra)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Read the wine data from the working directory; the first row holds the
# column names. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
df <- read.csv("Wine.csv", header = TRUE)

# Standardise columns 2 to 14 (the 13 measurements listed in the output
# below) with scale()'s default centring and scaling, so that no single
# variable dominates the distance computation in k-means.
# NOTE(review): column 1 is presumably the wine class label -- confirm
# against Wine.csv.
winescale <- scale(df[, 2:14])

# Preview the first rows of the standardised matrix (recorded output follows).
head(winescale)
## Alcohol Malic.acid Ash Acl Mg Phenols
## [1,] 1.5143408 -0.56066822 0.2313998 -1.1663032 1.90852151 0.8067217
## [2,] 0.2455968 -0.49800856 -0.8256672 -2.4838405 0.01809398 0.5670481
## [3,] 0.1963252 0.02117152 1.1062139 -0.2679823 0.08810981 0.8067217
## [4,] 1.6867914 -0.34583508 0.4865539 -0.8069748 0.92829983 2.4844372
## [5,] 0.2948684 0.22705328 1.8352256 0.4506745 1.27837900 0.8067217
## [6,] 1.4773871 -0.51591132 0.3043010 -1.2860793 0.85828399 1.5576991
## Flavanoids Nonflavanoid.phenols Proanth Color.int Hue OD
## [1,] 1.0319081 -0.6577078 1.2214385 0.2510088 0.3611585 1.8427215
## [2,] 0.7315653 -0.8184106 -0.5431887 -0.2924962 0.4049085 1.1103172
## [3,] 1.2121137 -0.4970050 2.1299594 0.2682629 0.3174085 0.7863692
## [4,] 1.4623994 -0.9791134 1.0292513 1.1827317 -0.4263410 1.1807407
## [5,] 0.6614853 0.2261576 0.4002753 -0.3183774 0.3611585 0.4483365
## [6,] 1.3622851 -0.1755994 0.6623487 0.7298108 0.4049085 0.3356589
## Proline
## [1,] 1.01015939
## [2,] 0.96252635
## [3,] 1.39122370
## [4,] 2.32800680
## [5,] -0.03776747
## [6,] 2.23274072
# Elbow plot: fviz_nbclust() runs kmeans on the scaled data for a range of
# cluster counts and plots the "wss" criterion against k. A dark red
# vertical reference line marks k = 3, the cluster count used below.
fviz_nbclust(winescale, FUNcluster = kmeans, method = "wss") +
  geom_vline(xintercept = 3, linetype = 5, colour = "darkred")

# Fit k-means with 3 clusters, keeping the best of 25 random starts.
# This must be its own statement: it was previously fused onto the end of
# the geom_vline() call, which is a parse error.
# NOTE(review): the starts are random, so cluster numbering can differ
# between runs; call set.seed() beforehand if reproducible labels are needed.
winekmeans <- kmeans(winescale, centers = 3, nstart = 25)
# Auto-print the fitted k-means object: cluster sizes, per-cluster means,
# the assignment vector, within-cluster sums of squares and the list of
# available components (recorded output follows).
winekmeans
## K-means clustering with 3 clusters of sizes 51, 62, 65
##
## Cluster means:
## Alcohol Malic.acid Ash Acl Mg Phenols
## 1 0.1644436 0.8690954 0.1863726 0.5228924 -0.07526047 -0.97657548
## 2 0.8328826 -0.3029551 0.3636801 -0.6084749 0.57596208 0.88274724
## 3 -0.9234669 -0.3929331 -0.4931257 0.1701220 -0.49032869 -0.07576891
## Flavanoids Nonflavanoid.phenols Proanth Color.int Hue OD
## 1 -1.21182921 0.72402116 -0.77751312 0.9388902 -1.1615122 -1.2887761
## 2 0.97506900 -0.56050853 0.57865427 0.1705823 0.4726504 0.7770551
## 3 0.02075402 -0.03343924 0.05810161 -0.8993770 0.4605046 0.2700025
## Proline
## 1 -0.4059428
## 2 1.1220202
## 3 -0.7517257
##
## Clustering vector:
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [38] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 1 3 3 3 3 3 3 3 3 3 3 3 2
## [75] 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [112] 3 3 3 3 3 3 3 1 3 3 2 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 326.3537 385.6983 558.6971
## (between_SS / total_SS = 44.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"

# Cluster centres on the standardised scale: one row per cluster, one
# column per variable.
winekmeans[["centers"]]
## Alcohol Malic.acid Ash Acl Mg Phenols
## 1 0.1644436 0.8690954 0.1863726 0.5228924 -0.07526047 -0.97657548
## 2 0.8328826 -0.3029551 0.3636801 -0.6084749 0.57596208 0.88274724
## 3 -0.9234669 -0.3929331 -0.4931257 0.1701220 -0.49032869 -0.07576891
## Flavanoids Nonflavanoid.phenols Proanth Color.int Hue OD
## 1 -1.21182921 0.72402116 -0.77751312 0.9388902 -1.1615122 -1.2887761
## 2 0.97506900 -0.56050853 0.57865427 0.1705823 0.4726504 0.7770551
## 3 0.02075402 -0.03343924 0.05810161 -0.8993770 0.4605046 0.2700025
## Proline
## 1 -0.4059428
## 2 1.1220202
## 3 -0.7517257

# Number of observations assigned to each cluster.
winekmeans[["size"]]
## [1] 51 62 65
# Visualise the k-means partition as a point-only scatter plot of the
# scaled data, with "norm" ellipses around each cluster, the "jco" colour
# palette, no title and ggplot2's minimal theme.
# NOTE(review): winescale has 13 columns, so fviz_cluster() presumably
# projects onto the first two principal components -- confirm in the
# factoextra documentation.
fviz_cluster(
  object = winekmeans,
  data = winescale,
  ellipse.type = "norm",
  geom = "point",
  palette = "jco",
  main = "",
  ggtheme = theme_minimal()
)

# Feel free to follow my WeChat public account:
# 小明的数据分析笔记本 (Xiaoming's Data Analysis Notebook)