library(readxl)
## Warning: package 'readxl' was built under R version 3.4.4
# Load the clustering data set from the Excel workbook. The recorded output
# below shows 52 observations of three numeric columns: math, lang, manual.
ClusterAnalysisData <- read_excel("~/Downloads/ClusterAnalysisData.xlsx")
# View() opens a GUI data viewer; only call it in an interactive session so
# the script can also be run non-interactively (Rscript, knitr, CI).
if (interactive()) {
  View(ClusterAnalysisData)
}
# First rows of the raw data.
head(ClusterAnalysisData)
## # A tibble: 6 x 3
## math lang manual
## <dbl> <dbl> <dbl>
## 1 143 123 123
## 2 133 116 104
## 3 124 110 110
## 4 145 119 112
## 5 120 110 123
## 6 172 104 128
# Column types and dimensions.
str(ClusterAnalysisData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 52 obs. of 3 variables:
## $ math : num 143 133 124 145 120 172 147 141 145 124 ...
## $ lang : num 123 116 110 119 110 104 107 117 103 95 ...
## $ manual: num 123 104 110 112 123 128 108 109 106 102 ...
# Per-column five-number summary and mean.
summary(ClusterAnalysisData)
## math lang manual
## Min. : 93.0 Min. : 95.0 Min. : 87.0
## 1st Qu.:119.0 1st Qu.:126.0 1st Qu.:104.8
## Median :132.0 Median :148.0 Median :111.5
## Mean :135.2 Mean :143.8 Mean :112.2
## 3rd Qu.:145.5 3rd Qu.:159.5 3rd Qu.:120.2
## Max. :200.0 Max. :193.0 Max. :142.0
# Drop incomplete rows BEFORE standardising, so the column means and standard
# deviations used by scale() are computed only from the rows that are actually
# clustered. (With no missing values this gives the same result as scaling
# first.)
ClusterAnalysisData <- na.omit(ClusterAnalysisData)
# Centre each column to mean 0 and scale to sd 1; the result is a numeric
# matrix.
ClusterAnalysisData <- scale(ClusterAnalysisData)
head(ClusterAnalysisData)
## math lang manual
## [1,] 0.3072918 -0.915562 0.88765139
## [2,] -0.0862839 -1.224426 -0.67007432
## [3,] -0.4405020 -1.489167 -0.17816094
## [4,] 0.3860069 -1.092056 -0.01418981
## [5,] -0.5979323 -1.489167 0.88765139
## [6,] 1.4486614 -1.753908 1.29757921
library(factoextra)
## Loading required package: ggplot2
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
# Fix the RNG state so the k-means runs below are reproducible.
set.seed(1)
# Elbow plot: total within-cluster sum of squares against the number of
# clusters, used to choose k.
fviz_nbclust(ClusterAnalysisData, kmeans, method = "wss")

# Fit k-means with 3 centres on the standardised data (k presumably read off
# the elbow plot -- confirm). The random start uses whatever RNG state is
# current here, so the recorded output depends on the earlier set.seed(1).
kmeans_ClusterAnalysisdata <- kmeans(ClusterAnalysisData, centers = 3)
# Opening the help page is only useful when someone is at the console.
if (interactive()) {
  help("kmeans")
}
# Print cluster sizes, centres, assignments and sums of squares.
kmeans_ClusterAnalysisdata
## K-means clustering with 3 clusters of sizes 15, 7, 30
##
## Cluster means:
## math lang manual
## 1 0.2679342 -1.3362058 -0.03605263
## 2 1.8759721 1.1582411 1.09847141
## 3 -0.5716939 0.3978466 -0.23828368
##
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [36] 3 3 3 3 3 3 3 3 3 1 2 2 2 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 14.827480 6.717068 44.848552
## (between_SS / total_SS = 56.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Scatter plot coloured by cluster; plot() on a matrix uses its first two
# columns (here math and lang).
plot(ClusterAnalysisData, col = kmeans_ClusterAnalysisdata$cluster)

# factoextra cluster plot of the fitted partition.
fviz_cluster(kmeans_ClusterAnalysisdata, data = ClusterAnalysisData)

# Draw 40 values from a normal distribution with mean 17.2 and sd 10.34.
# The sd must match the one passed to dnorm() below (and to the other normal
# calls in this script); a single rnorm() call with mean/sd keeps them in one
# place instead of scaling and shifting by hand.
z <- rnorm(40, mean = 17.2, sd = 10.34)
# Print the sample. Recorded values are not reproduced here: they depend on
# the RNG state at this point in the script.
z
# Theoretical density evaluated at each sampled point.
z1 <- dnorm(z, mean = 17.2, sd = 10.34)
# Sample points against their density: traces the bell curve.
plot(z, z1)

# Upper-tail probability P(X > 30) for X ~ N(mean = 17.2, sd = 10.34).
pnorm(q = 30, mean = 17.2, sd = 10.34, lower.tail = FALSE)
## [1] 0.1078745
# Simulate 50 draws by shifting and scaling standard-normal values.
h <- 17.2 + 10.34 * rnorm(50)
# Histogram on the density scale.
hist(h, xlim = c(-10, 40), probability = TRUE)

# Histogram on the count scale.
hist(h, xlim = c(-10, 40))

# Redraw the density-scale histogram and overlay the theoretical normal
# density, evaluated at the sample points in ascending order.
hist(h, xlim = c(-10, 40), probability = TRUE)
lines(sort(h), dnorm(sort(h), mean = 17.2, sd = 10.34), col = "blue")

# Replace h with 40 draws from a gamma distribution (shape 2.73, rate 0.15).
h <- rgamma(n = 40, shape = 2.73, rate = 0.15)