id diagnosis radius_mean texture_mean perimeter_mean area_mean
1 842302 M 17.99 10.38 122.80 1001.0
2 842517 M 20.57 17.77 132.90 1326.0
3 84300903 M 19.69 21.25 130.00 1203.0
4 84348301 M 11.42 20.38 77.58 386.1
5 84358402 M 20.29 14.34 135.10 1297.0
6 843786 M 12.45 15.70 82.57 477.1
smoothness_mean compactness_mean concavity_mean concave.points_mean
1 0.11840 0.27760 0.3001 0.14710
2 0.08474 0.07864 0.0869 0.07017
3 0.10960 0.15990 0.1974 0.12790
4 0.14250 0.28390 0.2414 0.10520
5 0.10030 0.13280 0.1980 0.10430
6 0.12780 0.17000 0.1578 0.08089
symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se
1 0.2419 0.07871 1.0950 0.9053 8.589
2 0.1812 0.05667 0.5435 0.7339 3.398
3 0.2069 0.05999 0.7456 0.7869 4.585
4 0.2597 0.09744 0.4956 1.1560 3.445
5 0.1809 0.05883 0.7572 0.7813 5.438
6 0.2087 0.07613 0.3345 0.8902 2.217
area_se smoothness_se compactness_se concavity_se concave.points_se
1 153.40 0.006399 0.04904 0.05373 0.01587
2 74.08 0.005225 0.01308 0.01860 0.01340
3 94.03 0.006150 0.04006 0.03832 0.02058
4 27.23 0.009110 0.07458 0.05661 0.01867
5 94.44 0.011490 0.02461 0.05688 0.01885
6 27.19 0.007510 0.03345 0.03672 0.01137
symmetry_se fractal_dimension_se radius_worst texture_worst perimeter_worst
1 0.03003 0.006193 25.38 17.33 184.60
2 0.01389 0.003532 24.99 23.41 158.80
3 0.02250 0.004571 23.57 25.53 152.50
4 0.05963 0.009208 14.91 26.50 98.87
5 0.01756 0.005115 22.54 16.67 152.20
6 0.02165 0.005082 15.47 23.75 103.40
area_worst smoothness_worst compactness_worst concavity_worst
1 2019.0 0.1622 0.6656 0.7119
2 1956.0 0.1238 0.1866 0.2416
3 1709.0 0.1444 0.4245 0.4504
4 567.7 0.2098 0.8663 0.6869
5 1575.0 0.1374 0.2050 0.4000
6 741.6 0.1791 0.5249 0.5355
concave.points_worst symmetry_worst fractal_dimension_worst
1 0.2654 0.4601 0.11890
2 0.1860 0.2750 0.08902
3 0.2430 0.3613 0.08758
4 0.2575 0.6638 0.17300
5 0.1625 0.2364 0.07678
6 0.1741 0.3985 0.12440
'data.frame': 569 obs. of 32 variables:
$ id : int 842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
$ diagnosis : chr "M" "M" "M" "M" ...
$ radius_mean : num 18 20.6 19.7 11.4 20.3 ...
$ texture_mean : num 10.4 17.8 21.2 20.4 14.3 ...
$ perimeter_mean : num 122.8 132.9 130 77.6 135.1 ...
$ area_mean : num 1001 1326 1203 386 1297 ...
$ smoothness_mean : num 0.1184 0.0847 0.1096 0.1425 0.1003 ...
$ compactness_mean : num 0.2776 0.0786 0.1599 0.2839 0.1328 ...
$ concavity_mean : num 0.3001 0.0869 0.1974 0.2414 0.198 ...
$ concave.points_mean : num 0.1471 0.0702 0.1279 0.1052 0.1043 ...
$ symmetry_mean : num 0.242 0.181 0.207 0.26 0.181 ...
$ fractal_dimension_mean : num 0.0787 0.0567 0.06 0.0974 0.0588 ...
$ radius_se : num 1.095 0.543 0.746 0.496 0.757 ...
$ texture_se : num 0.905 0.734 0.787 1.156 0.781 ...
$ perimeter_se : num 8.59 3.4 4.58 3.44 5.44 ...
$ area_se : num 153.4 74.1 94 27.2 94.4 ...
$ smoothness_se : num 0.0064 0.00522 0.00615 0.00911 0.01149 ...
$ compactness_se : num 0.049 0.0131 0.0401 0.0746 0.0246 ...
$ concavity_se : num 0.0537 0.0186 0.0383 0.0566 0.0569 ...
$ concave.points_se : num 0.0159 0.0134 0.0206 0.0187 0.0188 ...
$ symmetry_se : num 0.03 0.0139 0.0225 0.0596 0.0176 ...
$ fractal_dimension_se : num 0.00619 0.00353 0.00457 0.00921 0.00511 ...
$ radius_worst : num 25.4 25 23.6 14.9 22.5 ...
$ texture_worst : num 17.3 23.4 25.5 26.5 16.7 ...
$ perimeter_worst : num 184.6 158.8 152.5 98.9 152.2 ...
$ area_worst : num 2019 1956 1709 568 1575 ...
$ smoothness_worst : num 0.162 0.124 0.144 0.21 0.137 ...
$ compactness_worst : num 0.666 0.187 0.424 0.866 0.205 ...
$ concavity_worst : num 0.712 0.242 0.45 0.687 0.4 ...
$ concave.points_worst : num 0.265 0.186 0.243 0.258 0.163 ...
$ symmetry_worst : num 0.46 0.275 0.361 0.664 0.236 ...
$ fractal_dimension_worst: num 0.1189 0.089 0.0876 0.173 0.0768 ...
id diagnosis radius_mean texture_mean
Min. : 8670 Length:569 Min. : 6.981 Min. : 9.71
1st Qu.: 869218 Class :character 1st Qu.:11.700 1st Qu.:16.17
Median : 906024 Mode :character Median :13.370 Median :18.84
Mean : 30371831 Mean :14.127 Mean :19.29
3rd Qu.: 8813129 3rd Qu.:15.780 3rd Qu.:21.80
Max. :911320502 Max. :28.110 Max. :39.28
perimeter_mean area_mean smoothness_mean compactness_mean
Min. : 43.79 Min. : 143.5 Min. :0.05263 Min. :0.01938
1st Qu.: 75.17 1st Qu.: 420.3 1st Qu.:0.08637 1st Qu.:0.06492
Median : 86.24 Median : 551.1 Median :0.09587 Median :0.09263
Mean : 91.97 Mean : 654.9 Mean :0.09636 Mean :0.10434
3rd Qu.:104.10 3rd Qu.: 782.7 3rd Qu.:0.10530 3rd Qu.:0.13040
Max. :188.50 Max. :2501.0 Max. :0.16340 Max. :0.34540
concavity_mean concave.points_mean symmetry_mean fractal_dimension_mean
Min. :0.00000 Min. :0.00000 Min. :0.1060 Min. :0.04996
1st Qu.:0.02956 1st Qu.:0.02031 1st Qu.:0.1619 1st Qu.:0.05770
Median :0.06154 Median :0.03350 Median :0.1792 Median :0.06154
Mean :0.08880 Mean :0.04892 Mean :0.1812 Mean :0.06280
3rd Qu.:0.13070 3rd Qu.:0.07400 3rd Qu.:0.1957 3rd Qu.:0.06612
Max. :0.42680 Max. :0.20120 Max. :0.3040 Max. :0.09744
radius_se texture_se perimeter_se area_se
Min. :0.1115 Min. :0.3602 Min. : 0.757 Min. : 6.802
1st Qu.:0.2324 1st Qu.:0.8339 1st Qu.: 1.606 1st Qu.: 17.850
Median :0.3242 Median :1.1080 Median : 2.287 Median : 24.530
Mean :0.4052 Mean :1.2169 Mean : 2.866 Mean : 40.337
3rd Qu.:0.4789 3rd Qu.:1.4740 3rd Qu.: 3.357 3rd Qu.: 45.190
Max. :2.8730 Max. :4.8850 Max. :21.980 Max. :542.200
smoothness_se compactness_se concavity_se concave.points_se
Min. :0.001713 Min. :0.002252 Min. :0.00000 Min. :0.000000
1st Qu.:0.005169 1st Qu.:0.013080 1st Qu.:0.01509 1st Qu.:0.007638
Median :0.006380 Median :0.020450 Median :0.02589 Median :0.010930
Mean :0.007041 Mean :0.025478 Mean :0.03189 Mean :0.011796
3rd Qu.:0.008146 3rd Qu.:0.032450 3rd Qu.:0.04205 3rd Qu.:0.014710
Max. :0.031130 Max. :0.135400 Max. :0.39600 Max. :0.052790
symmetry_se fractal_dimension_se radius_worst texture_worst
Min. :0.007882 Min. :0.0008948 Min. : 7.93 Min. :12.02
1st Qu.:0.015160 1st Qu.:0.0022480 1st Qu.:13.01 1st Qu.:21.08
Median :0.018730 Median :0.0031870 Median :14.97 Median :25.41
Mean :0.020542 Mean :0.0037949 Mean :16.27 Mean :25.68
3rd Qu.:0.023480 3rd Qu.:0.0045580 3rd Qu.:18.79 3rd Qu.:29.72
Max. :0.078950 Max. :0.0298400 Max. :36.04 Max. :49.54
perimeter_worst area_worst smoothness_worst compactness_worst
Min. : 50.41 Min. : 185.2 Min. :0.07117 Min. :0.02729
1st Qu.: 84.11 1st Qu.: 515.3 1st Qu.:0.11660 1st Qu.:0.14720
Median : 97.66 Median : 686.5 Median :0.13130 Median :0.21190
Mean :107.26 Mean : 880.6 Mean :0.13237 Mean :0.25427
3rd Qu.:125.40 3rd Qu.:1084.0 3rd Qu.:0.14600 3rd Qu.:0.33910
Max. :251.20 Max. :4254.0 Max. :0.22260 Max. :1.05800
concavity_worst concave.points_worst symmetry_worst fractal_dimension_worst
Min. :0.0000 Min. :0.00000 Min. :0.1565 Min. :0.05504
1st Qu.:0.1145 1st Qu.:0.06493 1st Qu.:0.2504 1st Qu.:0.07146
Median :0.2267 Median :0.09993 Median :0.2822 Median :0.08004
Mean :0.2722 Mean :0.11461 Mean :0.2901 Mean :0.08395
3rd Qu.:0.3829 3rd Qu.:0.16140 3rd Qu.:0.3179 3rd Qu.:0.09208
Max. :1.2520 Max. :0.29100 Max. :0.6638 Max. :0.20750
id diagnosis radius_mean
0 0 0
texture_mean perimeter_mean area_mean
0 0 0
smoothness_mean compactness_mean concavity_mean
0 0 0
concave.points_mean symmetry_mean fractal_dimension_mean
0 0 0
radius_se texture_se perimeter_se
0 0 0
area_se smoothness_se compactness_se
0 0 0
concavity_se concave.points_se symmetry_se
0 0 0
fractal_dimension_se radius_worst texture_worst
0 0 0
perimeter_worst area_worst smoothness_worst
0 0 0
compactness_worst concavity_worst concave.points_worst
0 0 0
symmetry_worst fractal_dimension_worst
0 0
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 3.6444 2.3857 1.67867 1.40735 1.28403 1.09880 0.82172
Proportion of Variance 0.4427 0.1897 0.09393 0.06602 0.05496 0.04025 0.02251
Cumulative Proportion 0.4427 0.6324 0.72636 0.79239 0.84734 0.88759 0.91010
PC8 PC9 PC10 PC11 PC12 PC13 PC14
Standard deviation 0.69037 0.6457 0.59219 0.5421 0.51104 0.49128 0.39624
Proportion of Variance 0.01589 0.0139 0.01169 0.0098 0.00871 0.00805 0.00523
Cumulative Proportion 0.92598 0.9399 0.95157 0.9614 0.97007 0.97812 0.98335
PC15 PC16 PC17 PC18 PC19 PC20 PC21
Standard deviation 0.30681 0.28260 0.24372 0.22939 0.22244 0.17652 0.1731
Proportion of Variance 0.00314 0.00266 0.00198 0.00175 0.00165 0.00104 0.0010
Cumulative Proportion 0.98649 0.98915 0.99113 0.99288 0.99453 0.99557 0.9966
PC22 PC23 PC24 PC25 PC26 PC27 PC28
Standard deviation 0.16565 0.15602 0.1344 0.12442 0.09043 0.08307 0.03987
Proportion of Variance 0.00091 0.00081 0.0006 0.00052 0.00027 0.00023 0.00005
Cumulative Proportion 0.99749 0.99830 0.9989 0.99942 0.99969 0.99992 0.99997
PC29 PC30
Standard deviation 0.02736 0.01153
Proportion of Variance 0.00002 0.00000
Cumulative Proportion 1.00000 1.00000
[1] 3 3 3 2 3 2 3 2 2 2 1 2 3 1 2 2 1 2 3 1 1 1 2 3 3 3 2 3 2 3 3 2 3 3 2 2 2
[38] 1 1 2 1 2 3 2 1 3 1 2 1 1 1 1 1 3 1 1 3 2 1 1 1 1 2 1 2 2 1 1 2 1 3 2 2 1
[75] 1 3 1 3 3 1 1 2 3 3 1 3 1 3 1 2 1 1 1 1 2 3 1 1 1 2 1 1 1 1 1 2 1 1 3 1 1
[112] 2 2 1 1 1 1 2 2 1 1 3 3 1 1 1 1 3 2 3 1 1 1 1 3 1 1 1 3 1 1 1 1 1 1 2 2 1
[149] 1 1 1 2 2 1 1 1 3 1 1 1 1 3 3 1 3 1 1 1 3 1 1 1 2 1 1 1 2 2 1 1 3 3 1 1 1
[186] 1 1 1 1 1 2 1 1 2 2 1 2 3 3 2 1 3 3 2 1 1 1 1 2 1 3 1 3 2 2 2 2 1 3 3 1 1
[223] 1 2 1 1 1 1 1 2 2 1 1 3 1 1 3 3 1 3 1 1 2 1 3 1 1 2 1 1 3 1 3 1 3 1 3 2 3
[260] 2 3 1 3 1 3 3 1 1 1 2 1 1 3 1 1 1 1 1 1 1 3 1 3 2 1 1 1 1 2 1 2 1 1 1 1 1
[297] 1 1 1 1 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 2 1 1 3 1 3 1 1 1 1 2 2 2 1 1
[334] 1 1 3 1 3 1 3 1 1 1 3 1 1 1 1 1 1 1 2 3 2 1 1 2 1 1 1 1 1 1 1 1 3 3 1 3 3
[371] 2 1 3 3 1 1 2 1 1 2 1 1 1 2 1 1 1 1 2 3 1 1 2 3 1 1 1 1 1 1 2 1 1 1 1 1 1
[408] 1 3 1 1 1 1 1 1 1 1 3 1 1 1 2 1 1 1 1 1 1 1 1 2 1 3 3 1 2 1 1 1 1 2 3 1 1
[445] 1 1 3 1 1 3 1 3 1 1 1 1 1 1 1 1 3 3 1 1 1 2 1 1 3 2 1 1 1 1 1 1 1 1 1 2 1
[482] 1 1 1 1 2 1 3 1 1 1 1 3 1 1 1 2 1 3 3 1 2 1 3 2 2 1 2 1 2 1 1 2 1 1 1 3 3
[519] 1 1 2 3 1 1 1 1 1 1 1 1 1 1 1 3 1 3 1 2 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
[556] 1 1 1 1 1 1 1 2 3 3 3 1 3 1
# Fit k-means on the scaled data for k = 2, 3, 4 and 5 (25 random starts
# each) so the resulting partitions can be compared side by side.
# Seed the RNG, as the other stochastic chunks do, so the random starts are
# reproducible.
set.seed(123)
cancer_K2 <- kmeans(data_scaled, centers = 2, nstart = 25)
cancer_K3 <- kmeans(data_scaled, centers = 3, nstart = 25)
cancer_K4 <- kmeans(data_scaled, centers = 4, nstart = 25)
cancer_K5 <- kmeans(data_scaled, centers = 5, nstart = 25)

# One cluster plot per k, drawn with points only.
p1 <- fviz_cluster(cancer_K2, geom = "point", data = data_scaled) +
  ggtitle(" K = 2")
p2 <- fviz_cluster(cancer_K3, geom = "point", data = data_scaled) +
  ggtitle(" K = 3")
p3 <- fviz_cluster(cancer_K4, geom = "point", data = data_scaled) +
  ggtitle(" K = 4")
p4 <- fviz_cluster(cancer_K5, geom = "point", data = data_scaled) +
  ggtitle(" K = 5")

# Show the four plots together in two rows.
grid.arrange(p1, p2, p3, p4, nrow = 2)

# compute gap statistic
# Gap statistic for k = 1..15 with k-means as the clustering function and
# B = 50 simulated reference data sets; seeded for reproducibility.
set.seed(123)
gap_stat <- clusGap(
  x = data_scaled,
  FUNcluster = kmeans, # full argument name; `FUN` relied on partial matching
  K.max = 15,
  B = 50,
  nstart = 25 # not a clusGap() argument: forwarded to kmeans() via `...`
)
# Print the result
print(gap_stat, method = "firstmax")
Clustering Gap statistic ["clusGap"] from call:
clusGap(x = data_scaled, FUNcluster = kmeans, K.max = 15, B = 50, nstart = 25)
B=50 simulated reference sets, k = 1..15; spaceH0="scaledPCA"
--> Number of clusters (method 'firstmax'): 2
logW E.logW gap SE.sim
[1,] 6.903237 7.802259 0.8990224 0.006603625
[2,] 6.708914 7.696045 0.9871308 0.006571583
[3,] 6.646568 7.630162 0.9835946 0.005454643
[4,] 6.609021 7.583872 0.9748510 0.005193434
[5,] 6.561763 7.556734 0.9949712 0.005108036
[6,] 6.532478 7.533734 1.0012561 0.005221217
[7,] 6.498516 7.514930 1.0164144 0.005232757
[8,] 6.482822 7.498273 1.0154506 0.005325144
[9,] 6.459053 7.484009 1.0249562 0.005351269
[10,] 6.441244 7.471300 1.0300555 0.005228238
[11,] 6.423310 7.459754 1.0364441 0.005024080
[12,] 6.407989 7.448861 1.0408729 0.004841184
[13,] 6.393568 7.438572 1.0450047 0.004865372
[14,] 6.383645 7.428971 1.0453255 0.004559727
[15,] 6.370427 7.419974 1.0495463 0.004654894
# Compute the final k-means clustering with k = 2 (seeded, 25 random starts)
set.seed(123)
final <- kmeans(data_scaled, centers = 2, nstart = 25)
print(final)
K-means clustering with 2 clusters of sizes 189, 380
Cluster means:
radius_mean texture_mean perimeter_mean area_mean smoothness_mean
1 0.9731199 0.4810905 1.0057496 0.9626801 0.6087185
2 -0.4839991 -0.2392792 -0.5002281 -0.4788067 -0.3027573
compactness_mean concavity_mean concave.points_mean symmetry_mean
1 1.0197987 1.138428 1.1635583 0.6106013
2 -0.5072157 -0.566218 -0.5787172 -0.3036938
fractal_dimension_mean radius_se texture_se perimeter_se area_se
1 0.2520081 0.8578415 0.04270321 0.8595226 0.8063982
2 -0.1253409 -0.4266633 -0.02123923 -0.4274994 -0.4010770
smoothness_se compactness_se concavity_se concave.points_se symmetry_se
1 0.01704563 0.6944395 0.6363352 0.7755561 0.1402588
2 -0.00847796 -0.3453923 -0.3164930 -0.3857371 -0.0697603
fractal_dimension_se radius_worst texture_worst perimeter_worst area_worst
1 0.4146673 1.039169 0.5058654 1.0650336 1.0022723
2 -0.2062424 -0.516850 -0.2516015 -0.5297141 -0.4984986
smoothness_worst compactness_worst concavity_worst concave.points_worst
1 0.6077580 0.9500013 1.0433804 1.145203
2 -0.3022796 -0.4725007 -0.5189444 -0.569588
symmetry_worst fractal_dimension_worst
1 0.5968910 0.6219221
2 -0.2968747 -0.3093244
Clustering vector:
[1] 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 2 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[38] 2 2 2 2 2 1 2 2 1 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 1 2 1 1 2 2 1 2 1 2 1 2
[75] 2 1 2 1 1 2 2 1 1 1 2 1 2 1 2 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2
[112] 2 1 2 2 2 2 1 1 2 2 1 1 2 2 2 2 1 1 1 2 1 1 2 1 2 2 2 1 2 2 1 2 2 2 2 1 2
[149] 2 2 2 2 1 2 2 2 1 2 2 2 2 1 1 2 1 2 2 1 1 2 2 2 1 2 2 2 2 1 2 2 1 1 2 2 2
[186] 2 2 2 2 2 1 2 2 1 1 2 1 1 1 1 2 1 1 1 2 2 2 2 2 2 1 2 1 1 1 1 2 2 1 1 2 2
[223] 2 1 2 2 2 2 2 1 1 2 2 1 2 2 1 1 2 1 2 2 1 2 1 2 2 2 2 2 1 2 1 1 1 2 1 1 1
[260] 1 1 2 1 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2
[297] 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 1 2 1 2 2 2 2 1 1 1 2 2
[334] 2 2 1 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1
[371] 1 2 1 1 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2
[408] 2 1 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 2 1 2 2
[445] 2 2 1 2 2 1 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2
[482] 2 2 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 1 1 2 1 2 1 1 2 2 2 2 1 2 2 1 2 2 2 1 1
[519] 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[556] 2 2 2 2 2 2 2 1 1 1 1 1 1 2
Within cluster sum of squares by cluster:
[1] 6325.137 5249.946
(between_SS / total_SS = 32.1 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"
[6] "betweenss" "size" "iter" "ifault"
[1] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[38] 0 0 0 2 0 0 0 0 0 0 0 2 2 2 2 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0
[75] 2 0 0 0 0 2 0 0 0 0 2 0 0 0 2 0 2 0 2 2 0 0 0 0 2 0 2 0 2 0 0 0 0 2 0 0 0
[112] 0 0 0 0 2 0 0 0 0 2 0 0 2 0 2 0 0 0 0 2 1 0 2 1 2 0 2 0 0 0 0 2 2 2 0 0 0
[149] 2 2 0 0 0 2 2 2 0 0 2 2 0 0 0 0 0 2 0 0 0 2 2 2 0 0 0 0 0 0 0 2 0 0 2 0 0
[186] 0 0 2 0 2 0 0 0 0 0 2 0 0 0 0 2 1 0 0 2 0 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 2
[223] 2 0 2 0 2 2 0 0 0 0 0 0 2 2 0 0 0 0 2 2 0 0 0 0 2 0 0 2 0 2 0 1 1 2 0 0 0
[260] 0 0 0 0 2 1 0 0 2 2 0 2 2 0 0 0 0 2 0 2 2 0 0 0 0 0 2 0 2 0 0 0 0 2 2 2 2
[297] 0 0 2 0 0 0 0 2 2 0 2 0 2 2 2 2 2 0 0 2 2 1 0 0 0 0 0 0 2 2 2 2 1 0 0 0 0
[334] 2 2 0 0 0 2 0 3 0 2 0 2 0 2 2 2 0 2 0 0 0 0 0 0 2 0 0 2 2 2 2 2 0 0 2 0 0
[371] 0 2 0 0 2 0 0 2 0 0 0 2 0 0 2 0 2 2 0 0 2 0 0 0 2 2 0 0 2 2 0 2 0 2 0 2 2
[408] 0 1 2 2 2 0 0 0 2 0 0 2 0 2 0 2 3 0 2 0 2 2 2 0 0 0 0 2 0 2 2 2 2 0 0 2 0
[445] 1 0 0 2 2 0 0 0 0 0 2 0 0 2 2 0 0 0 2 2 2 0 0 0 0 0 2 0 2 0 0 2 2 2 2 0 2
[482] 2 2 2 0 0 2 0 2 0 2 0 0 0 2 2 0 2 0 0 0 0 2 0 0 0 0 0 2 0 0 2 0 2 2 2 1 1
[519] 0 2 0 0 2 2 0 0 0 2 0 2 2 0 2 0 0 0 0 0 0 0 0 0 2 2 2 2 2 0 2 0 2 0 2 0 2
[556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# DBSCAN on the scaled data with neighbourhood radius eps = 2 and minPts = 4.
db_result <- dbscan(data_scaled, eps = 2, minPts = 4)

# Plot the observations coloured by their DBSCAN label.
fviz_cluster(
  list(data = data_scaled, cluster = db_result$cluster),
  main = "DBSCAN Clustering"
)

# Mean silhouette width over all observations, using distances between the
# rows of data_scaled.
# NOTE(review): every label in db_result$cluster is passed to silhouette(),
# including label 0, which the last line of this chunk counts as noise. Noise
# is therefore scored as if it were a cluster of its own — confirm whether it
# should be dropped before computing the silhouette.
silhouette_dbscan <- silhouette(db_result$cluster, dist(data_scaled))
silhouette_score_dbscan <- mean(silhouette_dbscan[, 3])

# Number of observations labelled 0 (treated here as noise).
noise_points_dbscan <- sum(db_result$cluster == 0)
cat("Silhouette Score DBSCAN:", silhouette_score_dbscan, "\n")
Silhouette Score DBSCAN: -0.1745369
# Mean-shift clustering of data_pca (coerced to a matrix) with a bandwidth of
# 3 in each of two dimensions; the RNG is seeded first.
# NOTE(review): the length-2 bandwidth presumably means data_pca has two
# columns — confirm against where data_pca is built.
set.seed(123)
ms_model <- meanShift(
  as.matrix(data_pca),
  bandwidth = rep(3, times = 2)
)
head(ms_model$assignment)
 [,1]
[1,] 1
[2,] 1
[3,] 1
[4,] 1
[5,] 1
[6,] 1
1 2 3 4
561 6 1 1
# Recompute the gap statistic for k = 1..10 (B = 50 reference sets) and plot
# it.
# NOTE(review): unlike the clusGap() call earlier in this file, no `nstart`
# is forwarded to kmeans() here, so each fit uses a single random start —
# confirm this is intended.
set.seed(123)
gap_stat <- clusGap(data_scaled, FUNcluster = kmeans, K.max = 10, B = 50)
fviz_gap_stat(gap_stat) +
  labs(title = "Gap Statistic")

# Re-seed before the next model fit.
set.seed(123)
# Fuzzy c-means on the scaled data with k_best clusters: 10 starts, at most
# 200 iterations, fuzziness exponent m = 2 and convergence value 1e-6.
res.fcm <- fcm(
  data_scaled,
  centers = k_best,
  nstart = 10,
  iter.max = 200,
  m = 2,
  con.val = 1e-6
)
# NOTE(review): the line below was fused onto the closing parenthesis above;
# it looks like the header of rendered output (a table of cluster_fcm), not
# a statement — confirm.
cluster_fcm
1 2
370 199
Cluster 1 Cluster 2
1 0.2951839 0.7048161
2 0.3413579 0.6586421
3 0.1311350 0.8688650
4 0.4116690 0.5883310
5 0.2662053 0.7337947
6 0.3768976 0.6231024
fviz_silhouette(sil_fcm, palette = "jco", ggtheme = theme_minimal()) +
labs(title = "Silhouette Plot — FCM")
 cluster size ave.sil.width
1 1 370 0.45
2 2 199 0.13
# Cluster plot of the cluster_fcm labels with a convex hull per cluster and
# non-overlapping point labels.
fviz_cluster(
  list(data = data_scaled, cluster = cluster_fcm),
  ellipse.type = "convex",
  palette = "jco",
  repel = TRUE,
  ggtheme = theme_minimal(),
  main = paste("FCM Clustering — k =", k_best)
)

# Membership matrix as a data frame, one column per cluster
# (Cluster_1 ... Cluster_<k_best>).
membership_df <- as.data.frame(res.fcm$u)
colnames(membership_df) <- paste0("Cluster_", seq_len(k_best))

# Membership degree in cluster 1 against membership degree in cluster 2,
# coloured by the cluster_fcm label. The dashed red lines mark 0.5 on each
# axis; the subtitle says that points near them are ambiguous observations.
# NOTE: this plot hard-codes Cluster_1/Cluster_2 and two colours, so it only
# works when k_best is 2.
ggplot(membership_df, aes(x = Cluster_1, y = Cluster_2)) +
  geom_point(aes(color = as.factor(cluster_fcm)), alpha = 0.6, size = 2) +
  geom_vline(xintercept = 0.5, linetype = "dashed", color = "red") +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
  scale_color_manual(
    values = c("steelblue", "darkorange"),
    name = "Cluster"
  ) +
  labs(
    title = "FCM — Membership Degree",
    subtitle = "Titik dekat garis merah = observasi ambigu",
    x = "Derajat keanggotaan ke Cluster 1",
    y = "Derajat keanggotaan ke Cluster 2"
  ) +
  theme_minimal()

# Same labels again, points only, with a normal-theory ellipse per cluster.
fviz_cluster(
  list(data = data_scaled, cluster = cluster_fcm),
  geom = "point",
  ellipse.type = "norm",
  palette = "jco",
  ggtheme = theme_minimal(),
  main = paste("FCM Clustering — k =", k_best)
)

# Re-seed before the next model fit.
set.seed(123)
# K-medians clustering of the scaled data with k_best clusters, using the
# "kmedians" kcca family and at most 200 iterations.
res.kmedians <- kcca(
  data_scaled,
  k = k_best,
  family = kccaFamily("kmedians"),
  control = list(iter.max = 200)
)
# NOTE(review): the line below was fused onto the closing parenthesis above;
# it looks like the header of rendered output (a table of cluster_kmed), not
# a statement — confirm.
cluster_kmed
1 2
196 373
# Silhouette of the cluster_kmed labels on the pairwise distances between
# rows of data_scaled, followed by its average width.
sil_kmed <- silhouette(cluster_kmed, dist(data_scaled))
avg_sil_kmed <- mean(sil_kmed[, "sil_width"])
cat("Silhouette Width K-Medians:", round(avg_sil_kmed, 4), "\n")
Silhouette Width K-Medians: 0.3347
fviz_silhouette(sil_kmed, palette = "jco", ggtheme = theme_minimal()) +
labs(title = "Silhouette Plot — K-Medians")
 cluster size ave.sil.width
1 1 196 0.15
2 2 373 0.43
# Cluster plot of the cluster_kmed labels with a convex hull per cluster and
# non-overlapping point labels.
fviz_cluster(
  list(data = data_scaled, cluster = cluster_kmed),
  ellipse.type = "convex",
  palette = "jco",
  repel = TRUE,
  ggtheme = theme_minimal(),
  main = paste("K-Medians Clustering — k =", k_best)
)

# Same labels again, points only, with a normal-theory ellipse per cluster.
fviz_cluster(
  list(data = data_scaled, cluster = cluster_kmed),
  geom = "point",
  ellipse.type = "norm",
  palette = "jco",
  ggtheme = theme_minimal(),
  main = paste("K-Medians Clustering — k =", k_best)
)