# Wine DataSet - Assignment 6
# Load the wine data set from CSV.
# NOTE(review): this is an absolute, machine-specific path; point it at the
# local copy of wine.csv before running on another machine.
mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\7 PCA\\Assignment\\wine.csv")

# Drop the first column; only the remaining columns are analysed.
# (attach(data) was removed: nothing in this script refers to a column by its
# bare name, and attaching only risks masking other objects on the search
# path.)
data <- mydata[-1]

# Pairwise correlation matrix of the analysed variables.
cor(data)
## Alcohol Malic Ash Alcalinity
## Alcohol 1.00000000 0.09439694 0.211544596 -0.31023514
## Malic 0.09439694 1.00000000 0.164045470 0.28850040
## Ash 0.21154460 0.16404547 1.000000000 0.44336719
## Alcalinity -0.31023514 0.28850040 0.443367187 1.00000000
## Magnesium 0.27079823 -0.05457510 0.286586691 -0.08333309
## Phenols 0.28910112 -0.33516700 0.128979538 -0.32111332
## Flavanoids 0.23681493 -0.41100659 0.115077279 -0.35136986
## Nonflavanoids -0.15592947 0.29297713 0.186230446 0.36192172
## Proanthocyanins 0.13669791 -0.22074619 0.009651935 -0.19732684
## Color 0.54636420 0.24898534 0.258887259 0.01873198
## Hue -0.07174720 -0.56129569 -0.074666889 -0.27395522
## Dilution 0.07234319 -0.36871043 0.003911231 -0.27676855
## Proline 0.64372004 -0.19201056 0.223626264 -0.44059693
## Magnesium Phenols Flavanoids Nonflavanoids
## Alcohol 0.27079823 0.28910112 0.2368149 -0.1559295
## Malic -0.05457510 -0.33516700 -0.4110066 0.2929771
## Ash 0.28658669 0.12897954 0.1150773 0.1862304
## Alcalinity -0.08333309 -0.32111332 -0.3513699 0.3619217
## Magnesium 1.00000000 0.21440123 0.1957838 -0.2562940
## Phenols 0.21440123 1.00000000 0.8645635 -0.4499353
## Flavanoids 0.19578377 0.86456350 1.0000000 -0.5378996
## Nonflavanoids -0.25629405 -0.44993530 -0.5378996 1.0000000
## Proanthocyanins 0.23644061 0.61241308 0.6526918 -0.3658451
## Color 0.19995001 -0.05513642 -0.1723794 0.1390570
## Hue 0.05539820 0.43368134 0.5434786 -0.2626396
## Dilution 0.06600394 0.69994936 0.7871939 -0.5032696
## Proline 0.39335085 0.49811488 0.4941931 -0.3113852
## Proanthocyanins Color Hue Dilution
## Alcohol 0.136697912 0.54636420 -0.07174720 0.072343187
## Malic -0.220746187 0.24898534 -0.56129569 -0.368710428
## Ash 0.009651935 0.25888726 -0.07466689 0.003911231
## Alcalinity -0.197326836 0.01873198 -0.27395522 -0.276768549
## Magnesium 0.236440610 0.19995001 0.05539820 0.066003936
## Phenols 0.612413084 -0.05513642 0.43368134 0.699949365
## Flavanoids 0.652691769 -0.17237940 0.54347857 0.787193902
## Nonflavanoids -0.365845099 0.13905701 -0.26263963 -0.503269596
## Proanthocyanins 1.000000000 -0.02524993 0.29554425 0.519067096
## Color -0.025249931 1.00000000 -0.52181319 -0.428814942
## Hue 0.295544253 -0.52181319 1.00000000 0.565468293
## Dilution 0.519067096 -0.42881494 0.56546829 1.000000000
## Proline 0.330416700 0.31610011 0.23618345 0.312761075
## Proline
## Alcohol 0.6437200
## Malic -0.1920106
## Ash 0.2236263
## Alcalinity -0.4405969
## Magnesium 0.3933508
## Phenols 0.4981149
## Flavanoids 0.4941931
## Nonflavanoids -0.3113852
## Proanthocyanins 0.3304167
## Color 0.3161001
## Hue 0.2361834
## Dilution 0.3127611
## Proline 1.0000000
# Pairwise plots of all variables in `data`.
plot(data)

# Principal component analysis on the correlation matrix (cor = TRUE),
# keeping the score of every observation on every component.
pcaObj <- princomp(
  x = data,
  cor = TRUE,
  scores = TRUE
)

# Standard deviation, proportion and cumulative proportion of variance
# for each component.
summary(pcaObj)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.1692972 1.5801816 1.2025273 0.9586313 0.92370351
## Proportion of Variance 0.3619885 0.1920749 0.1112363 0.0706903 0.06563294
## Cumulative Proportion 0.3619885 0.5540634 0.6652997 0.7359900 0.80162293
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.80103498 0.74231281 0.59033665 0.53747553
## Proportion of Variance 0.04935823 0.04238679 0.02680749 0.02222153
## Cumulative Proportion 0.85098116 0.89336795 0.92017544 0.94239698
## Comp.10 Comp.11 Comp.12 Comp.13
## Standard deviation 0.50090167 0.47517222 0.41081655 0.321524394
## Proportion of Variance 0.01930019 0.01736836 0.01298233 0.007952149
## Cumulative Proportion 0.96169717 0.97906553 0.99204785 1.000000000
# Show the internal structure of the fitted princomp object.
utils::str(object = pcaObj)
## List of 7
## $ sdev : Named num [1:13] 2.169 1.58 1.203 0.959 0.924 ...
## ..- attr(*, "names")= chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ loadings: 'loadings' num [1:13, 1:13] 0.14433 -0.24519 -0.00205 -0.23932 0.14199 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## .. ..$ : chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ center : Named num [1:13] 13 2.34 2.37 19.49 99.74 ...
## ..- attr(*, "names")= chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## $ scale : Named num [1:13] 0.81 1.114 0.274 3.33 14.242 ...
## ..- attr(*, "names")= chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## $ n.obs : int 178
## $ scores : num [1:178, 1:13] 3.32 2.21 2.52 3.76 1.01 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ call : language princomp(x = data, cor = TRUE, scores = TRUE)
## - attr(*, "class")= chr "princomp"
# Loadings: the weight of each original variable in each component.
stats::loadings(x = pcaObj)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Alcohol 0.144 0.484 0.207 0.266 0.214 0.396
## Malic -0.245 0.225 -0.537 0.537 -0.421
## Ash 0.316 -0.626 0.214 0.143 0.154 0.149 -0.170
## Alcalinity -0.239 -0.612 -0.101 0.287 0.428
## Magnesium 0.142 0.300 -0.131 0.352 -0.727 -0.323 -0.156
## Phenols 0.395 -0.146 -0.198 0.149 -0.406
## Flavanoids 0.423 -0.151 -0.152 0.109 -0.187
## Nonflavanoids -0.299 -0.170 0.203 0.501 -0.259 -0.595 -0.233
## Proanthocyanins 0.313 -0.149 -0.399 -0.137 -0.534 -0.372 0.368
## Color 0.530 0.137 -0.419 0.228
## Hue 0.297 -0.279 0.428 0.174 0.106 -0.232 0.437
## Dilution 0.376 -0.164 -0.166 -0.184 0.101 0.266
## Proline 0.287 0.365 0.127 0.232 0.158 0.120 0.120
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
## Alcohol 0.509 0.212 0.226 0.266
## Malic -0.309 -0.122
## Ash -0.308 0.499 -0.141
## Alcalinity 0.200 -0.479
## Magnesium 0.271
## Phenols 0.286 -0.320 -0.304 0.304 -0.464
## Flavanoids -0.163 0.832
## Nonflavanoids 0.196 0.216 -0.117 0.114
## Proanthocyanins -0.209 0.134 0.237 -0.117
## Color -0.291 -0.604
## Hue -0.522 -0.259
## Dilution 0.137 0.524 -0.601 -0.157
## Proline -0.576 0.162 -0.539
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077
## Cumulative Var 0.077 0.154 0.231 0.308 0.385 0.462 0.538 0.615
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
## SS loadings 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.077 0.077 0.077 0.077 0.077
## Cumulative Var 0.692 0.769 0.846 0.923 1.000
# Plot the fitted PCA object (variance of each component).
plot(x = pcaObj)

# Scores of every observation on the first three components.
pcaObj[["scores"]][, seq_len(3)]
## Comp.1 Comp.2 Comp.3
## [1,] 3.31675081 1.44346263 0.165739045
## [2,] 2.20946492 -0.33339289 2.026457374
## [3,] 2.51674015 1.03115130 -0.982818670
## [4,] 3.75706561 2.75637191 0.176191842
## [5,] 1.00890849 0.86983082 -2.026688219
## [6,] 3.05025392 2.12240111 0.629395827
## [7,] 2.44908967 1.17485013 0.977094891
## [8,] 2.05943687 1.60896307 -0.146281883
## [9,] 2.51087430 0.91807096 1.770969027
## [10,] 2.75362819 0.78943767 0.984247490
## [11,] 3.47973668 1.30233324 0.422735217
## [12,] 1.75475290 0.61197723 1.190878320
## [13,] 2.11346234 0.67570634 0.865086426
## [14,] 3.45815682 1.13062988 1.204276353
## [15,] 4.31278391 2.09597558 1.263912752
## [16,] 2.30518820 1.66255173 -0.217902616
## [17,] 2.17195527 2.32730534 -0.831729866
## [18,] 1.89897118 1.63136888 -0.794913792
## [19,] 3.54198508 2.51834367 0.485458508
## [20,] 2.08452220 1.06113799 0.164746678
## [21,] 3.12440254 0.78689711 0.364887083
## [22,] 1.08657007 0.24174355 -0.936961600
## [23,] 2.53522408 -0.09184062 0.311932659
## [24,] 1.64498834 -0.51627893 -0.143885095
## [25,] 1.76157587 -0.31714893 -0.890285647
## [26,] 0.99007910 0.94066734 -3.820908008
## [27,] 1.77527763 0.68617513 0.086700406
## [28,] 1.23542396 -0.08980704 1.386896545
## [29,] 2.18840633 0.68956962 -1.394566881
## [30,] 2.25610898 0.19146194 1.092657258
## [31,] 2.50022003 1.24083383 -1.386017855
## [32,] 2.67741105 1.47187365 0.332261728
## [33,] 1.62857912 0.05270445 0.167128706
## [34,] 1.90269086 1.63306043 -1.172082119
## [35,] 1.41038853 0.69793432 -0.479743025
## [36,] 1.90382623 0.17671095 -0.450835040
## [37,] 1.38486223 0.65863985 -0.458438581
## [38,] 1.12220741 0.11410976 0.039107277
## [39,] 1.50219450 -0.76943201 1.426177346
## [40,] 2.52980109 1.80300198 0.343152389
## [41,] 2.58809543 0.77961630 0.118477466
## [42,] 0.66848199 0.16996094 0.783362548
## [43,] 3.07080699 1.15591896 0.312758084
## [44,] 0.46220914 0.33074213 0.201476496
## [45,] 2.10135193 -0.07100892 0.655849415
## [46,] 1.13616618 1.77710739 -0.028705736
## [47,] 2.72660096 1.19133469 0.539773261
## [48,] 2.82133927 0.64625860 1.155552411
## [49,] 2.00985085 1.24702946 0.057293988
## [50,] 2.70749130 1.75196741 0.643113612
## [51,] 3.21491747 0.16699199 1.973571680
## [52,] 2.85895983 0.74527880 -0.004719502
## [53,] 3.50560436 1.61273386 0.520774530
## [54,] 2.22479138 1.87516800 -0.339549850
## [55,] 2.14698782 1.01675154 0.957762762
## [56,] 2.46932948 1.32900831 -0.513437453
## [57,] 2.74151791 1.43654878 0.612473396
## [58,] 2.17374092 1.21219984 -0.261779593
## [59,] 3.13938015 1.73157912 0.285661413
## [60,] -0.92858197 -3.07348616 4.585064007
## [61,] -1.54248014 -1.38144351 0.874683112
## [62,] -1.83624976 -0.82998412 1.605702186
## [63,] 0.03060683 -1.26278614 1.784408010
## [64,] 2.05026161 -1.92503260 0.007368777
## [65,] -0.60968083 -1.90805881 -0.679357938
## [66,] 0.90022784 -0.76391147 -0.573361302
## [67,] 2.24850719 -1.88459248 2.031840193
## [68,] 0.18338403 -2.42714611 1.069745560
## [69,] -0.81280503 -0.22051399 0.707005396
## [70,] 1.97562050 -1.40328323 1.238276220
## [71,] -1.57221622 -0.88498314 0.628997950
## [72,] 1.65768181 -0.95671220 -1.952584217
## [73,] -0.72537239 -1.06364540 -0.080332229
## [74,] 2.56222717 0.26019855 -3.374393962
## [75,] 1.83256757 -1.28787820 -0.458280027
## [76,] -0.86799290 -2.44410119 1.563333179
## [77,] 0.37001440 -2.15390698 2.449386348
## [78,] -1.45737704 -1.38335177 0.227306902
## [79,] 1.26293085 -0.77084953 1.184224517
## [80,] 0.37615037 -1.02704340 -1.794466295
## [81,] 0.76206390 -3.37505381 0.357470056
## [82,] 1.03457797 -1.45070974 0.363011773
## [83,] -0.49487676 -2.38124353 -1.335743176
## [84,] -2.53897708 -0.08744336 -0.474251393
## [85,] 0.83532015 -1.47367055 -0.610093576
## [86,] 0.78790461 -2.02662652 0.254723404
## [87,] -0.80683216 -2.23383039 -0.772855797
## [88,] -0.55804262 -2.37298543 -2.307611404
## [89,] -1.11511104 -1.80224719 -0.959253308
## [90,] -0.55572283 -2.65754004 -0.849126898
## [91,] -1.34928528 -2.11800147 0.047652321
## [92,] -1.56448261 -1.85221452 -0.781067031
## [93,] -1.93255561 -1.55949546 0.089274676
## [94,] 0.74666594 -2.31293171 -0.114679769
## [95,] 0.95745536 -2.22352843 -0.142444774
## [96,] 2.54386518 0.16927402 -0.788696991
## [97,] -0.54395259 -0.36892655 -1.308895932
## [98,] 1.03104975 -2.56556935 1.086390174
## [99,] 2.25190942 -1.43274138 0.230208244
## [100,] 1.41021602 -2.16619177 -0.748896411
## [101,] 0.79771979 -2.37694880 1.568112531
## [102,] -0.54953173 -2.29312864 1.498935323
## [103,] -0.16117374 -1.16448332 -1.003713103
## [104,] -0.65979494 -2.67996119 0.764920868
## [105,] 0.39235441 -2.09873171 0.471850008
## [106,] -1.77249908 -1.71728847 -0.947033174
## [107,] -0.36626736 -2.16935330 0.481324235
## [108,] -1.62067257 -1.35558339 -0.287159001
## [109,] 0.08253578 -2.30623459 0.463574989
## [110,] 1.57827507 -1.46203429 -1.779645955
## [111,] 1.42056925 -1.41820664 -0.139275829
## [112,] -0.27870275 -1.93056809 -0.078670553
## [113,] -1.30314497 -0.76317231 -1.999596510
## [114,] -0.45707187 -2.26941561 -1.061338968
## [115,] -0.49418585 -1.93904505 -1.323938072
## [116,] 0.48207441 -3.87178385 -1.344271223
## [117,] -0.25288888 -2.82149237 0.302639785
## [118,] -0.10722764 -1.92892204 -0.690148243
## [119,] -2.43301260 -1.25714104 1.903027404
## [120,] -0.55108954 -2.22216155 0.356228830
## [121,] 0.73962193 -1.40895667 -1.125345492
## [122,] 1.33632173 0.25333693 -5.345388179
## [123,] -1.17708700 -0.66396684 -3.010221888
## [124,] -0.46233501 -0.61828818 -0.483442366
## [125,] 0.97847408 -1.44557050 -1.481236975
## [126,] -0.09680973 -2.10999799 -0.434826116
## [127,] 0.03848715 -1.26676211 -0.687577913
## [128,] -1.59715850 -1.20814357 -3.361175555
## [129,] -0.47956492 -1.93884066 -1.296507519
## [130,] -1.79283347 -1.15028810 -0.782800173
## [131,] -1.32710166 0.17038923 1.180013355
## [132,] -2.38450083 0.37458261 0.723822595
## [133,] -2.93694010 0.26386183 0.167639816
## [134,] -2.14681113 0.36825495 0.453301301
## [135,] -2.36986949 -0.45963481 1.101399789
## [136,] -3.06384157 0.35341284 1.099124104
## [137,] -3.91575378 0.15458252 -0.221827800
## [138,] -3.93646339 0.65968723 -1.712215419
## [139,] -3.09427612 0.34884276 1.026831413
## [140,] -2.37447163 0.29198035 -1.241914333
## [141,] -2.77881295 0.28680487 -0.609670124
## [142,] -2.28656128 0.37250784 0.971643032
## [143,] -2.98563349 0.48921791 -0.946952932
## [144,] -2.37519470 0.48233372 0.252883994
## [145,] -2.20986553 1.16005250 1.245125226
## [146,] -2.62562100 0.56316076 0.855961082
## [147,] -4.28063878 0.64967096 1.458196962
## [148,] -3.58264137 1.27270275 0.110784038
## [149,] -2.80706372 1.57053379 0.472527935
## [150,] -2.89965933 2.04105701 0.495959810
## [151,] -2.32073698 2.35636608 -0.437681744
## [152,] -2.54983095 2.04528309 0.312267999
## [153,] -1.81254128 1.52764595 -1.362589782
## [154,] -2.76014464 2.13893235 0.964628688
## [155,] -2.73715050 0.40988627 1.190404684
## [156,] -3.60486887 1.80238422 0.094036861
## [157,] -2.88982600 1.92521861 0.782322556
## [158,] -3.39215608 1.31187639 -1.602025969
## [159,] -1.04818190 3.51508969 -1.160038566
## [160,] -1.60991228 2.40663816 -0.548559697
## [161,] -3.14313097 0.73816104 0.090998724
## [162,] -2.24015690 1.17546529 0.101376932
## [163,] -2.84767378 0.55604397 -0.804215218
## [164,] -2.59749706 0.69796554 0.884939521
## [165,] -2.94929937 1.55530896 0.983400727
## [166,] -3.53003227 0.88252680 0.466029128
## [167,] -2.40611054 2.59235618 -0.428226211
## [168,] -2.92908473 1.27444695 1.213358272
## [169,] -2.18141278 2.07753731 -0.763782552
## [170,] -2.38092779 2.58866743 -1.418044029
## [171,] -3.21161722 -0.25124910 0.847129152
## [172,] -3.67791872 0.84774784 1.339420231
## [173,] -2.46555580 2.19379830 0.918780960
## [174,] -3.37052415 2.21628914 0.342569512
## [175,] -2.60195585 1.75722935 -0.207581355
## [176,] -2.67783946 2.76089913 0.940941877
## [177,] -2.38701709 2.29734668 0.550696197
## [178,] -3.20875816 2.76891957 -1.013913664
# Append the scores on the first three principal components to the original
# data. Together they account for about 66.5% of the total variance (see the
# cumulative proportion printed by summary(pcaObj)), so they are a reduced
# representation of the data rather than a lossless one.
pc_cols <- c("Comp.1", "Comp.2", "Comp.3")
mydata <- cbind(mydata, pcaObj$scores[, pc_cols])

# Select the score columns by name instead of by position (15:17) so the
# selection does not depend on how many columns the CSV happens to contain.
clus_data <- mydata[, pc_cols]

# Normalize: centre and scale each score column.
norm_clus <- scale(clus_data)

# Euclidean distances between observations in the normalized score space.
# NOTE(review): the variable `dist` shares its name with the function dist();
# calls such as dist(...) still resolve to the function. The name is kept in
# case other code refers to it.
dist <- dist(norm_clus, method = "euclidean")

# Complete-linkage hierarchical clustering and its dendrogram
# (hang = -1 draws all labels at the same baseline).
fit <- hclust(dist, method = "complete")
plot(fit, hang = -1)

# Cut the tree into 5 clusters and print the cluster id of each observation.
groups <- cutree(fit, 5)
groups
## [1] 1 2 3 1 3 1 1 3 1 1 1 1 1 1 1 3 3 3 1 1 1 3 1 1 3 4 1 2 3 1 3 1 1 3 3
## [36] 3 3 1 2 1 1 1 1 1 1 3 1 1 1 1 2 1 1 3 1 3 1 3 1 2 5 3 2 5 5 5 2 2 3 2
## [71] 5 5 5 4 5 2 2 5 2 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 3 5 2 5 5 2 2 5 5 5
## [106] 5 5 5 5 5 5 5 5 5 5 5 5 5 3 5 5 4 5 5 5 5 5 5 5 5 3 3 3 3 3 3 3 3 3 3
## [141] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [176] 3 3 3
# One-column data frame holding the cluster id of every observation,
# then the full data set with that cluster id appended as the last column.
membership <- as.data.frame(groups)
final <- cbind(mydata, membership)

# View() opens a data viewer, which is only useful in an interactive session;
# skip it when the script is run non-interactively.
if (interactive()) {
  View(membership)
  View(final)
}

# Per-cluster means of the original variables. The first column and the three
# appended PCA score columns are excluded by name rather than by position.
# The `groups` column is deliberately left in, as in the original call, so the
# result also carries a (redundant) copy of the cluster id.
drop_cols <- c(names(final)[1], "Comp.1", "Comp.2", "Comp.3")
aggregate(
  final[, !(names(final) %in% drop_cols), drop = FALSE],
  by = list(groups),
  FUN = mean
)
## Group.1 Alcohol Malic Ash Alcalinity Magnesium Phenols
## 1 1 13.89333 2.000556 2.373056 16.15000 104.91667 2.907500
## 2 2 12.68933 1.392667 1.882000 15.66000 97.46667 2.192000
## 3 3 13.25014 2.943099 2.474225 20.59155 102.71831 1.986056
## 4 4 12.53333 1.923333 3.016667 27.83333 127.33333 3.036667
## 5 5 12.17453 2.042075 2.318113 20.91132 91.32075 2.280377
## Flavanoids Nonflavanoids Proanthocyanins Color Hue Dilution
## 1 3.080833 0.2736111 1.979722 5.747222 1.0397222 3.218333
## 2 2.077333 0.2680000 1.584667 3.884000 1.1460000 2.745333
## 3 1.363944 0.4187324 1.325352 6.636620 0.8056338 2.064366
## 4 3.550000 0.3833333 1.916667 4.310000 1.1233333 3.463333
## 5 2.106604 0.3709434 1.665849 2.850000 1.0421887 2.846792
## Proline groups
## 1 1132.4167 1
## 2 710.3333 2
## 3 746.2535 3
## 4 760.0000 4
## 5 495.4906 5