Dimension Reduction Technique

PCA

mydata <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\3 Data Sets\\9 PCA\\Universities.csv")

data <- mydata[-1]
attach(data)
cor(data)
##                 SAT      Top10     Accept    SFRatio   Expenses   GradRate
## SAT       1.0000000  0.9225222 -0.8858496 -0.8125517  0.7789760  0.7477120
## Top10     0.9225222  1.0000000 -0.8591811 -0.6434351  0.6114666  0.7459420
## Accept   -0.8858496 -0.8591811  1.0000000  0.6316636 -0.5584395 -0.8195495
## SFRatio  -0.8125517 -0.6434351  0.6316636  1.0000000 -0.7818394 -0.5609217
## Expenses  0.7789760  0.6114666 -0.5584395 -0.7818394  1.0000000  0.3935914
## GradRate  0.7477120  0.7459420 -0.8195495 -0.5609217  0.3935914  1.0000000
pcaObj <- princomp(data, cor = TRUE, scores = TRUE)

summary(pcaObj)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4    Comp.5
## Standard deviation     2.1475766 0.8870266 0.53531473 0.40469755 0.3525708
## Proportion of Variance 0.7686808 0.1311360 0.04776031 0.02729668 0.0207177
## Cumulative Proportion  0.7686808 0.8998169 0.94757718 0.97487386 0.9955916
##                             Comp.6
## Standard deviation     0.162636495
## Proportion of Variance 0.004408438
## Cumulative Proportion  1.000000000
str(pcaObj)
## List of 7
##  $ sdev    : Named num [1:6] 2.148 0.887 0.535 0.405 0.353 ...
##   ..- attr(*, "names")= chr [1:6] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ loadings: 'loadings' num [1:6, 1:6] 0.458 0.427 -0.424 -0.391 0.363 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:6] "SAT" "Top10" "Accept" "SFRatio" ...
##   .. ..$ : chr [1:6] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ center  : Named num [1:6] 1266.4 76.5 39.2 12.7 27388 ...
##   ..- attr(*, "names")= chr [1:6] "SAT" "Top10" "Accept" "SFRatio" ...
##  $ scale   : Named num [1:6] 106.17 19.04 19.33 3.99 14133.44 ...
##   ..- attr(*, "names")= chr [1:6] "SAT" "Top10" "Accept" "SFRatio" ...
##  $ n.obs   : int 25
##  $ scores  : num [1:25, 1:6] 1.01 2.822 -1.112 0.742 0.312 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ call    : language princomp(x = data, cor = TRUE, scores = TRUE)
##  - attr(*, "class")= chr "princomp"
loadings(pcaObj)
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SAT       0.458         0.187  0.131         0.858
## Top10     0.427 -0.200  0.498  0.375  0.482 -0.396
## Accept   -0.424  0.321 -0.156         0.801  0.217
## SFRatio  -0.391 -0.433  0.606 -0.507         0.172
## Expenses  0.363  0.634  0.205 -0.623        -0.174
## GradRate  0.379 -0.516 -0.532 -0.439  0.338       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.167  0.167  0.167  0.167  0.167  0.167
## Cumulative Var  0.167  0.333  0.500  0.667  0.833  1.000
plot(pcaObj)

pcaObj$scores[, 1:3]
##            Comp.1      Comp.2      Comp.3
##  [1,]  1.00987445 -1.06430962  0.08106631
##  [2,]  2.82223781  2.25904458  0.83682883
##  [3,] -1.11246577  1.63120889 -0.26678684
##  [4,]  0.74174122 -0.04218747  0.06050086
##  [5,]  0.31191206 -0.63524357  0.01024052
##  [6,]  1.69669089 -0.34436328 -0.25340751
##  [7,]  1.24682093 -0.49098366 -0.03209382
##  [8,]  0.33874978 -0.78516859 -0.49358483
##  [9,]  2.37415013 -0.38653888  0.11609839
## [10,]  1.40327739  2.11951503 -0.44282714
## [11,]  1.72610332  0.08823712  0.17040366
## [12,]  0.45085748 -0.01113295 -0.17574605
## [13,] -0.04023814 -1.00920438 -0.49651717
## [14,] -3.23373034 -0.37458049 -0.49537282
## [15,]  2.23626502 -0.37179329 -0.39899365
## [16,] -5.17299212  0.77991535 -0.38591233
## [17,]  1.69964377 -0.30559745  0.31850785
## [18,] -4.57814600 -0.34759136  1.49964176
## [19,] -0.82260312 -0.69890615  1.42781145
## [20,]  0.09776213  0.65044645  0.10050844
## [21,] -1.96318260 -0.22476756 -0.25588143
## [22,]  0.54228894 -0.07958884 -0.30539348
## [23,] -0.53222092 -1.01716720 -0.42371636
## [24,] -3.54869664  0.77846167 -0.44936332
## [25,]  2.30590032 -0.11770432  0.25398866
mydata <- cbind(mydata, pcaObj$scores[, 1:3]) # Top 3 PCA Scores which represents the whole data

clus_data <- mydata[, 8:10]

#Normalize
norm_clus <- scale(clus_data)

dist1 <- dist(norm_clus, method = "euclidean")

dist1
##            1         2         3         4         5         6         7
## 2  4.0090988                                                            
## 3  3.1949981 2.7899361                                                  
## 4  1.1362521 3.0628877 2.1192201                                        
## 5  0.5855116 3.7177067 2.6356888 0.6899633                              
## 6  1.0513692 3.5376813 2.5308330 0.7945660 0.8574657                    
## 7  0.6750089 3.5033196 2.6149813 0.5723409 0.4618755 0.4821206          
## 8  1.1380215 4.3035371 2.7811409 1.3175106 0.9369923 0.9023050 0.9953431
## 9  0.9757112 3.2127335 2.8264662 0.8424366 0.9991144 0.7450502 0.5927989
## 10 3.6496020 2.4348803 1.3084767 2.5770725 3.1929000 2.7468199 2.9807592
## 11 1.3244851 2.7363425 2.2852252 0.5127461 1.0681080 0.9111746 0.7710552
## 12 1.2803560 3.3005199 1.9563887 0.4536127 0.7714593 0.6919226 0.6942218
## 13 1.1622512 4.5490457 2.9870432 1.5190953 1.0279823 1.1684474 1.1811171
## 14 2.3328060 4.6948914 2.4536484 2.1117811 1.8857587 2.2928479 2.2168007
## 15 1.2923806 3.6922370 2.6995974 1.1422740 1.1901804 0.3640387 0.8198055
## 16 3.5829332 4.5807953 2.0889433 2.9621144 3.0382909 3.3799570 3.3119071
## 17 0.9951143 3.0310804 2.7148760 0.7061552 0.9229246 1.0476630 0.7045623
## 18 3.7239654 4.6001352 4.2107807 3.5976471 3.5369261 4.3001076 3.8662111
## 19 2.6339988 3.8223771 4.0326459 2.7016018 2.6466645 3.3080805 2.8432752
## 20 1.9395939 2.5531295 1.3893967 0.8228120 1.4330662 1.4694350 1.3868539
## 21 1.7550373 4.0365729 2.0865953 1.3780242 1.2329723 1.6749798 1.5488819
## 22 1.3148962 3.4822035 2.0361713 0.6771182 0.8494158 0.6099001 0.7483588
## 23 1.1624611 4.5564714 2.9513194 1.5109977 0.9783503 1.2975215 1.2289800
## 24 3.0677772 4.0822928 1.4947627 2.3503532 2.4996850 2.7191719 2.7085568
## 25 1.2422114 2.8435585 2.6594671 0.8010191 1.1633731 1.0012043 0.8231988
##            8         9        10        11        12        13        14
## 2                                                                       
## 3                                                                       
## 4                                                                       
## 5                                                                       
## 6                                                                       
## 7                                                                       
## 8                                                                       
## 9  1.5170616                                                            
## 10 3.2463489 2.9841914                                                  
## 11 1.6758149 0.6101818 2.5131180                                        
## 12 1.0353978 1.1078080 2.4426733 0.8671525                              
## 13 0.3019359 1.7157148 3.5194990 1.8997206 1.2689789                    
## 14 1.6918104 2.7926123 3.4748415 2.6204466 1.8246322 1.6168434          
## 15 0.9939410 0.9450188 2.7791370 1.1825892 0.9945772 1.2674017 2.5018227
## 16 3.0579226 3.7895097 3.3469756 3.3952748 2.7376500 3.0708628 1.5649807
## 17 1.6957171 0.4898401 3.0225387 0.5127128 1.1174848 1.8599642 2.7001585
## 18 4.3099225 4.0589735 5.2453555 3.7978239 3.8478250 4.2625383 3.7027873
## 19 3.5577317 2.8302399 4.7377247 2.7211732 3.0869149 3.5567261 3.7052781
## 20 1.9259053 1.5464338 1.9942256 0.9766895 0.9031289 2.1351280 2.1867041
## 21 1.2943803 2.1003012 3.0300721 1.8871630 1.1358716 1.3093679 0.7453478
## 22 0.8571409 1.1868451 2.4734764 1.0413765 0.2525215 1.1168720 1.7874388
## 23 0.4898199 1.7943429 3.5756595 1.9325984 1.2814185 0.2611788 1.4283232
## 24 2.4769405 3.1668624 2.7016010 2.7675799 2.0835386 2.5433773 1.2844769
## 25 1.7947908 0.3909557 2.8112336 0.3809519 1.1613776 2.0005967 2.8895121
##           15        16        17        18        19        20        21
## 2                                                                       
## 3                                                                       
## 4                                                                       
## 5                                                                       
## 6                                                                       
## 7                                                                       
## 8                                                                       
## 9                                                                       
## 10                                                                      
## 11                                                                      
## 12                                                                      
## 13                                                                      
## 14                                                                      
## 15                                                                      
## 16 3.6118870                                                            
## 17 1.3378810 3.5960456                                                  
## 18 4.6629081 3.6790301 3.5887360                                        
## 19 3.6411563 4.1985727 2.3738915 1.7617109                              
## 20 1.7500668 2.5681987 1.3448140 3.5105994 2.8809280                    
## 21 1.9405583 1.8527644 1.9763149 3.4301840 3.1688962 1.4980708          
## 22 0.8548808 2.7788728 1.2826353 4.0571012 3.3044406 1.1150485 1.1578252
## 23 1.4510654 2.9030985 1.8708693 4.0431687 3.4096435 2.0967320 1.1343211
## 24 2.9306431 0.7501043 3.0236749 3.8070060 4.0018495 1.9495129 1.3699146
## 25 1.2280880 3.7413082 0.3654098 3.8893188 2.6580682 1.3467565 2.1629654
##           22        23        24
## 2                               
## 3                               
## 4                               
## 5                               
## 6                               
## 7                               
## 8                               
## 9                               
## 10                              
## 11                              
## 12                              
## 13                              
## 14                              
## 15                              
## 16                              
## 17                              
## 18                              
## 19                              
## 20                              
## 21                              
## 22                              
## 23 1.1660890                    
## 24 2.1098234 2.4145696          
## 25 1.3028605 2.0499664 3.1259792
fit1 <- hclust(dist1, method = "complete")
plot(fit1, hang = -1)

groups <- cutree(fit1, 5)
groups
##  [1] 1 2 3 1 1 1 1 1 1 3 1 1 1 4 1 4 1 5 5 1 4 1 1 4 1
membership_1 <- as.data.frame(groups)

View(membership_1)

final1 <- cbind(mydata, membership_1)

View(final1)

aggregate(final1[, -c(1,8:10)], by = list(groups), FUN = mean)
##   Group.1      SAT  Top10 Accept SFRatio Expenses GradRate groups
## 1       1 1313.438  85.00  29.00  11.625 29186.62   92.375      1
## 2       2 1415.000 100.00  25.00   6.000 63575.00   81.000      2
## 3       3 1282.500  68.50  51.50   8.000 41858.50   79.500      3
## 4       4 1087.750  42.75  70.25  17.000 11644.50   76.250      4
## 5       5 1157.500  72.00  53.50  21.000 11922.00   72.500      5