# Load the Universities data set and open it in the data viewer.
mydata <- read.csv("E:\\Data science\\Universities.csv")
View(mydata)

# Working copy without the first column; the remaining columns are the
# numeric measures that feed the correlation matrix and the PCA below.
data <- mydata[-1]

# NOTE: `data` is intentionally not attach()ed. No statement in the visible
# script refers to bare column names, and attaching would leave a detached
# copy of the columns on the search path where it can mask other objects.

# Pairwise correlations between the numeric measures.
cor(data)
##                 SAT      Top10     Accept    SFRatio   Expenses   GradRate
## SAT       1.0000000  0.9225222 -0.8858496 -0.8125517  0.7789760  0.7477120
## Top10     0.9225222  1.0000000 -0.8591811 -0.6434351  0.6114666  0.7459420
## Accept   -0.8858496 -0.8591811  1.0000000  0.6316636 -0.5584395 -0.8195495
## SFRatio  -0.8125517 -0.6434351  0.6316636  1.0000000 -0.7818394 -0.5609217
## Expenses  0.7789760  0.6114666 -0.5584395 -0.7818394  1.0000000  0.3935914
## GradRate  0.7477120  0.7459420 -0.8195495 -0.5609217  0.3935914  1.0000000
# Per-column distribution summary (min, quartiles, median, mean, max).
summary(object = data)
##       SAT           Top10            Accept        SFRatio     
##  Min.   :1005   Min.   : 28.00   Min.   :14.0   Min.   : 6.00  
##  1st Qu.:1240   1st Qu.: 74.00   1st Qu.:24.0   1st Qu.:11.00  
##  Median :1285   Median : 81.00   Median :36.0   Median :12.00  
##  Mean   :1266   Mean   : 76.48   Mean   :39.2   Mean   :12.72  
##  3rd Qu.:1340   3rd Qu.: 90.00   3rd Qu.:50.0   3rd Qu.:14.00  
##  Max.   :1415   Max.   :100.00   Max.   :90.0   Max.   :25.00  
##     Expenses        GradRate    
##  Min.   : 8704   Min.   :67.00  
##  1st Qu.:15140   1st Qu.:81.00  
##  Median :27553   Median :90.00  
##  Mean   :27388   Mean   :86.72  
##  3rd Qu.:34870   3rd Qu.:94.00  
##  Max.   :63575   Max.   :97.00
# Centre and scale every column before the decomposition.
normdata <- scale(data, center = TRUE, scale = TRUE)

# Principal component analysis computed from the correlation matrix.
pcaobj <- princomp(normdata, cor = TRUE)

# Standard deviation and proportion of variance for each component.
summary(pcaobj)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4    Comp.5
## Standard deviation     2.1475766 0.8870266 0.53531473 0.40469755 0.3525708
## Proportion of Variance 0.7686808 0.1311360 0.04776031 0.02729668 0.0207177
## Cumulative Proportion  0.7686808 0.8998169 0.94757718 0.97487386 0.9955916
##                             Comp.6
## Standard deviation     0.162636495
## Proportion of Variance 0.004408438
## Cumulative Proportion  1.000000000
# Loadings matrix: the weight of each original variable in each component.
pcaobj$loadings
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SAT       0.458         0.187  0.131         0.858
## Top10     0.427 -0.200  0.498  0.375  0.482 -0.396
## Accept   -0.424  0.321 -0.156         0.801  0.217
## SFRatio  -0.391 -0.433  0.606 -0.507         0.172
## Expenses  0.363  0.634  0.205 -0.623        -0.174
## GradRate  0.379 -0.516 -0.532 -0.439  0.338       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.167  0.167  0.167  0.167  0.167  0.167
## Cumulative Var  0.167  0.333  0.500  0.667  0.833  1.000
# Plot the fitted PCA object (component variances).
plot(pcaobj)

# Component scores, one row per observation.
pcaobj[["scores"]]
##            Comp.1      Comp.2      Comp.3      Comp.4      Comp.5
##  [1,]  1.00987445 -1.06430962  0.08106631  0.05695064 -0.12875425
##  [2,]  2.82223781  2.25904458  0.83682883  0.14384464 -0.12596191
##  [3,] -1.11246577  1.63120889 -0.26678684  1.07507502 -0.19181415
##  [4,]  0.74174122 -0.04218747  0.06050086 -0.15720812 -0.57761139
##  [5,]  0.31191206 -0.63524357  0.01024052  0.17136367  0.01272613
##  [6,]  1.69669089 -0.34436328 -0.25340751  0.01256433 -0.05266060
##  [7,]  1.24682093 -0.49098366 -0.03209382 -0.20564378  0.29350534
##  [8,]  0.33874978 -0.78516859 -0.49358483  0.03985631 -0.54497862
##  [9,]  2.37415013 -0.38653888  0.11609839 -0.45336562 -0.23010830
## [10,]  1.40327739  2.11951503 -0.44282714 -0.63254327  0.23005353
## [11,]  1.72610332  0.08823712  0.17040366  0.26090191  0.23331838
## [12,]  0.45085748 -0.01113295 -0.17574605  0.23616563  0.26325070
## [13,] -0.04023814 -1.00920438 -0.49651717  0.22929876  0.44803192
## [14,] -3.23373034 -0.37458049 -0.49537282 -0.52123771 -0.63929481
## [15,]  2.23626502 -0.37179329 -0.39899365  0.40696648 -0.41676068
## [16,] -5.17299212  0.77991535 -0.38591233 -0.23221171  0.17928698
## [17,]  1.69964377 -0.30559745  0.31850785 -0.29746268 -0.16342468
## [18,] -4.57814600 -0.34759136  1.49964176 -0.45425171 -0.19114197
## [19,] -0.82260312 -0.69890615  1.42781145  0.76077880  0.18426033
## [20,]  0.09776213  0.65044645  0.10050844 -0.50009719  0.48721782
## [21,] -1.96318260 -0.22476756 -0.25588143 -0.04847410  0.82274566
## [22,]  0.54228894 -0.07958884 -0.30539348  0.13169876  0.05273991
## [23,] -0.53222092 -1.01716720 -0.42371636  0.16953571  0.35781321
## [24,] -3.54869664  0.77846167 -0.44936332  0.32367862 -0.35833256
## [25,]  2.30590032 -0.11770432  0.25398866 -0.51618337  0.05589401
##             Comp.6
##  [1,] -0.034649638
##  [2,] -0.180703168
##  [3,]  0.345679459
##  [4,]  0.109163092
##  [5,] -0.016921270
##  [6,] -0.027166160
##  [7,] -0.078011984
##  [8,] -0.155371653
##  [9,]  0.266983932
## [10,] -0.235615124
## [11,]  0.238968449
## [12,] -0.314843521
## [13,]  0.004939215
## [14,] -0.090047785
## [15,]  0.050618633
## [16,] -0.030904694
## [17,]  0.114422592
## [18,]  0.104149297
## [19,] -0.251103268
## [20,]  0.219242132
## [21,]  0.152246521
## [22,] -0.036726444
## [23,] -0.066098999
## [24,] -0.077456415
## [25,] -0.010793201
# Hierarchical clustering on the leading principal components ----

# Number of leading components used as clustering features, and the number
# of clusters the dendrogram is cut into.
n_components <- 3
n_clusters <- 5

# Append the leading component scores to the full data set. The score
# columns are addressed by name rather than by position, so the code does
# not depend on how many columns the CSV happens to contain.
score_cols <- colnames(pcaobj$scores)[seq_len(n_components)]
mydata <- cbind(mydata, pcaobj$scores[, score_cols, drop = FALSE])
clus_data <- mydata[, score_cols, drop = FALSE]

# NOTE(review): the scores are re-standardised here, which gives every
# component the same weight in the distance regardless of how much variance
# it explains -- confirm this is intended.
norm_clus <- scale(clus_data)
dist1 <- dist(norm_clus, method = "euclidean")
fit1 <- hclust(dist1, method = "complete")

# Dendrogram: default leaf placement first, then with hang = -1.
plot(fit1)

plot(fit1, hang = -1)

# Cut the tree into clusters and attach the cluster id as the first column.
groups <- cutree(fit1, k = n_clusters)
membership <- as.matrix(groups)
final <- cbind(membership, mydata)

# Cluster profile: per-cluster mean of every column except the first column
# of `mydata` and the component scores. Columns are excluded by name; the
# previous positional form listed indices past the last column and only
# worked because out-of-range negative indices are silently ignored.
id_col <- names(mydata)[1]
profile_cols <- setdiff(names(final), c(id_col, score_cols))
View(aggregate(final[, profile_cols], by = list(membership), FUN = mean))
# Report the current working directory.
getwd()
## [1] "E:/Data science"