# Load the universities table. The path is machine-specific; adjust it when
# running the script on another computer.
mydata <- read.csv("E:\\Data science\\Universities.csv")
# Open the raw table in the interactive data viewer.
View(mydata)
# Drop the first column so that only the remaining columns enter the
# analysis (cor() below requires every column to be numeric).
data <- mydata[-1]
# NOTE: the former attach(data) call was removed. Every later step refers to
# `data` or `mydata` explicitly, so attaching only put the columns on the
# search path (never detached), where they could mask other objects.
# Pairwise correlation matrix of the analysis columns.
cor(data)
## SAT Top10 Accept SFRatio Expenses GradRate
## SAT 1.0000000 0.9225222 -0.8858496 -0.8125517 0.7789760 0.7477120
## Top10 0.9225222 1.0000000 -0.8591811 -0.6434351 0.6114666 0.7459420
## Accept -0.8858496 -0.8591811 1.0000000 0.6316636 -0.5584395 -0.8195495
## SFRatio -0.8125517 -0.6434351 0.6316636 1.0000000 -0.7818394 -0.5609217
## Expenses 0.7789760 0.6114666 -0.5584395 -0.7818394 1.0000000 0.3935914
## GradRate 0.7477120 0.7459420 -0.8195495 -0.5609217 0.3935914 1.0000000
# Per-column summary statistics of the analysis columns (printed below).
summary(data)
## SAT Top10 Accept SFRatio
## Min. :1005 Min. : 28.00 Min. :14.0 Min. : 6.00
## 1st Qu.:1240 1st Qu.: 74.00 1st Qu.:24.0 1st Qu.:11.00
## Median :1285 Median : 81.00 Median :36.0 Median :12.00
## Mean :1266 Mean : 76.48 Mean :39.2 Mean :12.72
## 3rd Qu.:1340 3rd Qu.: 90.00 3rd Qu.:50.0 3rd Qu.:14.00
## Max. :1415 Max. :100.00 Max. :90.0 Max. :25.00
## Expenses GradRate
## Min. : 8704 Min. :67.00
## 1st Qu.:15140 1st Qu.:81.00
## Median :27553 Median :90.00
## Mean :27388 Mean :86.72
## 3rd Qu.:34870 3rd Qu.:94.00
## Max. :63575 Max. :97.00
# Standardise every column: subtract the column mean and divide by the
# column standard deviation.
normdata <- scale(data, center = TRUE, scale = TRUE)
# Principal component analysis computed from the correlation matrix.
# NOTE(review): with cor = TRUE the pre-scaling above looks redundant;
# it is kept so that `normdata` stays available and results are unchanged.
pcaobj <- princomp(x = normdata, cor = TRUE)
# Standard deviation and (cumulative) proportion of variance per component.
summary(pcaobj)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.1475766 0.8870266 0.53531473 0.40469755 0.3525708
## Proportion of Variance 0.7686808 0.1311360 0.04776031 0.02729668 0.0207177
## Cumulative Proportion 0.7686808 0.8998169 0.94757718 0.97487386 0.9955916
## Comp.6
## Standard deviation 0.162636495
## Proportion of Variance 0.004408438
## Cumulative Proportion 1.000000000
# Loadings: the weight of each original column in each principal component.
pcaobj$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SAT 0.458 0.187 0.131 0.858
## Top10 0.427 -0.200 0.498 0.375 0.482 -0.396
## Accept -0.424 0.321 -0.156 0.801 0.217
## SFRatio -0.391 -0.433 0.606 -0.507 0.172
## Expenses 0.363 0.634 0.205 -0.623 -0.174
## GradRate 0.379 -0.516 -0.532 -0.439 0.338
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.167 0.167 0.167 0.167 0.167 0.167
## Cumulative Var 0.167 0.333 0.500 0.667 0.833 1.000
# Plot the fitted PCA object (variance captured by each component).
plot(pcaobj)

# Coordinates of every observation on each principal component.
pcaobj[["scores"]]
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## [1,] 1.00987445 -1.06430962 0.08106631 0.05695064 -0.12875425
## [2,] 2.82223781 2.25904458 0.83682883 0.14384464 -0.12596191
## [3,] -1.11246577 1.63120889 -0.26678684 1.07507502 -0.19181415
## [4,] 0.74174122 -0.04218747 0.06050086 -0.15720812 -0.57761139
## [5,] 0.31191206 -0.63524357 0.01024052 0.17136367 0.01272613
## [6,] 1.69669089 -0.34436328 -0.25340751 0.01256433 -0.05266060
## [7,] 1.24682093 -0.49098366 -0.03209382 -0.20564378 0.29350534
## [8,] 0.33874978 -0.78516859 -0.49358483 0.03985631 -0.54497862
## [9,] 2.37415013 -0.38653888 0.11609839 -0.45336562 -0.23010830
## [10,] 1.40327739 2.11951503 -0.44282714 -0.63254327 0.23005353
## [11,] 1.72610332 0.08823712 0.17040366 0.26090191 0.23331838
## [12,] 0.45085748 -0.01113295 -0.17574605 0.23616563 0.26325070
## [13,] -0.04023814 -1.00920438 -0.49651717 0.22929876 0.44803192
## [14,] -3.23373034 -0.37458049 -0.49537282 -0.52123771 -0.63929481
## [15,] 2.23626502 -0.37179329 -0.39899365 0.40696648 -0.41676068
## [16,] -5.17299212 0.77991535 -0.38591233 -0.23221171 0.17928698
## [17,] 1.69964377 -0.30559745 0.31850785 -0.29746268 -0.16342468
## [18,] -4.57814600 -0.34759136 1.49964176 -0.45425171 -0.19114197
## [19,] -0.82260312 -0.69890615 1.42781145 0.76077880 0.18426033
## [20,] 0.09776213 0.65044645 0.10050844 -0.50009719 0.48721782
## [21,] -1.96318260 -0.22476756 -0.25588143 -0.04847410 0.82274566
## [22,] 0.54228894 -0.07958884 -0.30539348 0.13169876 0.05273991
## [23,] -0.53222092 -1.01716720 -0.42371636 0.16953571 0.35781321
## [24,] -3.54869664 0.77846167 -0.44936332 0.32367862 -0.35833256
## [25,] 2.30590032 -0.11770432 0.25398866 -0.51618337 0.05589401
## Comp.6
## [1,] -0.034649638
## [2,] -0.180703168
## [3,] 0.345679459
## [4,] 0.109163092
## [5,] -0.016921270
## [6,] -0.027166160
## [7,] -0.078011984
## [8,] -0.155371653
## [9,] 0.266983932
## [10,] -0.235615124
## [11,] 0.238968449
## [12,] -0.314843521
## [13,] 0.004939215
## [14,] -0.090047785
## [15,] 0.050618633
## [16,] -0.030904694
## [17,] 0.114422592
## [18,] 0.104149297
## [19,] -0.251103268
## [20,] 0.219242132
## [21,] 0.152246521
## [22,] -0.036726444
## [23,] -0.066098999
## [24,] -0.077456415
## [25,] -0.010793201
# Hierarchical clustering on the first three principal-component scores.
# Columns are selected by name rather than by hard-coded position, so the
# steps keep working if the input table gains or loses columns.

# Names of the score columns used for clustering (Comp.1 .. Comp.3).
pc_cols <- colnames(pcaobj$scores)[1:3]
# Append those score columns to the original table.
mydata <- cbind(mydata, pcaobj$scores[, pc_cols])
# Keep only the appended score columns as clustering input.
clus_data <- mydata[, pc_cols]
# Rescale the scores so each component has unit variance in the distance.
norm_clus <- scale(clus_data)
dist1 <- dist(norm_clus, method = "euclidean")
# Complete-linkage agglomerative clustering on the Euclidean distances.
fit1 <- hclust(dist1, method = "complete")
# Dendrogram with default leaf placement ...
plot(fit1)

# ... and again with all leaves aligned on the baseline.
plot(fit1, hang = -1)

# Cut the tree into a fixed number of clusters.
n_clusters <- 5
groups <- cutree(fit1, k = n_clusters)
membership <- as.matrix(groups)
# Cluster label first, followed by the original table and score columns.
final <- cbind(membership, mydata)
# Cluster profile: mean of every column per cluster, leaving out the
# identifier column (first column of `mydata`) and the score columns.
id_col <- names(mydata)[1]
keep_cols <- !(names(final) %in% c(id_col, pc_cols))
View(aggregate(final[, keep_cols, drop = FALSE],
               by = list(membership), FUN = mean))
# Report the current working directory.
getwd()
## [1] "E:/Data science"