# 1. Load data ------------------------------------------------------------

univ <- read.csv("Universities.csv", header = TRUE)
head(univ)
##                        College.Name State Public.vs.Private No.appli.rec
## 1         Alaska Pacific University    AK                 2          193
## 2 University of Alaska at Fairbanks    AK                 1         1852
## 3    University of Alaska Southeast    AK                 1          146
## 4 University of Alaska at Anchorage    AK                 1         2065
## 5       Alabama Agri. & Mech. Univ.    AL                 1         2817
## 6               Faulkner University    AL                 2          345
##   No.appl.accepted No.new.stud.enrolled Per.new.stud.from.top.10
## 1              146                   55                       16
## 2             1427                  928                       NA
## 3              117                   89                        4
## 4             1598                 1162                       NA
## 5             1920                  984                       NA
## 6              320                  179                       NA
##   Per.new.stud.from.top.25 No.FT.undergrad No.PT.undergrad in.state.tuition
## 1                       44             249             869             7560
## 2                       NA            3885            4519             1742
## 3                       24             492            1849             1742
## 4                       NA            6209           10537             1742
## 5                       NA            3958             305             1700
## 6                       27            1367             578             5600
##   out.of.state.tuition room board add.fees estim.book.costs
## 1                 7560 1620  2500      130              800
## 2                 5226 1800  1790      155              650
## 3                 5226 2514  2250       34              500
## 4                 5226 2600  2520      114              580
## 5                 3400 1108  1442      155              500
## 6                 5600 1550  1700      300              350
##   estim.personal.dollar Per.fac.wvs.PHD stud.vs.fac.ratio Graduation.rate
## 1                  1500              76              11.9              15
## 2                  2304              67              10.0              NA
## 3                  1162              39               9.5              39
## 4                  1260              48              13.7              NA
## 5                   850              53              14.3              40
## 6                    NA              52              32.8              55
# 2. Remove records with missing data -------------------------------------------------

univ_complete <- na.omit(univ)
# 3. Hierarchical clustering using normalised data, complete linkage and Manhattan  --------

# 3.1 Remove categorical variables and ID----------------------------------------
# These are college name, state and public vs private

univ_cont <- univ_complete[, -c(1:3)]
head(univ_cont)
##    No.appli.rec No.appl.accepted No.new.stud.enrolled Per.new.stud.from.top.10
## 1           193              146                   55                       16
## 3           146              117                   89                        4
## 10          805              588                  287                       67
## 12          608              520                  127                       26
## 22         4414             1500                  335                       30
## 26         1797             1260                  938                       24
##    Per.new.stud.from.top.25 No.FT.undergrad No.PT.undergrad in.state.tuition
## 1                        44             249             869             7560
## 3                        24             492            1849             1742
## 10                       88            1376             207            11660
## 12                       47             538             126             8080
## 22                       60             908             119             5666
## 26                       35            6960            4698             2220
##    out.of.state.tuition room board add.fees estim.book.costs
## 1                  7560 1620  2500      130              800
## 3                  5226 2514  2250       34              500
## 10                11660 2050  2430      120              400
## 12                 8080 1380  2540      100              500
## 22                 5666 1424  1540      418             1000
## 26                 4440 1935  3240      291              750
##    estim.personal.dollar Per.fac.wvs.PHD stud.vs.fac.ratio Graduation.rate
## 1                   1500              76              11.9              15
## 3                   1162              39               9.5              39
## 10                   900              74              14.0              72
## 12                  1100              63              11.4              44
## 22                  1400              56              15.5              46
## 26                  2200              96               6.7              33
names(univ_cont)
##  [1] "No.appli.rec"             "No.appl.accepted"        
##  [3] "No.new.stud.enrolled"     "Per.new.stud.from.top.10"
##  [5] "Per.new.stud.from.top.25" "No.FT.undergrad"         
##  [7] "No.PT.undergrad"          "in.state.tuition"        
##  [9] "out.of.state.tuition"     "room"                    
## [11] "board"                    "add.fees"                
## [13] "estim.book.costs"         "estim.personal.dollar"   
## [15] "Per.fac.wvs.PHD"          "stud.vs.fac.ratio"       
## [17] "Graduation.rate"
# 3.2 Normalise data ------------------------------------------------------

univ_cont_norm <- sapply(univ_cont, scale)
head(univ_cont_norm)
##      No.appli.rec No.appl.accepted No.new.stud.enrolled
## [1,]   -0.7253139       -0.7656329           -0.7925715
## [2,]   -0.7368529       -0.7772155           -0.7554388
## [3,]   -0.5750612       -0.5890979           -0.5391950
## [4,]   -0.6234268       -0.6162571           -0.7139374
## [5,]    0.3109878       -0.2248447           -0.4867723
## [6,]   -0.3315143       -0.3207008            0.1717883
##      Per.new.stud.from.top.10 Per.new.stud.from.top.25 No.FT.undergrad
## [1,]               -0.6500683               -0.5732933      -0.7097404
## [2,]               -1.2994472               -1.5573355      -0.6576975
## [3,]                2.1097921                1.5915994      -0.4683728
## [4,]               -0.1089192               -0.4256870      -0.6478457
## [5,]                0.1075405                0.2139404      -0.5686035
## [6,]               -0.2171490               -1.0161123       0.7275427
##      No.PT.undergrad in.state.tuition out.of.state.tuition       room
## [1,]       0.0462840       -0.3347297           -0.6993021 -0.8428467
## [2,]       0.6802614       -1.3893276           -1.2406234  0.4106795
## [3,]      -0.3819742        0.4084555            0.2516051 -0.2399203
## [4,]      -0.4343744       -0.2404720           -0.5786992 -1.1793639
## [5,]      -0.4389028       -0.6780450           -1.1385749 -1.1176691
## [6,]       2.5233243       -1.3026831           -1.4229193 -0.4011680
##           board   add.fees estim.book.costs estim.personal.dollar
## [1,]  0.6669350 -0.6997824        1.5394532             0.2758088
## [2,]  0.2259098 -0.9695550       -0.2989446            -0.2199034
## [3,]  0.5434479 -0.7278837       -0.9117438            -0.6041537
## [4,]  0.7374990 -0.7840863       -0.2989446            -0.3108329
## [5,] -1.0266018  0.1095355        2.7650518             0.1291484
## [6,]  1.9723696 -0.2473512        1.2330536             1.3024317
##      Per.fac.wvs.PHD stud.vs.fac.ratio Graduation.rate
## [1,]      0.16752615      -0.529035524      -2.7862940
## [2,]     -2.05260943      -1.144600894      -1.4637549
## [3,]      0.04751883       0.009584174       0.3547362
## [4,]     -0.61252148      -0.657278310      -1.1882260
## [5,]     -1.03254714       0.394312530      -1.0780144
## [6,]      1.36759945      -1.862760492      -1.7943897
# 3.3 Compute normalised distance using manhattan distance ----------------


# create the distance matrix using the following syntax

distance_matrix <- dist(univ_cont_norm, method = "manhattan")
#as.matrix(distance_matrix)
# 3.4 Create hierarchical clusters -----------------------------------------

# Use the following syntax
hierarchical_cluster <- hclust(distance_matrix, method = "complete")



# plot the dendrogram using the following syntax
plot(hierarchical_cluster, hang = "-200", ann = TRUE)

# check cluster quality

library(factoextra)
## Warning: package 'factoextra' was built under R version 4.5.2
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_nbclust(univ_cont_norm, 
             hcut, method = "silhouette") +
  labs(subtitle = "Silhouette method")

# 3.5 Compute cluster membership using k = ??? ------------------------------

# create a vector of memberships using the following syntax

univ_hc_m_memb <- cutree(hierarchical_cluster, k = 3)

head(univ_hc_m_memb)
## [1] 1 1 1 1 1 1
# 3.6 Create data frame from cluster membership ---------------------------


# use as.data.frame()

# change the column name if preferable
cluster_df <- as.data.frame(univ_hc_m_memb)
names(cluster_df)[1] <- "Cluster"

names(cluster_df)
## [1] "Cluster"
# 3.7 Merge with original data --------------------------------------------


# Merge with original data

# use cbind()
univ_complete_w_clusters <- cbind(univ_complete, cluster_df)

# if desired, this can be done in a different application.
# just export the merged data to csv
# write.csv(univ_complete_w_cluster, "univ_complete_w_cluster.csv")

head(univ_complete_w_clusters, 50)
##                             College.Name State Public.vs.Private No.appli.rec
## 1              Alaska Pacific University    AK                 2          193
## 3         University of Alaska Southeast    AK                 1          146
## 10           Birmingham-Southern College    AL                 2          805
## 12                    Huntingdon College    AL                 2          608
## 22                     Talladega College    AL                 2         4414
## 26   University of Alabama at Birmingham    AL                 1         1797
## 32       Arkansas College (Lyon College)    AR                 2          708
## 38                       Hendrix College    AR                 2          823
## 39                 John Brown University    AR                 2          605
## 46                    Harding University    AR                 2         1721
## 49           Northern Arizona University    AZ                 1         5891
## 50                 University of Arizona    AZ                 1        14079
## 63       California Polytechnic-San Luis    CA                 1         7811
## 77             Claremont McKenna College    CA                 2         1860
## 78                   Harvey Mudd College    CA                 2         1377
## 79                        Pitzer College    CA                 2         1133
## 81                       Scripps College    CA                 2          855
## 90                    Occidental College    CA                 2         2324
## 92                Fresno Pacific College    CA                 2          346
## 95                 Pepperdine University    CA                 2         3821
## 96           Southern California College    CA                 2          385
## 97      St. Mary's College of California    CA                 2         2643
## 108          University of San Francisco    CA                 2         2306
## 110    University of Southern California    CA                 2        12229
## 112                     Westmont College    CA                 1          950
## 120          Loyola Marymount University    CA                 2         3768
## 121                 Concordia University    CA                 2          688
## 122                  Adams State College    CO                 1         1508
## 123                     Colorado College    CO                 2         3207
## 126                   Fort Lewis College    CO                 1         3440
## 127                   Mesa State College    CO                 1         1584
## 130      University of Southern Colorado    CO                 1         1401
## 134    Western State College of Colorado    CO                 1         2702
## 139 Central Connecticut State University    CT                 1         4158
## 140                  Connecticut College    CT                 2         3035
## 146              Sacred Heart University    CT                 2         2307
## 148                 Saint Joseph College    CT                 2          292
## 149                      Trinity College    CT                 2         3058
## 151             University of Bridgeport    CT                 2          598
## 152               University of Hartford    CT                 2         5081
## 153                  Wesleyan University    CT                 2         4772
## 154 Eastern Connecticut State University    CT                 1         2172
## 156  University of Connecticut at Storrs    CT                 1         9735
## 158       Catholic University of America    DC                 2         1754
## 160         George Washington University    DC                 2         7875
## 161                Georgetown University    DC                 2        11115
## 164                      Trinity College    DC                 2          247
## 168               University of Delaware    DE                 2        14446
## 169                       Wesley College    DE                 2          980
## 172              Bethune Cookman College    FL                 2         1646
##     No.appl.accepted No.new.stud.enrolled Per.new.stud.from.top.10
## 1                146                   55                       16
## 3                117                   89                        4
## 10               588                  287                       67
## 12               520                  127                       26
## 22              1500                  335                       30
## 26              1260                  938                       24
## 32               334                  166                       46
## 38               721                  274                       52
## 39               405                  284                       24
## 46              1068                  806                       35
## 49              4931                 1973                       23
## 50             12238                 4529                       29
## 63              3817                 1650                       47
## 77               767                  227                       71
## 78               572                  178                       95
## 79               630                  220                       37
## 81               632                  139                       60
## 90              1319                  370                       52
## 92               274                  146                       51
## 95              2037                  680                       86
## 96               340                  193                       18
## 97              1611                  465                       36
## 108             1721                  538                       23
## 110             8498                 2477                       45
## 112              713                  351                       42
## 120             2662                  753                       42
## 121              497                  144                       30
## 122             1259                  569                       16
## 123             1577                  490                       56
## 126             2823                 1123                       16
## 127             1456                  891                        6
## 130             1239                  605                       10
## 134             1623                  604                        7
## 139             2532                  902                        6
## 140             1546                  438                       42
## 146             1896                  509                       19
## 148              241                   96                       20
## 149             1798                  478                       46
## 151              441                  153                       16
## 152             4040                 1194                       11
## 153             1973                  712                       60
## 154             1493                  564                       14
## 156             7187                 2064                       23
## 158             1465                  505                       24
## 160             5062                 1492                       38
## 161             2881                 1390                       71
## 164              189                  100                       19
## 168            10516                 3252                       22
## 169              807                  350                       10
## 172             1150                  542                       12
##     Per.new.stud.from.top.25 No.FT.undergrad No.PT.undergrad in.state.tuition
## 1                         44             249             869             7560
## 3                         24             492            1849             1742
## 10                        88            1376             207            11660
## 12                        47             538             126             8080
## 22                        60             908             119             5666
## 26                        35            6960            4698             2220
## 32                        74             530             182             8644
## 38                        87             954               6             8800
## 39                        53             961              99             6398
## 46                        75            3128             213             5504
## 49                        48           11249            2682             1828
## 50                        57           21664            4894             1828
## 63                        73           12911            1404             2033
## 77                        93             887               1            17000
## 78                       100             654               5            17230
## 79                        73             750              30            17688
## 81                        83             569               7            17238
## 90                        81            1686              35            16560
## 92                        87             704              63             9900
## 95                        96            2488             625            18200
## 96                        38             784             127             9520
## 97                        80            2615             248            13332
## 108                       48            4309             549            13226
## 110                       71           13259            1429            17230
## 112                       72            1276               9            14320
## 120                       64            3558             436            13592
## 121                       75             641             101            10800
## 122                       44            1922             232             1386
## 123                       87            1892               7            17142
## 126                       35            3793             486             1450
## 127                       18            3471             911             1434
## 130                       34            3716             675             1644
## 134                       24            2315             146             1819
## 139                       24            6394            3881             1842
## 140                       93            1630             232            18740
## 146                       51            1707            1889            11070
## 148                       52             543             712            12200
## 149                       84            1737             244            18810
## 151                       36             527             312            12500
## 152                       26            3768            1415            14220
## 153                       86            2714              27            19130
## 154                       50            2766            1531             1842
## 156                       63           12478            1660             3824
## 158                       49            2159             211            13712
## 160                       71            5471            1470            17450
## 161                       93            5881             406            18300
## 164                       49             309             639            11412
## 168                       57           14130            4522             3690
## 169                       25             872             448             9890
## 172                       30            2128              82             5188
##     out.of.state.tuition room board add.fees estim.book.costs
## 1                   7560 1620  2500      130              800
## 3                   5226 2514  2250       34              500
## 10                 11660 2050  2430      120              400
## 12                  8080 1380  2540      100              500
## 22                  5666 1424  1540      418             1000
## 26                  4440 1935  3240      291              750
## 32                  8644 2382  1540      120              500
## 38                  8800 1935  1260      325              500
## 39                  6398 1450  2222      148              400
## 46                  5504 1650  1878     1016              700
## 49                  6746 1890  1838       66              620
## 50                  7434 2186  2030       66              620
## 63                  7380 2335  2542       20              612
## 77                 17000 3160  2850      140              500
## 78                 17230 3320  3370      465              700
## 79                 17688 3192  2708     1548              650
## 81                 17238 3500  3850      112              600
## 90                 16560 2980  2160      334              558
## 92                  9900 1500  2170      174              630
## 95                 18200 3750  3020       70              500
## 96                  9520 1980  2144      340              630
## 97                 13332 3177  3177      120              630
## 108                13226 3846  2606      100              750
## 110                17230 3566  2916      330              600
## 112                14320 3080  2224      866              490
## 120                13592 3276  2640      176              545
## 121                10800 2520  1920      690              570
## 122                 1672 1820  1910      374              520
## 123                17142 2114  2076      120              450
## 126                 6198 1820  1500      327              500
## 127                 5016 1782  2016      380              540
## 130                 7100 1756  2624      324              540
## 134                 5918 1936  1819      507              500
## 139                 5962 2384  2060     1298              500
## 140                18740 2935  3365      210              600
## 146                11070 4090  1690      320              400
## 148                12200 2200  2400      200              650
## 149                18810 3540  2150      780              500
## 151                12500 3700  3110      624              500
## 152                14220 2300  3700      690              500
## 153                19130 3290  2310      660             1400
## 154                 5962 2436  1880     1513              650
## 156                11656 2552  2520      888              700
## 158                13712 3622  2786      460              526
## 160                17450 3998  2330      720              700
## 161                18300 2590  4541      156              670
## 164                11412 2900  3530      150              500
## 168                10220 2280  1950      410              530
## 169                 9890 2450  2224      405              500
## 172                 5188 1541  1855      427              650
##     estim.personal.dollar Per.fac.wvs.PHD stud.vs.fac.ratio Graduation.rate
## 1                    1500              76              11.9              15
## 3                    1162              39               9.5              39
## 10                    900              74              14.0              72
## 12                   1100              63              11.4              44
## 22                   1400              56              15.5              46
## 26                   2200              96               6.7              33
## 32                    800              79              12.6              54
## 38                   1200              82              13.1              63
## 39                   1350              68              13.3              75
## 46                    910              71              17.7              73
## 49                   2342              78              21.7              41
## 50                   2550              91              19.6              49
## 63                   2091              72              19.8              59
## 77                    850              99               9.6              87
## 78                    900             100               8.2             100
## 79                    850             100              10.4              73
## 81                    800              95               8.2              73
## 90                   1152              91              10.5              79
## 92                   1818              59              10.5              54
## 95                    700              95              11.6              66
## 96                   1818              63              18.6              43
## 97                   1584              88              16.1              78
## 108                  2450              86              13.6              62
## 110                  2210              90              11.4              68
## 112                  1410              77              14.9              87
## 120                  1328              84              14.2              84
## 121                  1515              55              13.1              55
## 122                  2200              63              27.9              60
## 123                  1200              85              11.3              84
## 126                  2500              89              19.1              46
## 127                  2256              48              28.8              59
## 130                  2948              63              19.4              36
## 134                  2050              76              19.4              52
## 139                   985              69              16.7              49
## 140                   500              86              10.7              91
## 146                   600              71              14.8              82
## 148                   950              87              11.2              76
## 149                   680              91              10.4              91
## 151                   500              76               7.9              51
## 152                  1440              61              10.7              66
## 153                  1400              90              12.1              92
## 154                   500              71              16.9              50
## 156                  2300              89              16.0              71
## 158                  1100              90               9.3              75
## 160                   950              92               7.6              72
## 161                  1700              91               7.2              95
## 164                   900              89               8.3              96
## 168                  1300              82              18.3              75
## 169                  1350              52              14.4              84
## 172                  2500              48              13.8              58
##     Cluster
## 1         1
## 3         1
## 10        1
## 12        1
## 22        1
## 26        1
## 32        1
## 38        1
## 39        1
## 46        1
## 49        1
## 50        2
## 63        1
## 77        3
## 78        3
## 79        3
## 81        3
## 90        3
## 92        1
## 95        3
## 96        1
## 97        3
## 108       1
## 110       3
## 112       1
## 120       3
## 121       1
## 122       1
## 123       3
## 126       1
## 127       1
## 130       1
## 134       1
## 139       1
## 140       3
## 146       1
## 148       1
## 149       3
## 151       1
## 152       1
## 153       3
## 154       1
## 156       2
## 158       1
## 160       3
## 161       3
## 164       1
## 168       2
## 169       1
## 172       1
nrow(univ_cont)
## [1] 471
nrow(univ_complete)
## [1] 471
length(univ_complete_w_clusters$Cluster)
## [1] 471
length(univ_hc_m_memb)
## [1] 471
names(univ_complete_w_clusters)
##  [1] "College.Name"             "State"                   
##  [3] "Public.vs.Private"        "No.appli.rec"            
##  [5] "No.appl.accepted"         "No.new.stud.enrolled"    
##  [7] "Per.new.stud.from.top.10" "Per.new.stud.from.top.25"
##  [9] "No.FT.undergrad"          "No.PT.undergrad"         
## [11] "in.state.tuition"         "out.of.state.tuition"    
## [13] "room"                     "board"                   
## [15] "add.fees"                 "estim.book.costs"        
## [17] "estim.personal.dollar"    "Per.fac.wvs.PHD"         
## [19] "stud.vs.fac.ratio"        "Graduation.rate"         
## [21] "Cluster"
# 4. Investigate cluster characteristics ----------------------------------

# Cluster characteristics, numerical variables

# may use aggregate()

aggregate(univ_cont, 
          by = list(Cluster = univ_complete_w_clusters$Cluster), 
          FUN = mean)
##   Cluster No.appli.rec No.appl.accepted No.new.stud.enrolled
## 1       1     2027.571         1440.339             585.6202
## 2       2    14043.185         9537.704            3501.4815
## 3       3     4629.782         2397.051             754.2949
##   Per.new.stud.from.top.10 Per.new.stud.from.top.25 No.FT.undergrad
## 1                 21.82514                 49.17213        2678.484
## 2                 32.55556                 64.70370       17186.370
## 3                 55.47436                 82.92308        2997.269
##   No.PT.undergrad in.state.tuition out.of.state.tuition     room    board
## 1        706.3689         8076.637             9190.503 2062.150 2002.738
## 2       3678.0741         3871.074             9156.296 2244.407 2128.407
## 3        227.7179        17563.551            17563.551 2958.923 2679.038
##   add.fees estim.book.costs estim.personal.dollar Per.fac.wvs.PHD
## 1 360.2951         536.8169              1345.164        68.63115
## 2 684.7407         565.7778              1755.630        86.44444
## 3 361.0641         599.0513              1002.462        90.10256
##   stud.vs.fac.ratio Graduation.rate
## 1          14.60956        61.43443
## 2          16.37037        67.44444
## 3          10.09359        84.28205
# Cluster characteristics, categorical variables

# may use table()

table(univ_complete_w_clusters$Cluster, univ_complete_w_clusters$State)
##    
##     AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN MO
##   1  2  4  4  1  6  5  6  2  1  6  5  1 17  2 10 14  7  6  4  9  2  4 11  8 14
##   2  0  0  0  1  0  0  1  0  1  0  1  0  0  0  1  0  0  0  0  3  1  0  2  1  0
##   3  0  0  0  0  9  1  3  2  0  2  1  0  1  0  4  1  0  0  1 10  0  2  0  2  1
##    
##     MS MT NC ND NE NH NJ NM NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY
##   1  5  2 19  5  7  4 10  2 25 17  6  4 31  2  9  4 12 17  2 13  7  1  8  2  1
##   2  0  0  2  0  0  1  1  0  1  4  0  0  2  1  0  0  0  2  0  1  0  0  0  0  0
##   3  0  0  2  0  0  1  2  0 12  3  0  1  9  1  0  0  3  1  0  1  0  1  1  0  0
table(univ_complete_w_clusters$Cluster, univ_complete_w_clusters$Public.vs.Private)
##    
##       1   2
##   1 104 262
##   2  24   3
##   3   0  78

Interpretation:

This clustering method parts our data into three different types of universities, each with their own distinct characteristics.

Our first cluster includes smaller schools, mostly private with a mix of some public. The application counts and enrolling counts are lowest in this cluster, but the selection process isn’t as rigorous as cluster 3.

The second cluster includes mostly public schools, where the average number of applicants is 14,043. Tuition is lowest in this cluster, which aligns with the affordability of in-state public colleges. This cluster is still moderately selective, but still doesn’t have the selection requirements of cluster 3.

Cluster three includes exclusively private schools. Applicants are twice as common for this cluster compared to cluster 1, but the enrolled student count is only less than 200 above cluster 1. This rate, plus the 83% of students enrolled in the top 25% nationally, signify that these schools have the most competent and prepared students of all three clusters. Furthermore, graduation rate is 84% for this group, by far the highest of the three clusters.

These clusters make sense to me, as two clusters deal with high-end private schools and public schools respectively, with the third cluster encompassing the remaining lower-prestige private schools and smaller public schools.