# Example 
# ---
# Question: Implement the hierarchical clustering algorithm using the USArrests dataset
# ---
# OUR CODE GOES BELOW
# 

# Loading the data set
# ---
#
utils::data("USArrests")
# Drop every row that contains a missing (NA, i.e. "not available") value,
# in case the data has any
# ---
# 
df <- stats::na.omit(USArrests)
# Preview the first six rows of the cleaned data
# ---
#
utils::head(df, n = 6L)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
# Before hierarchical clustering, we can compute some descriptive statistics
# ---
# 
# Build a per-variable summary table: one row per column of df, one column
# per statistic. vapply() iterates over the data-frame columns directly
# (apply() would first coerce the whole data frame to a matrix) and checks
# that every statistic comes back as a single number.
desc_stats <- data.frame(
  Min = vapply(df, min, numeric(1)),     # minimum
  Med = vapply(df, median, numeric(1)),  # median
  Mean = vapply(df, mean, numeric(1)),   # mean
  SD = vapply(df, sd, numeric(1)),       # standard deviation
  Max = vapply(df, max, numeric(1))      # maximum
)
# Round to one decimal place for display
desc_stats <- round(desc_stats, 1)
# Print the full table; head() would hide any variable beyond the sixth
desc_stats
##           Min   Med  Mean   SD   Max
## Murder    0.8   7.2   7.8  4.4  17.4
## Assault  45.0 159.0 170.8 83.3 337.0
## UrbanPop 32.0  66.0  65.5 14.5  91.0
## Rape      7.3  20.1  21.2  9.4  46.0
# The same kind of summary can also be obtained with describe() from psych
library("psych")
## Warning: package 'psych' was built under R version 4.1.3
psych::describe(df)
##          vars  n   mean    sd median trimmed    mad  min   max range  skew
## Murder      1 50   7.79  4.36   7.25    7.53   5.41  0.8  17.4  16.6  0.37
## Assault     2 50 170.76 83.34 159.00  168.48 110.45 45.0 337.0 292.0  0.22
## UrbanPop    3 50  65.54 14.47  66.00   65.88  17.79 32.0  91.0  59.0 -0.21
## Rape        4 50  21.23  9.37  20.10   20.36   8.60  7.3  46.0  38.7  0.75
##          kurtosis    se
## Murder      -0.95  0.62
## Assault     -1.15 11.79
## UrbanPop    -0.87  2.05
## Rape         0.08  1.32
# We note that the variables have very different means and variances.
# This is explained by the fact that the variables are measured in different
# units: Murder, Rape, and Assault are measured as the number of occurrences per 100,000 people,
# and UrbanPop is the percentage of the state’s population that lives in an urban area.
# They must be standardized (i.e., scaled) to make them comparable. Recall that
# standardization consists of transforming the variables such that
# they have mean zero and standard deviation one.
# As we don’t want the hierarchical clustering result to depend on an arbitrary variable unit,
# we start by scaling the data using the R function scale() as follows
# ---
# 
# Standardise every column: subtract its mean and divide by its standard
# deviation (both options spelled out; they are the defaults of scale())
df <- scale(df, center = TRUE, scale = TRUE)
# Preview the first six standardised rows
head(df, n = 6L)
##                Murder   Assault   UrbanPop         Rape
## Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
## Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
## Arizona    0.07163341 1.4788032  0.9989801  1.042878388
## Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
## California 0.27826823 1.2628144  1.7589234  2.067820292
## Colorado   0.02571456 0.3988593  0.8608085  1.864967207
# We now use the R function hclust() for hierarchical clustering
# ---
# 

# First we use the dist() function to compute the Euclidean distance between observations;
# the resulting dissimilarity matrix d will be the first argument of the hclust() function
# ---
#
# Pairwise Euclidean distances between the rows (observations) of df
d <- stats::dist(x = df, method = "euclidean")
# Display the distance matrix
print(d)
##                  Alabama    Alaska   Arizona  Arkansas California  Colorado
## Alaska         2.7037541                                                   
## Arizona        2.2935197 2.7006429                                         
## Arkansas       1.2898102 2.8260386 2.7177583                               
## California     3.2631104 3.0125415 1.3104842 3.7636409                     
## Colorado       2.6510673 2.3265187 1.3650307 2.8310512  1.2876185          
## Connecticut    3.2152975 4.7399125 3.2628575 2.6076395  4.0663898 3.3279920
## Delaware       2.0192927 3.6213633 1.9093696 1.8003239  3.0737852 2.5547456
## Florida        2.2981353 2.9967642 1.7493928 3.3721968  2.0250039 2.4458600
## Georgia        1.1314351 2.8194388 2.7871963 2.2117614  3.3780585 2.8649105
## Hawaii         3.3885300 4.5301340 3.2621208 2.9723097  3.6589083 2.8233524
## Idaho          2.9146623 4.0580555 3.5210071 1.7687255  4.4879436 3.4767685
## Illinois       1.8734993 3.2670626 1.0825512 2.4626424  1.9117469 1.7898322
## Indiana        2.0761411 3.3655952 2.6407486 1.4450503  3.4061273 2.3655622
## Iowa           3.4878952 4.7251910 4.1157513 2.4252661  4.9708591 3.9406898
## Kansas         2.2941096 3.6808173 2.7762838 1.5718411  3.6071725 2.6272281
## Kentucky       1.8475879 3.5440903 3.3567681 1.0598104  4.2463809 3.2274013
## Louisiana      0.7722224 2.9631431 2.2178519 2.0254276  3.0176625 2.6546743
## Maine          3.4851115 4.8322605 4.2961903 2.3621893  5.2699843 4.2713441
## Maryland       1.2896460 2.2777590 1.2117356 2.0582244  2.2312581 1.9667562
## Massachusetts  2.9874810 4.3729925 2.5162281 2.6881270  3.2156499 2.6522793
## Michigan       1.8814771 2.1154937 1.1940906 2.5895050  1.5146739 1.2363108
## Minnesota      3.2314338 4.4266606 3.5388450 2.3300992  4.3123134 3.3283853
## Mississippi    1.2831907 3.2554326 3.4551406 1.9318631  4.4200736 3.8491042
## Missouri       1.6309686 2.5360573 1.5958731 1.6717500  2.2891751 1.3127406
## Montana        2.3317271 3.6575988 3.3270869 1.2290066  4.2494176 3.1845338
## Nebraska       2.6625170 3.9136902 3.1641791 1.7240495  4.0197242 3.0034613
## Nevada         3.1024305 2.3443182 1.9260292 3.7086787  1.1968261 1.3988595
## New Hampshire  3.5619825 4.8650686 4.2430411 2.4949861  5.1270892 4.1126287
## New Jersey     2.6980230 4.1791832 2.1755787 2.7398478  2.7463023 2.3229870
## New Mexico     1.5993970 2.0580889 1.0376848 2.3183196  1.8010201 1.5467439
## New York       2.0723680 3.2903769 1.0725219 2.7478626  1.6787069 1.7363385
## North Carolina 1.6043662 3.2403071 3.1478947 2.0717938  4.2802569 3.8649275
## North Dakota   4.0614988 5.2110254 4.9319844 2.8756492  5.8660699 4.8014019
## Ohio           2.2698519 3.5903348 2.3585705 1.9617104  3.0133425 2.1188236
## Oklahoma       1.9570874 3.3416664 2.2648377 1.4224574  3.1488712 2.2263966
## Oregon         2.3705678 2.6990696 2.0008664 1.8477626  2.6574019 1.5331980
## Pennsylvania   2.5161340 4.1239537 2.9188907 1.9739986  3.7144562 2.8541709
## Rhode Island   3.3951297 5.0629572 3.0570151 3.0883430  3.8883995 3.4810739
## South Carolina 0.9157968 2.5640542 2.7992041 1.7074195  3.7546959 3.2131137
## South Dakota   3.0835587 4.2467198 4.1020099 1.8724822  5.0529153 3.9667318
## Tennessee      0.8407489 2.3362541 2.2989846 1.4254486  3.0119267 2.1972111
## Texas          1.6463225 3.1527905 1.6448574 2.3505545  2.1698156 1.7947199
## Utah           3.0906007 3.9480881 2.5244431 2.6049855  3.0701663 2.2461228
## Vermont        3.9791527 4.8707876 5.1003665 2.7442984  6.0323504 4.8924735
## Virginia       1.4859733 3.0492081 2.3106550 0.9971035  3.2159723 2.2622539
## Washington     2.6481824 3.2715253 2.1399117 2.1313402  2.7746720 1.7897920
## West Virginia  3.1243471 4.5004558 4.4974190 1.9951691  5.4883565 4.4210375
## Wisconsin      3.5047330 4.8711543 3.9425867 2.6102451  4.7354960 3.7846917
## Wyoming        1.8291027 3.4993456 2.6923028 0.9912639  3.7242766 2.8211492
##                Connecticut  Delaware   Florida   Georgia    Hawaii     Idaho
## Alaska                                                                      
## Arizona                                                                     
## Arkansas                                                                    
## California                                                                  
## Colorado                                                                    
## Connecticut                                                                 
## Delaware         1.7568475                                                  
## Florida          4.4700701 3.0614170                                        
## Georgia          3.9738227 2.9838715 2.1812958                              
## Hawaii           1.3843291 2.4748807 4.3596338 3.8105218                    
## Idaho            1.6354214 2.0382540 4.6999827 3.8005715 2.3658101          
## Illinois         2.7400560 1.5584719 1.7711863 2.3135778 2.7329756 3.2728945
## Indiana          1.6147898 1.6973340 3.6150778 2.6924143 1.5460727 1.4923351
## Iowa             1.5470089 2.6068606 5.2682765 4.2517889 2.1564575 0.8584962
## Kansas           1.2280424 1.5510864 3.8424558 3.0071474 1.4648766 1.2103118
## Kentucky         2.3346386 2.2514939 3.9474983 2.4408198 2.5203345 1.6565236
## Louisiana        3.5329409 2.3266996 1.7529677 0.8592544 3.5687157 3.5283772
## Maine            1.8792141 2.6560808 5.3946798 4.3334217 2.7160558 0.8486112
## Maryland         3.4968269 1.9624834 1.4355204 1.8388691 3.6148670 3.4014584
## Massachusetts    0.9468199 1.4382527 3.7753087 3.6706708 1.3276676 2.2201020
## Michigan         3.7037870 2.5165292 1.3357020 1.9185489 3.4123472 3.7775301
## Minnesota        0.9843793 2.1652930 4.7635252 3.9621842 1.4673850 1.0124936
## Mississippi      4.1762631 3.0510628 3.0886673 1.5828594 4.4777223 3.6002946
## Missouri         2.4383227 1.6723281 2.5182466 2.1021909 2.1832480 2.4697182
## Montana          1.8584328 2.0306850 4.2696476 3.0967288 2.2488801 0.8286936
## Nebraska         1.2116949 1.8113430 4.3082894 3.4295510 1.6628657 0.7515014
## Nevada           4.5868149 3.5920897 1.9500388 2.9023041 4.0281974 4.7300228
## New Hampshire    1.6169000 2.6744233 5.3778074 4.3427351 2.3112009 0.9249563
## New Jersey       1.6108823 1.5808719 3.1900596 3.1989350 1.5050500 2.7425260
## New Mexico       3.6233659 2.2271650 1.2965798 1.9015384 3.5506088 3.5883476
## New York         3.0239174 1.8992106 1.5730970 2.3634498 2.9055803 3.5910319
## North Carolina   4.1894604 2.7475286 2.9994188 2.3351307 4.7330517 3.5929592
## North Dakota     2.5099838 3.3615239 6.0356613 4.8596758 3.1974906 1.4144557
## Ohio             1.4443671 1.5838515 3.3897305 2.8043208 1.1494313 1.9647327
## Oklahoma         1.4510623 1.1802929 3.3553471 2.7121515 1.6585736 1.5168111
## Oregon           2.1756954 1.7742778 3.3399718 2.9998878 2.0031861 1.9757247
## Pennsylvania     0.8721491 1.5894850 3.9389869 3.1817981 1.2119256 1.5171866
## Rhode Island     1.0756115 1.6230495 4.2314871 4.1832075 2.0590981 2.4592705
## South Carolina   4.0127954 2.7039667 2.5295912 1.3970074 4.2531214 3.4549959
## South Dakota     2.2397424 2.6722813 5.1015141 3.8729745 2.8044891 0.8070290
## Tennessee        3.2302375 2.3195070 2.3992285 1.0122252 3.0747375 2.9234395
## Texas            2.8734475 2.0031365 1.8537984 1.7575559 2.5901696 3.3172180
## Utah             1.2825907 1.8080931 3.9274528 3.7183994 1.0709720 2.0268663
## Vermont          3.2066152 3.7144653 6.0766416 4.7091538 3.7208347 1.7797462
## Virginia         1.9277004 1.4088230 3.1515587 2.2249559 2.0479238 1.6999289
## Washington       1.6963486 1.6350170 3.5570666 3.3016469 1.5452901 1.8861921
## West Virginia    2.7117590 3.0381601 5.3004067 3.8545331 3.2831874 1.4398440
## Wisconsin        1.0354597 2.4410507 5.1085370 4.2281611 1.6666970 1.2105401
## Wyoming          1.6218573 1.2586225 3.6325811 2.7329062 2.1883414 1.1687896
##                 Illinois   Indiana      Iowa    Kansas  Kentucky Louisiana
## Alaska                                                                    
## Arizona                                                                   
## Arkansas                                                                  
## California                                                                
## Colorado                                                                  
## Connecticut                                                               
## Delaware                                                                  
## Florida                                                                   
## Georgia                                                                   
## Hawaii                                                                    
## Idaho                                                                     
## Illinois                                                                  
## Indiana        2.2027081                                                  
## Iowa           3.7380070 1.7786548                                        
## Kansas         2.3228505 0.4287712 1.4699265                              
## Kentucky       2.8478883 1.1790552 1.9426473 1.3020180                    
## Louisiana      1.6535178 2.4957547 4.0359614 2.7284126 2.4221964          
## Maine          3.9342034 2.1029158 0.6457158 1.7913753 1.9925855 4.0901924
## Maryland       1.3429997 2.5430878 4.0642448 2.7400943 2.8229479 1.2739137
## Massachusetts  2.0080982 1.6615695 2.3510287 1.4343401 2.6284451 3.1524549
## Michigan       1.3959090 2.6118471 4.3248636 2.9020920 3.1163494 1.6677999
## Minnesota      3.1558788 1.3184866 0.7644384 0.9745872 1.9333640 3.6905974
## Mississippi    3.0869477 3.0859068 4.1603272 3.2683740 2.3898884 1.6268879
## Missouri       1.3552973 1.2203931 2.9398546 1.5192717 1.9677184 1.8362172
## Montana        2.9659043 1.0033431 1.2403561 0.9170466 0.8523702 2.9444756
## Nebraska       2.7962196 0.8570429 0.9821819 0.5279092 1.4219429 3.1706333
## Nevada         2.3891753 3.5278633 5.2227312 3.8391728 4.1644286 2.8410670
## New Hampshire  3.8490624 1.9278736 0.2058539 1.6084091 2.0093558 4.1168122
## New Jersey     1.4562775 1.7638332 2.9122979 1.7071034 2.6914828 2.6826380
## New Mexico     1.3393276 2.5909993 4.2131394 2.8356373 3.0007332 1.4911656
## New York       0.3502188 2.4628527 4.0411586 2.6096016 3.1213366 1.7495096
## North Carolina 3.0124311 3.3437548 4.2973973 3.4387635 2.8798080 1.9868618
## North Dakota   4.6139615 2.6587932 1.0534375 2.3970805 2.4482563 4.6977846
## Ohio           1.8124981 0.6976320 2.1610242 0.7817000 1.7726720 2.4996969
## Oklahoma       1.8439860 0.5303259 1.9391446 0.5198728 1.4623483 2.3535566
## Oregon         2.0743434 1.1780815 2.4662295 1.3426890 2.1388677 2.7490592
## Pennsylvania   2.3134187 0.8412900 1.5708895 0.5456840 1.5944097 2.8440845
## Rhode Island   2.5057761 2.3335609 2.5453686 2.0087021 3.0457816 3.5648047
## South Carolina 2.6163680 2.8469842 4.1015324 3.0609333 2.4166385 1.3151908
## South Dakota   3.8004708 1.8411735 0.9886706 1.6701106 1.5114990 3.7457555
## Tennessee      1.9478353 1.8100316 3.4176329 2.1533060 1.7489942 1.1298534
## Texas          0.8241352 2.0035762 3.6962443 2.2378289 2.5297839 1.3325285
## Utah           2.2771632 1.4019666 2.1682069 1.2751603 2.5461745 3.3440990
## Vermont        4.8624402 2.8667983 1.7298425 2.7298377 2.3888326 4.6795933
## Virginia       1.8624960 0.6127246 2.1704984 0.8351949 1.0918624 1.9554079
## Washington     2.0612962 1.1405746 2.2502832 1.1579118 2.2630242 2.9705622
## West Virginia  4.1148082 2.2478563 1.5256890 2.1244674 1.5236299 3.7947215
## Wisconsin      3.4790637 1.6806129 0.6318069 1.3242947 2.0950212 3.9559184
## Wyoming        2.2643574 0.8898783 1.7194683 0.7588728 1.0694408 2.3837077
##                    Maine  Maryland Massachusetts  Michigan Minnesota
## Alaska                                                              
## Arizona                                                             
## Arkansas                                                            
## California                                                          
## Colorado                                                            
## Connecticut                                                         
## Delaware                                                            
## Florida                                                             
## Georgia                                                             
## Hawaii                                                              
## Idaho                                                               
## Illinois                                                            
## Indiana                                                             
## Iowa                                                                
## Kansas                                                              
## Kentucky                                                            
## Louisiana                                                           
## Maine                                                               
## Maryland       4.1259083                                            
## Massachusetts  2.6920282 2.9743193                                  
## Michigan       4.5333420 1.0800988     3.0576915                    
## Minnesota      1.2980362 3.6448929     1.6587245 3.7995101          
## Mississippi    4.0014591 2.2992240     4.1217248 2.9722824 4.1067600
## Missouri       3.2055955 1.5705755     1.9810531 1.4068840 2.4088795
## Montana        1.3271199 3.0249456     2.2919046 3.3348908 1.2662635
## Nebraska       1.3218907 3.1309065     1.6863806 3.3478988 0.6083415
## Nevada         5.5153139 2.2551337     3.8556049 1.2609417 4.6391114
## New Hampshire  0.4995971 4.1663744     2.4573524 4.4646172 0.9279247
## New Jersey     3.2532459 2.6263456     0.7977642 2.5678440 2.2254151
## New Mexico     4.3460538 0.5353893     3.0274701 0.5782474 3.7377675
## New York       4.2595904 1.4362170     2.2479437 1.2897453 3.4391596
## North Carolina 4.0631653 2.0542355     4.0773401 3.0232021 4.2219622
## North Dakota   0.7305609 4.7423030     3.3446903 5.1171939 1.8065731
## Ohio           2.5455752 2.5061694     1.1567960 2.4459855 1.5216293
## Oklahoma       2.1929825 2.2492942     1.3383233 2.4336743 1.4198434
## Oregon         2.7813372 2.2466329     1.8709252 2.1626274 1.9270100
## Pennsylvania   1.9197571 2.9585539     1.1337883 3.1048542 1.0106613
## Rhode Island   2.7331079 3.4379146     0.9440940 3.7320501 2.0310592
## South Carolina 4.0015575 1.6165582     3.8310425 2.3233363 3.9484630
## South Dakota   0.7812991 3.7991896     2.8925136 4.1744724 1.4990317
## Tennessee      3.5420469 1.5202431     2.9678843 1.5970196 3.1023238
## Texas          3.9386296 1.5431868     2.2593978 1.2888621 3.1438264
## Utah           2.6218087 3.0338001     0.9015809 2.9441421 1.4177147
## Vermont        1.4253680 4.7430576     3.9277625 5.1250778 2.4019924
## Virginia       2.3474650 2.0124420     1.8503795 2.2439957 1.7932233
## Washington     2.6292546 2.5434911     1.3472994 2.4715215 1.5955418
## West Virginia  1.1818120 4.0251562     3.3782752 4.4668346 2.0791705
## Wisconsin      1.1485830 4.0091486     1.8882704 4.2034334 0.4940832
## Wyoming        1.7665064 2.4041294     1.8201580 2.8324573 1.4845967
##                Mississippi  Missouri   Montana  Nebraska    Nevada
## Alaska                                                            
## Arizona                                                           
## Arkansas                                                          
## California                                                        
## Colorado                                                          
## Connecticut                                                       
## Delaware                                                          
## Florida                                                           
## Georgia                                                           
## Hawaii                                                            
## Idaho                                                             
## Illinois                                                          
## Indiana                                                           
## Iowa                                                              
## Kansas                                                            
## Kentucky                                                          
## Louisiana                                                         
## Maine                                                             
## Maryland                                                          
## Massachusetts                                                     
## Michigan                                                          
## Minnesota                                                         
## Mississippi                                                       
## Missouri         2.8692946                                        
## Montana          3.0015255 2.0313649                              
## Nebraska         3.5269565 1.9651798 0.7389936                    
## Nevada           4.1064793 2.3489003 4.3243112 4.2628916          
## New Hampshire    4.1895936 3.0885710 1.3329504 1.1300720 5.3871427
## New Jersey       3.8894324 1.7079555 2.5912431 2.1246377 3.3464214
## New Mexico       2.6557350 1.4579057 3.1915871 3.2494088 1.7234839
## New York         3.2655822 1.5284764 3.2662661 3.0925340 2.1674148
## North Carolina   1.1826891 3.0224849 3.2209267 3.6500186 4.1773437
## North Dakota     4.4753078 3.7811273 1.8291157 1.9038740 6.0519445
## Ohio             3.4148987 1.1327425 1.6436336 1.2654510 3.2930712
## Oklahoma         3.0466140 1.0927654 1.2225315 0.9674809 3.4108696
## Oregon           3.5033774 0.9974171 1.8044622 1.5727910 2.8581280
## Pennsylvania     3.4971746 1.7793568 1.3246445 0.8483058 4.0392694
## Rhode Island     4.3875001 2.7475003 2.6888576 2.1303973 4.6185330
## South Carolina   0.7865674 2.3846001 2.9024302 3.3517226 3.4427701
## South Dakota     3.5355186 2.8862448 0.8857149 1.2591419 5.1416772
## Tennessee        1.8269569 1.2413874 2.2494023 2.5526834 2.6666268
## Texas            2.8431727 1.1654171 2.8298991 2.7568751 2.2765693
## Utah             4.2571173 1.7478909 2.0956369 1.4573012 3.5868975
## Vermont          4.2046660 3.8803394 1.9261350 2.2952287 6.0437845
## Virginia         2.5383053 0.9787310 1.1556682 1.2472262 3.3001850
## Washington       3.8140404 1.2502752 1.8442691 1.3859985 3.1570805
## West Virginia    3.3281129 3.2538044 1.2758193 1.8117833 5.4963193
## Wisconsin        4.2987974 2.8171535 1.4916365 0.9719877 5.0751736
## Wyoming          2.6813279 1.6073860 0.8150071 0.9268202 3.9202716
##                New Hampshire New Jersey New Mexico  New York North Carolina
## Alaska                                                                     
## Arizona                                                                    
## Arkansas                                                                   
## California                                                                 
## Colorado                                                                   
## Connecticut                                                                
## Delaware                                                                   
## Florida                                                                    
## Georgia                                                                    
## Hawaii                                                                     
## Idaho                                                                      
## Illinois                                                                   
## Indiana                                                                    
## Iowa                                                                       
## Kansas                                                                     
## Kentucky                                                                   
## Louisiana                                                                  
## Maine                                                                      
## Maryland                                                                   
## Massachusetts                                                              
## Michigan                                                                   
## Minnesota                                                                  
## Mississippi                                                                
## Missouri                                                                   
## Montana                                                                    
## Nebraska                                                                   
## Nevada                                                                     
## New Hampshire                                                              
## New Jersey         3.0269198                                               
## New Mexico         4.3360809  2.6208087                                    
## New York           4.1586415  1.6344744  1.3324096                         
## North Carolina     4.3157112  3.9418824  2.5348334 3.2163998               
## North Dakota       0.9231894  3.9166205  4.9450519 4.9325292      4.5836787
## Ohio               2.3095495  1.1099823  2.4960904 2.0434995      3.6205693
## Oklahoma           2.0697098  1.4711183  2.3426252 2.1367108      3.1366639
## Oregon             2.6377191  1.9738854  2.1553130 2.2727718      3.5095191
## Pennsylvania       1.6822035  1.4216058  3.0619915 2.5949374      3.6803956
## Rhode Island       2.5813199  1.4668378  3.6032966 2.7682543      4.2185789
## South Carolina     4.1596914  3.5826726  1.9596343 2.7755634      1.0476313
## South Dakota       0.9874611  3.3318222  3.9969513 4.1124693      3.6955387
## Tennessee          3.5298430  2.6339707  1.5528304 2.0847931      2.3374653
## Texas              3.8178258  1.6226525  1.4418241 0.8457697      3.0857436
## Utah               2.3304873  1.3141843  2.9843796 2.4826984      4.2680823
## Vermont            1.6716127  4.4005416  4.9416825 5.1704762      4.3880034
## Virginia           2.2878085  1.8255601  2.1341562 2.1439207      2.7517523
## Washington         2.4214987  1.5759539  2.4796057 2.2747965      3.8055684
## West Virginia      1.4648924  3.7402121  4.2681325 4.4279608      3.5978058
## Wisconsin          0.7155628  2.4671212  4.1327758 3.7687073      4.4429456
## Wyoming            1.7950754  2.0372127  2.6286722 2.5890441      2.7501141
##                North Dakota      Ohio  Oklahoma    Oregon Pennsylvania
## Alaska                                                                
## Arizona                                                               
## Arkansas                                                              
## California                                                            
## Colorado                                                              
## Connecticut                                                           
## Delaware                                                              
## Florida                                                               
## Georgia                                                               
## Hawaii                                                                
## Idaho                                                                 
## Illinois                                                              
## Indiana                                                               
## Iowa                                                                  
## Kansas                                                                
## Kentucky                                                              
## Louisiana                                                             
## Maine                                                                 
## Maryland                                                              
## Massachusetts                                                         
## Michigan                                                              
## Minnesota                                                             
## Mississippi                                                           
## Missouri                                                              
## Montana                                                               
## Nebraska                                                              
## Nevada                                                                
## New Hampshire                                                         
## New Jersey                                                            
## New Mexico                                                            
## New York                                                              
## North Carolina                                                        
## North Dakota                                                          
## Ohio              3.1448279                                           
## Oklahoma          2.8246690 0.6483903                                 
## Oregon            3.2862071 1.2407607 1.0734082                       
## Pennsylvania      2.5555137 0.7781298 0.8180221 1.7293732             
## Rhode Island      3.4042300 1.9659747 1.9746699 2.6621371    1.6369255
## South Carolina    4.5104172 3.1289884 2.7470931 3.0134453    3.3429642
## South Dakota      1.0324944 2.4394250 2.0340486 2.4988870    1.9790714
## Tennessee         4.0623149 2.0167804 1.8500296 2.0306758    2.4343114
## Texas             4.5749422 1.6711510 1.8312655 2.1053000    2.2460705
## Utah              3.1738212 1.0154223 1.2372916 1.2825152    1.2529078
## Vermont           0.9824857 3.4825859 3.1010306 3.4262789    3.0270572
## Virginia          2.9443461 0.9774388 0.5646254 1.2664430    1.1769236
## Washington        3.1725909 0.9725013 0.9586525 0.5935343    1.3993323
## West Virginia     1.2716808 2.8650371 2.4631736 3.0349855    2.3799278
## Wisconsin         1.6216339 1.8649801 1.7916829 2.4088700    1.2204658
## Wyoming           2.4170757 1.3086480 0.7366465 1.6013015    1.0684605
##                Rhode Island South Carolina South Dakota Tennessee     Texas
## Alaska                                                                     
## Arizona                                                                    
## Arkansas                                                                   
## California                                                                 
## Colorado                                                                   
## Connecticut                                                                
## Delaware                                                                   
## Florida                                                                    
## Georgia                                                                    
## Hawaii                                                                     
## Idaho                                                                      
## Illinois                                                                   
## Indiana                                                                    
## Iowa                                                                       
## Kansas                                                                     
## Kentucky                                                                   
## Louisiana                                                                  
## Maine                                                                      
## Maryland                                                                   
## Massachusetts                                                              
## Michigan                                                                   
## Minnesota                                                                  
## Mississippi                                                                
## Missouri                                                                   
## Montana                                                                    
## Nebraska                                                                   
## Nevada                                                                     
## New Hampshire                                                              
## New Jersey                                                                 
## New Mexico                                                                 
## New York                                                                   
## North Carolina                                                             
## North Dakota                                                               
## Ohio                                                                       
## Oklahoma                                                                   
## Oregon                                                                     
## Pennsylvania                                                               
## Rhode Island                                                               
## South Carolina    4.1861320                                                
## South Dakota      3.1262712      3.5215978                                 
## Tennessee         3.5743861      1.4375120    3.0589938                    
## Texas             2.8757996      2.4532276    3.7101039 1.4712840          
## Utah              1.7565845      3.8912317    2.6823382 2.8678113 2.4039834
## Vermont           4.1104165      4.2668977    1.0856574 3.9356721 4.7444455
## Virginia          2.4330133      2.2636538    2.0316897 1.3514491 1.6921625
## Washington        2.1743525      3.3802314    2.5083824 2.3809584 2.1635337
## West Virginia     3.5400858      3.4651680    0.7108812 3.1707450 3.9586581
## Wisconsin         2.0779526      4.2190973    1.5437375 3.4257189 3.4539515
## Wyoming           2.1726807      2.5059056    1.5644785 1.9298669 2.2564704
##                     Utah   Vermont  Virginia Washington West Virginia Wisconsin
## Alaska                                                                         
## Arizona                                                                        
## Arkansas                                                                       
## California                                                                     
## Colorado                                                                       
## Connecticut                                                                    
## Delaware                                                                       
## Florida                                                                        
## Georgia                                                                        
## Hawaii                                                                         
## Idaho                                                                          
## Illinois                                                                       
## Indiana                                                                        
## Iowa                                                                           
## Kansas                                                                         
## Kentucky                                                                       
## Louisiana                                                                      
## Maine                                                                          
## Maryland                                                                       
## Massachusetts                                                                  
## Michigan                                                                       
## Minnesota                                                                      
## Mississippi                                                                    
## Missouri                                                                       
## Montana                                                                        
## Nebraska                                                                       
## Nevada                                                                         
## New Hampshire                                                                  
## New Jersey                                                                     
## New Mexico                                                                     
## New York                                                                       
## North Carolina                                                                 
## North Dakota                                                                   
## Ohio                                                                           
## Oklahoma                                                                       
## Oregon                                                                         
## Pennsylvania                                                                   
## Rhode Island                                                                   
## South Carolina                                                                 
## South Dakota                                                                   
## Tennessee                                                                      
## Texas                                                                          
## Utah                                                                           
## Vermont        3.6546040                                                       
## Virginia       1.7612066 3.0638337                                             
## Washington     0.6940667 3.4804319 1.3809295                                   
## West Virginia  3.2680139 1.0380554 2.3353210  3.0846553                        
## Wisconsin      1.8082282 2.3518637 2.1266497  2.0637823     2.0308890          
## Wyoming        1.8552036 2.6299335 0.7038309  1.5929546     1.8821600 1.7446366
# Next, run hierarchical clustering on the dissimilarity matrix d
# using Ward's method (ward.D2)
# ---
#
res.hc <- hclust(d = d, method = "ward.D2")
print(res.hc)
## 
## Call:
## hclust(d = d, method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 50
# Finally, draw the resulting dendrogram with slightly smaller labels;
# the negative hang makes the labels hang down from 0
# ---
#
plot(x = res.hc, hang = -1, cex = 0.6)

# Build a dissimilarity matrix that can be handed to hclust()
# ---
#

# dist() is called with method = "manhattan", so d.dist holds the
# Manhattan (not Euclidean) distances between the rows of df.
# d.dist is the dissimilarity matrix expected as the first argument of hclust()
# ---
#
d.dist <- dist(x = df, method = "manhattan")
# Preview the top-left 6 x 6 corner as an ordinary matrix
as.matrix(d.dist)[seq_len(6), seq_len(6)]
##             Alabama   Alaska  Arizona Arkansas California Colorado
## Alabama    0.000000 4.237162 4.433076 2.296372   5.795338 4.850928
## Alaska     4.237162 0.000000 4.460279 3.958759   3.772656 3.881920
## Arizona    4.433076 4.460279 0.000000 4.709019   2.207509 2.086123
## Arkansas   2.296372 3.958759 4.709019 0.000000   6.163118 4.358911
## California 5.795338 3.772656 2.207509 6.163118   0.000000 2.217477
## Colorado   4.850928 3.881920 2.086123 4.358911   2.217477 0.000000

Exploring the coloured visualization of the clustering created with Ward's method

# Fit the hierarchical clustering on d with Ward's method (ward.D2)
# ---
#
res.hc <- hclust(d = d, method = "ward.D2")
print(res.hc)
## 
## Call:
## hclust(d = d, method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 50
# Then draw the dendrogram of the fitted tree
# ---
#
plot(x = res.hc, hang = -1, cex = 0.6)

Enhanced Visualization of Dendrogram

# Cut the tree

library(factoextra)
## Warning: package 'factoextra' was built under R version 4.1.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.3
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#visualization
library("ggplot2")

# Cut the tree into 4 groups and colour the labels by group
fviz_dend(res.hc, cex = 0.5, k = 4, color_labels_by_k = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Don't color labels, add rectangles
fviz_dend(res.hc, cex = 0.5, k = 4,
          color_labels_by_k = FALSE, rect = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Change the color of tree using black color for all groups
# Change rectangle border colors
# k = 4 is given explicitly: rectangles can only be placed once the number
# of groups is known, and rect_border = 2:5 supplies one colour per group
fviz_dend(res.hc, k = 4, rect = TRUE, k_colors = "black",
          rect_border = 2:5, rect_lty = 1)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Customized color for groups
fviz_dend(res.hc, k = 4,
          k_colors = c("#1B9E77", "#D95F02", "#7570B3", "#E7298A"))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Color labels using k-means clusters
# kmeans() starts from randomly chosen centres, so the seed is fixed to keep
# the cluster numbering (and therefore the label colours) reproducible
set.seed(123)
km.clust <- kmeans(df, centers = 4)$cluster
# Reorder the k-means labels to follow the left-to-right leaf order of the tree
fviz_dend(res.hc, k = 4,
          k_colors = c("blue", "green3", "red", "black"),
          label_cols = km.clust[res.hc$order], cex = 0.6)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

My extra work…

Verify the cluster tree

After linking the objects in a data set into a hierarchical cluster tree, you might want to assess whether the distances (i.e., heights) in the tree accurately reflect the original distances.

One way to measure how well the cluster tree generated by the hclust() function reflects your data is to compute the correlation between the cophenetic distances and the original distance data generated by the dist() function. If the clustering is valid, the linking of objects in the cluster tree should have a strong correlation with the distances between objects in the original distance matrix.

The closer the value of the correlation coefficient is to 1, the more accurately the clustering solution reflects your data. Values above 0.75 are felt to be good. The “average” linkage method appears to produce high values of this statistic. This may be one reason that it is so popular.

The R base function cophenetic() can be used to compute the cophenetic distances for hierarchical clustering.

# Compute the cophenetic distances implied by the tree res.hc
res.coph <- cophenetic(res.hc)

# Correlation between the cophenetic distances and the original distances.
# res.hc was fitted on d (hclust(d, method = "ward.D2"), Euclidean), so the
# comparison has to use d; d.dist holds Manhattan distances that were not
# used to build this tree.
cor(d, res.coph)
# NOTE(review): the value recorded earlier (0.694799) was obtained against
# d.dist; re-run this chunk to refresh the printed output.

Execute the hclust() function again using the average linkage method. Next, call cophenetic() to evaluate the clustering solution.

# Build a second tree from d.dist, this time with average linkage
res.hc2 <- hclust(d = d.dist, method = "average")

# Correlate d.dist with the cophenetic distances of that tree
cor(x = d.dist, y = cophenetic(x = res.hc2))
## [1] 0.7118403

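Observation: The correlation coefficient is slightly higher for the average-linkage tree, i.e. that tree represents its input distances slightly better. Note that res.hc2 was also built from a different distance matrix (d.dist, Manhattan) than res.hc (d, Euclidean), so the difference cannot be attributed to the linkage method alone.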

## Cut the dendrogram into different groups

One of the problems with hierarchical clustering is that it does not tell us how many clusters there are, or where to cut the dendrogram to form clusters.

You can cut the hierarchical tree at a given height in order to partition your data into clusters. The R base function cutree() can be used to cut a tree, generated by the hclust() function, into several groups either by specifying the desired number of groups or the cut height. It returns a vector containing the cluster number of each observation.

# Cut the tree into 4 groups; grp gives the cluster number of each state
grp <- cutree(tree = res.hc, k = 4)
# Show the cluster assignment of the first 30 states
head(x = grp, n = 30)
##       Alabama        Alaska       Arizona      Arkansas    California 
##             1             2             2             3             2 
##      Colorado   Connecticut      Delaware       Florida       Georgia 
##             2             3             3             2             1 
##        Hawaii         Idaho      Illinois       Indiana          Iowa 
##             3             4             2             3             4 
##        Kansas      Kentucky     Louisiana         Maine      Maryland 
##             3             3             1             4             2 
## Massachusetts      Michigan     Minnesota   Mississippi      Missouri 
##             3             2             4             1             3 
##       Montana      Nebraska        Nevada New Hampshire    New Jersey 
##             4             4             2             4             3
# How many states fall into each cluster
table(grp)
## grp
##  1  2  3  4 
##  7 12 19 12
# Names of the states assigned to cluster 1
rownames(df)[which(grp == 1)]
## [1] "Alabama"        "Georgia"        "Louisiana"      "Mississippi"   
## [5] "North Carolina" "South Carolina" "Tennessee"
# Circular layout of the 4-group dendrogram
fviz_dend(res.hc, k = 4, type = "circular", cex = 0.5)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Phylogenic (tree-like) layout of the same dendrogram
fviz_dend(res.hc, k = 4, type = "phylogenic", cex = 0.5)

More practice

# Reload USArrests and standardize every column with scale()
data("USArrests")
df <- scale(USArrests)

# Distances between the scaled rows, then hierarchical clustering
# (dist() and hclust() are both left at their defaults)
res.hc <- df |> dist() |> hclust()

# Default plot
fviz_dend(res.hc)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Cut the tree into 4 groups and colour the labels by group
fviz_dend(res.hc, cex = 0.5, k = 4, color_labels_by_k = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Don't color labels, add rectangles
fviz_dend(res.hc, cex = 0.5, k = 4,
          color_labels_by_k = FALSE, rect = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# phylogenic
fviz_dend(res.hc, cex = 0.5, k = 4, type = "phylogenic")

# Change the color of tree using black color for all groups
# Change rectangle border colors
# k = 4 is given explicitly: rectangles can only be placed once the number
# of groups is known, and rect_border = 2:5 supplies one colour per group
fviz_dend(res.hc, k = 4, rect = TRUE, k_colors = "black",
          rect_border = 2:5, rect_lty = 1)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Customized color for groups
fviz_dend(res.hc, k = 4,
          k_colors = c("#1B9E77", "#D95F02", "#7570B3", "#E7298A"))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Color labels using k-means clusters
# kmeans() starts from randomly chosen centres, so the seed is fixed to keep
# the cluster numbering (and therefore the label colours) reproducible
set.seed(123)
km.clust <- kmeans(df, centers = 4)$cluster
# Reorder the k-means labels to follow the left-to-right leaf order of the tree
fviz_dend(res.hc, k = 4,
          k_colors = c("blue", "green3", "red", "black"),
          label_cols = km.clust[res.hc$order], cex = 0.6)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Example: hierarchical clustering, both agglomerative and divisive

#install cluster package
#call the library
library("cluster")
## Warning: package 'cluster' was built under R version 4.1.3
# 1. AGNES (agglomerative nesting): standardized data,
#    Euclidean distances, Ward linkage
res.agnes <- agnes(USArrests, metric = "euclidean", stand = TRUE,
                   method = "ward")

# 2. DIANA (divisive analysis): standardized data, Euclidean distances
res.diana <- diana(USArrests, metric = "euclidean", stand = TRUE)
# Plot the AGNES and DIANA trees, each cut into 4 groups, to compare them
fviz_dend(res.agnes, k = 4, cex = 0.6)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

fviz_dend(res.diana, k = 4, cex = 0.6)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.