The data ‘crime’ gives rates of different types of crime per 100,000 residents of the 50 states of the USA for the year 1986.
#Loading libraries.
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cluster)
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.19.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
##
## Attaching package: 'dendextend'
##
## The following object is masked from 'package:stats':
##
## cutree
library(dendsort)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dplyr)
library(vegan)
## Loading required package: permute
##
## Attaching package: 'permute'
##
## The following object is masked from 'package:dendextend':
##
## shuffle
##
## Loading required package: lattice
## Registered S3 method overwritten by 'vegan':
## method from
## rev.hclust dendextend
#Loading the data.
crime <- read.csv('/Users/michaelcajigal/Desktop/MA564/Homework 4/crime.csv')
head(crime)
## Murder Rape Robbery Assault Burglary Theft Vehicle State
## 1 2.0 14.8 28 102 803 2347 164 ME
## 2 2.2 21.5 24 92 755 2208 228 NH
## 3 2.0 21.8 22 103 949 2697 181 VT
## 4 3.6 29.7 193 331 1071 2189 906 MA
## 5 3.5 21.4 119 192 1294 2568 705 RI
## 6 4.6 23.8 192 205 1198 2758 447 CT
summary(crime)
## Murder Rape Robbery Assault
## Min. : 1.000 Min. :11.60 Min. : 7.0 Min. : 32.0
## 1st Qu.: 3.700 1st Qu.:23.27 1st Qu.: 67.0 1st Qu.:176.5
## Median : 6.300 Median :30.10 Median :109.5 Median :248.0
## Mean : 6.776 Mean :33.85 Mean :142.1 Mean :275.7
## 3rd Qu.: 9.275 3rd Qu.:43.45 3rd Qu.:202.8 3rd Qu.:366.0
## Max. :13.500 Max. :72.70 Max. :514.0 Max. :605.0
## Burglary Theft Vehicle State
## Min. : 385 Min. :1358 Min. : 99.0 Length:50
## 1st Qu.: 894 1st Qu.:2366 1st Qu.:209.8 Class :character
## Median :1148 Median :2812 Median :328.0 Mode :character
## Mean :1197 Mean :2918 Mean :382.2
## 3rd Qu.:1425 3rd Qu.:3382 3rd Qu.:529.5
## Max. :2221 Max. :4373 Max. :906.0
#Checking for any missing data because PCA interpretation changes.
colSums(is.na(crime))
## Murder Rape Robbery Assault Burglary Theft Vehicle State
## 0 0 0 0 0 0 0 0
#Remove first 3 columns
crime_data <- crime %>% select(-State)
head(crime_data)
## Murder Rape Robbery Assault Burglary Theft Vehicle
## 1 2.0 14.8 28 102 803 2347 164
## 2 2.2 21.5 24 92 755 2208 228
## 3 2.0 21.8 22 103 949 2697 181
## 4 3.6 29.7 193 331 1071 2189 906
## 5 3.5 21.4 119 192 1294 2568 705
## 6 4.6 23.8 192 205 1198 2758 447
*Note: Since there are no entries missing, all data is preserved and will be used for further analysis.
############### Hierarchical Clustering #############
################# Complete Linkage #################
#1.Compute distance matrix.
#The dist() function uses Euclidean distance by default.
dist_matrix <- dist(crime_data)
#2.Perform hierarchical clustering using complete linkage.
hc_complete <- hclust(dist_matrix, method = "complete")
#3.Plot the dendrogram, dendsort function orders dendodram.
dend_complete <- dendsort(as.dendrogram(hc_complete))
plot(dend_complete)
#4.Plot colored Dendrogram with 4 clusters.
#Cut the dendrogram to get 4 clusters.
clusters_complete <- cutree(hc_complete, k = 4)
table(clusters_complete)
## clusters_complete
## 1 2 3 4
## 27 6 11 6
#Color branches by clusters.
dend_colored_complete <- color_branches(dend_complete, k = 4)%>%
set("branches_lwd", 2)
# Plot the colored dendrogram
plot(dend_colored_complete,
main = "Dendrogram - Complete Linkage (4 Clusters)",
ylab = "Height (Distance)")
############### Hierarchical Clustering ################
################# Ward Linkage #################
#1.Perform hierarchical clustering using ward linkage,
#when we use method = "ward.D2" in hclust(),
#R automatically squares the Euclidean distances internally.
hc_ward <- hclust(dist_matrix, method = "ward.D2")
clusters_ward <- cutree(hc_ward, k = 4)
dend <- as.dendrogram(hc_ward)
dend_colored <- color_branches(dend, 4)%>%
set("branches_lwd", 2)
plot(dend_colored,
main = "Dendrogram - Ward Linkage (4 Clusters)",
ylab = "Height (Distance)")
################ Elbow Method ################
#1.Scree plot.
fviz_nbclust(crime_data, kmeans, method = "wss", k.max=12)
#2.Run K-means clustering with k = 2.
set.seed(123) # for reproducibility
kmeans_result <- kmeans(crime_data, centers = 2, nstart = 100)
# Optional: Add cluster labels to your original data
crime_data$cluster <- kmeans_result$cluster
#3.Plot results of final k-means model.
fviz_cluster(kmeans_result, data = crime_data)
Find a range of values and variances for all the variables in your data. What do you notice about the scale and variability of the variables.
summary_stats <- data.frame(
Min = sapply(crime_data, min),
Max = sapply(crime_data, max),
Range = sapply(crime_data, function(x) max(x) - min(x)),
Variance = sapply(crime_data, var)
)
print(summary_stats)
## Min Max Range Variance
## Murder 1.0 13.5 12.5 1.193492e+01
## Rape 11.6 72.7 61.1 2.097634e+02
## Robbery 7.0 514.0 507.0 1.188956e+04
## Assault 32.0 605.0 573.0 1.937354e+04
## Burglary 385.0 2221.0 1836.0 1.758950e+05
## Theft 1358.0 4373.0 3015.0 5.652766e+05
## Vehicle 99.0 906.0 807.0 4.399736e+04
## cluster 1.0 2.0 1.0 2.142857e-01
Standardize the data and run a) and b) again. Compare your results.
#Scaling data since variables seem to have different scales.
crime_scaled <- scale(crime_data)
#Redoing a) with standarized data.
############### Hierarchical Clustering #############
################# Complete Linkage #################
#1.Compute distance matrix.
#The dist() function uses Euclidean distance by default.
dist_matrix <- dist(crime_scaled)
#2.Perform hierarchical clustering using complete linkage.
hc_complete <- hclust(dist_matrix, method = "complete")
#3.Plot the dendrogram, dendsort function orders dendodram.
dend_complete <- dendsort(as.dendrogram(hc_complete))
plot(dend_complete)
#4.Plot colored Dendrogram with 3 clusters.
#Cut the dendrogram to get 3 clusters.
clusters_complete <- cutree(hc_complete, k = 3)
table(clusters_complete)
## clusters_complete
## 1 2 3
## 12 30 8
#Color branches by clusters.
dend_colored_complete <- color_branches(dend_complete, k = 3)%>%
set("branches_lwd", 2)
# Plot the colored dendrogram
plot(dend_colored_complete,
main = "Dendrogram - Complete Linkage (3 Clusters)",
ylab = "Height (Distance)")
############### Hierarchical Clustering ################
################# Ward Linkage #################
#1.Perform hierarchical clustering using ward linkage,
#when we use method = "ward.D2" in hclust(),
#R automatically squares the Euclidean distances internally.
hc_ward <- hclust(dist_matrix, method = "ward.D2")
clusters_ward <- cutree(hc_ward, k = 4)
dend <- as.dendrogram(hc_ward)
dend_colored <- color_branches(dend, 4)%>%
set("branches_lwd", 2)
plot(dend_colored,
main = "Dendrogram - Ward Linkage (4 Clusters)",
ylab = "Height (Distance)")
#Redoing b) with standarized data.
################ Elbow Method ################
#1.Scree plot.
fviz_nbclust(crime_scaled, kmeans, method = "wss", k.max=12)
#2.Run K-means clustering with k = 3.
set.seed(456) # for reproducibility
kmeans_result <- kmeans(crime_scaled, centers = 3, nstart = 100)
# Optional: Add cluster labels to your original data
crime_data$cluster <- kmeans_result$cluster
#3.Plot results of final k-means model.
fviz_cluster(kmeans_result, data = crime_scaled)
Describe your clusters by comparing the means of different crime rates across clusters.
crime_scaled <- as.data.frame(crime_scaled)
#For hierarchial-complete linkage.
crime_scaled$Cluster <- as.factor(clusters_complete)
crime_vars <- c("Murder", "Rape", "Robbery", "Assault", "Burglary", "Theft", "Vehicle") # replace with your variable names
aggregate(crime_scaled[crime_vars], by = list(cluster = crime_scaled$Cluster), FUN = mean)
## cluster Murder Rape Robbery Assault Burglary
## 1 1 -1.08094421 -1.05609650 -1.0227203 -1.03511646 -1.03301145
## 2 2 0.05519058 0.03033398 0.1513216 0.06111601 0.09737758
## 3 3 1.41445164 1.47039234 0.9666243 1.32348966 1.18435125
## Theft Vehicle
## 1 -0.54844363 -0.9855285
## 2 -0.06013621 0.1267190
## 3 1.04817623 1.0030966
#For hierarchial-ward.
crime_scaled$Cluster <- as.factor(clusters_ward)
crime_vars <- c("Murder", "Rape", "Robbery", "Assault", "Burglary", "Theft", "Vehicle") # replace with your variable names
aggregate(crime_scaled[crime_vars], by = list(cluster = crime_scaled$Cluster), FUN = mean)
## cluster Murder Rape Robbery Assault Burglary Theft
## 1 1 -1.04131561 -0.96148777 -0.9547020 -1.02493844 -0.90916625 -0.2804059
## 2 2 -0.07989126 -0.22072851 1.2070357 0.41503736 -0.01553927 -0.2375592
## 3 3 0.20052417 -0.04299807 -0.2055452 -0.03752097 -0.28635661 -0.6360887
## 4 4 0.91763620 1.20722287 0.6311770 0.92647786 1.33990832 1.2127705
## Vehicle
## 1 -0.9174220
## 2 1.3406495
## 3 -0.3925762
## 4 0.7492754
#For k-means.
crime_scaled$Cluster <- as.factor(kmeans_result$cluster)
crime_vars <- c("Murder", "Rape", "Robbery", "Assault", "Burglary", "Theft", "Vehicle") # replace with your variable names
aggregate(crime_scaled[crime_vars], by = list(cluster = crime_scaled$Cluster), FUN = mean)
## cluster Murder Rape Robbery Assault Burglary Theft
## 1 1 -0.6982854 -0.77138855 -0.7654457 -0.8495717 -0.797085483 -0.4900018
## 2 2 0.2288672 0.08511005 0.5756336 0.4430911 0.007804831 -0.3323984
## 3 3 0.9176362 1.20722287 0.6311770 0.9264779 1.339908321 1.2127705
## Vehicle
## 1 -0.8162838
## 2 0.5478443
## 3 0.7492754To interpret the clusters, we compared the standardized means of crime rates across methods. Hierarchical clustering with complete linkage identified three clusters representing low, average, and high crime areas. Cluster 1 had the lowest crime rates (e.g., murder = -1.06, assault = -1.04), Cluster 2 showed near-average values (e.g., robbery = 0.15), and Cluster 3 had the highest (e.g., rape = 1.47, assault = 1.32).
Ward’s method, using four clusters, provided more nuanced groupings. Cluster 1 had the lowest crime rates (e.g., murder = -1.04), Cluster 2 and 3 represented moderate levels, while Cluster 4 clearly reflected high crime (e.g., rape = 1.21, burglary = 1.34, vehicle theft = 1.21).
K-means clustering yielded similar results to complete linkage, with a clear separation between low (e.g., robbery = -0.76), moderate, and high crime clusters (e.g., assault = 0.93). Among all methods, Ward’s method offered the most detailed and interpretable clusters, making it especially useful for identifying crime patterns and targeting interventions.
The dataset SVI Village contains SVI variables.
#Loading the data.
svi <- read.csv('/Users/michaelcajigal/Desktop/MA564/Homework 4/SVI Village.csv')
head(svi)
## Village A1_P A2_P A3_P A4_P B1_P B2_P B3_P
## 1 Agana Heights 17.93592 6.975415 19276 17.13636 8.219538 30.82983 9.637605
## 2 Agat 26.54057 13.676093 14749 23.29920 8.867195 34.26886 11.714460
## 3 Asan 18.71783 7.788162 21626 16.65326 8.469818 31.77351 8.469818
## 4 Barrigada 20.13521 6.554071 19279 17.84249 8.078873 31.97746 7.864789
## 5 Dededo 24.66013 8.402469 13550 24.11425 7.235832 34.07427 7.411610
## 6 Hagatna 27.21218 9.619687 16093 28.55093 5.328259 24.07231 9.229305
## B4_P C2_P D1_P D2_P D3_P D4_P D5_P
## 1 14.78102 0.14232849 14.6708961 0.4758128 16.69708 7.025547 1.52310924
## 2 16.91819 0.04493372 7.2281167 1.8567639 25.41700 8.895949 2.11511084
## 3 14.08451 0.20618557 0.7989348 0.3994674 14.86698 4.851330 0.04679457
## 4 15.35730 0.52832043 5.9622642 1.3962264 19.35198 4.837994 3.59436620
## 5 15.26115 0.34968455 5.6980279 0.9977395 28.39137 6.945956 1.18817168
## 6 17.13287 2.05973223 29.5454545 0.2525253 22.72727 12.937063 21.97906755
## H1_P H2_P H3A_P H3B_P H3C_P H4_P H5_P H6_P
## 1 0.2379064 12.529738 2.616971 5.551150 0.7137193 9.992070 4.916733 3.832117
## 2 0.6631300 13.859416 9.748011 17.241379 2.3209549 14.257294 13.925729 3.733122
## 3 0.0000000 36.484687 3.595206 8.122503 1.0652463 7.589880 6.524634 2.660407
## 4 0.3773585 41.509434 4.528302 8.679245 1.4716981 11.509434 9.962264 2.796272
## 5 0.9665601 24.156209 8.207966 14.958298 1.8941461 13.703328 11.894926 4.171201
## 6 0.0000000 8.333333 2.525253 6.060606 1.5151515 7.828283 20.202020 7.342657
## H7_P H8_P H9_P P1_P P2_P P3_P P4_P
## 1 19.98175 23.99635 7.299270 9.952731 15.25735 25.63869 13.54560
## 2 23.74901 34.07466 7.863384 11.673785 21.51719 30.73868 15.78296
## 3 18.30986 19.87480 5.790297 6.972391 12.44736 25.19562 13.14149
## 4 22.37017 21.79316 6.080781 14.715493 25.43099 28.85042 18.94785
## 5 26.71382 27.29416 6.909684 21.195737 40.50241 38.25716 24.01518
## 6 32.86713 37.06294 10.839161 30.827783 35.39486 32.16783 28.65169
summary(svi)
## Village A1_P A2_P A3_P
## Length:19 Min. :12.85 Min. : 5.670 Min. :13546
## Class :character 1st Qu.:18.33 1st Qu.: 7.670 1st Qu.:15164
## Mode :character Median :21.08 Median : 8.397 Median :17882
## Mean :21.64 Mean : 9.091 Mean :17675
## 3rd Qu.:24.55 3rd Qu.: 9.720 3rd Qu.:19292
## Max. :29.70 Max. :15.576 Max. :26303
## A4_P B1_P B2_P B3_P
## Min. :12.49 Min. :4.220 Min. :24.07 Min. : 5.916
## 1st Qu.:17.49 1st Qu.:6.050 1st Qu.:31.17 1st Qu.: 7.460
## Median :19.66 Median :6.523 Median :33.19 Median : 8.077
## Mean :19.79 Mean :6.875 Mean :32.90 Mean : 8.562
## 3rd Qu.:21.93 3rd Qu.:7.994 3rd Qu.:35.14 3rd Qu.: 9.614
## Max. :28.55 Max. :9.452 Max. :39.51 Max. :12.270
## B4_P C2_P D1_P D2_P
## Min. : 9.888 Min. :0.0000 Min. : 0.000 Min. :0.0000
## 1st Qu.:13.968 1st Qu.:0.0733 1st Qu.: 1.047 1st Qu.:0.3691
## Median :15.426 Median :0.2062 Median : 5.962 Median :0.4758
## Mean :15.736 Mean :0.3988 Mean :12.034 Mean :0.6874
## 3rd Qu.:17.318 3rd Qu.:0.4362 3rd Qu.:17.974 3rd Qu.:0.9931
## Max. :21.637 Max. :2.0597 Max. :54.445 Max. :1.8568
## D3_P D4_P D5_P H1_P
## Min. :13.03 Min. : 1.124 Min. : 0.0000 Min. :0.00000
## 1st Qu.:16.59 1st Qu.: 4.845 1st Qu.: 0.8245 1st Qu.:0.04353
## Median :20.64 Median : 6.247 Median : 1.5231 Median :0.42634
## Mean :21.63 Mean : 6.594 Mean : 3.5976 Mean :0.61984
## 3rd Qu.:26.28 3rd Qu.: 8.516 3rd Qu.: 3.5305 3rd Qu.:0.71976
## Max. :32.75 Max. :12.937 Max. :21.9791 Max. :3.14136
## H2_P H3A_P H3B_P H3C_P
## Min. : 4.655 Min. : 1.297 Min. : 3.990 Min. :0.4544
## 1st Qu.:14.373 1st Qu.: 3.321 1st Qu.: 7.392 1st Qu.:1.0304
## Median :31.533 Median : 6.056 Median : 9.107 Median :1.5226
## Mean :32.527 Mean : 6.652 Mean :11.345 Mean :1.7611
## 3rd Qu.:41.456 3rd Qu.: 8.801 3rd Qu.:16.100 3rd Qu.:2.2183
## Max. :81.549 Max. :17.801 Max. :21.746 Max. :3.9539
## H4_P H5_P H6_P H7_P
## Min. : 2.926 Min. : 4.644 Min. :1.865 Min. :15.36
## 1st Qu.: 9.610 1st Qu.: 7.381 1st Qu.:3.322 1st Qu.:18.79
## Median :11.668 Median : 9.962 Median :3.888 Median :22.37
## Mean :12.165 Mean :11.510 Mean :4.153 Mean :22.37
## 3rd Qu.:14.406 3rd Qu.:12.910 3rd Qu.:4.333 3rd Qu.:24.83
## Max. :25.865 Max. :25.041 Max. :8.187 Max. :32.87
## H8_P H9_P P1_P P2_P
## Min. :15.47 Min. : 4.494 Min. : 3.080 Min. : 4.987
## 1st Qu.:21.18 1st Qu.: 5.945 1st Qu.: 6.844 1st Qu.:12.096
## Median :23.40 Median : 7.042 Median : 9.953 Median :15.895
## Mean :25.39 Mean : 7.065 Mean :12.868 Mean :20.406
## 3rd Qu.:30.05 3rd Qu.: 7.738 3rd Qu.:17.949 3rd Qu.:26.897
## Max. :39.77 Max. :10.839 Max. :34.681 Max. :50.597
## P3_P P4_P
## Min. :20.47 Min. :10.53
## 1st Qu.:24.47 1st Qu.:13.46
## Median :26.89 Median :15.18
## Mean :27.69 Mean :17.19
## 3rd Qu.:30.76 3rd Qu.:20.24
## Max. :38.26 Max. :30.34
#Checking for any missing data because PCA interpretation changes.
colSums(is.na(svi))
## Village A1_P A2_P A3_P A4_P B1_P B2_P B3_P B4_P C2_P
## 0 0 0 0 0 0 0 0 0 0
## D1_P D2_P D3_P D4_P D5_P H1_P H2_P H3A_P H3B_P H3C_P
## 0 0 0 0 0 0 0 0 0 0
## H4_P H5_P H6_P H7_P H8_P H9_P P1_P P2_P P3_P P4_P
## 0 0 0 0 0 0 0 0 0 0
svi[, 2:30] <- scale(svi[, 2:30])
#Run PCA on standardized SVI data but
pca_result <- prcomp(svi[, -1], center = FALSE, scale. = FALSE)
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.2965 2.7998 1.7995 1.3835 1.27771 1.00980 0.9011
## Proportion of Variance 0.3747 0.2703 0.1117 0.0660 0.05629 0.03516 0.0280
## Cumulative Proportion 0.3747 0.6450 0.7567 0.8227 0.87899 0.91415 0.9422
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.66524 0.57581 0.53093 0.45027 0.34801 0.28273 0.24476
## Proportion of Variance 0.01526 0.01143 0.00972 0.00699 0.00418 0.00276 0.00207
## Cumulative Proportion 0.95741 0.96884 0.97856 0.98555 0.98973 0.99249 0.99455
## PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.23343 0.22490 0.17414 0.15031 2.292e-15
## Proportion of Variance 0.00188 0.00174 0.00105 0.00078 0.000e+00
## Cumulative Proportion 0.99643 0.99818 0.99922 1.00000 1.000e+00
plot(pca_result, type = "l", main = "Scree Plot")
pca_result$rotation
## PC1 PC2 PC3 PC4 PC5
## A1_P -0.110405193 0.28157055 0.073684340 -0.05319899 0.26446841
## A2_P -0.229807042 0.10300136 0.178505407 -0.01630685 0.20130082
## A3_P 0.219161395 -0.18557262 0.068673894 0.14218256 -0.02987983
## A4_P -0.141924138 0.27367590 -0.007639570 -0.18454128 -0.19831300
## B1_P 0.100201072 -0.11953639 0.206041487 -0.52515533 -0.04009545
## B2_P -0.239082347 -0.12211749 -0.006877935 -0.11093227 0.20912763
## B3_P -0.158708062 0.04690335 0.365135445 -0.27647077 -0.10529670
## B4_P -0.228864135 0.13019809 0.187546520 0.04027536 0.02594598
## C2_P 0.122786988 0.21814550 -0.110841121 0.19109249 -0.34503302
## D1_P 0.198449376 0.17502385 0.047133280 0.07784710 0.13667398
## D2_P -0.049532102 -0.03904538 -0.304048632 -0.46836846 -0.15568035
## D3_P -0.258309030 0.13777453 -0.151663081 -0.05417840 0.05779719
## D4_P -0.006723012 0.30960268 0.241771221 -0.02441526 0.04737796
## D5_P 0.113090213 0.18011196 -0.049350581 0.25687371 -0.32077189
## H1_P -0.242927248 0.02229721 -0.185117429 0.16906871 0.22356366
## H2_P -0.152417706 -0.15907636 -0.177257575 0.12701477 -0.33086042
## H3A_P -0.279567445 -0.02238068 -0.142333271 0.06666404 0.13007966
## H3B_P -0.261679273 -0.03233865 -0.225928248 -0.07668115 0.02512619
## H3C_P -0.233041371 -0.03137547 -0.264667484 0.05720483 -0.03768310
## H4_P -0.256934234 -0.09032621 -0.137701055 -0.06313781 -0.20496025
## H5_P -0.240044194 0.11266112 -0.032156523 0.04739911 -0.30918088
## H6_P -0.175958833 0.24501013 0.090784081 0.24550515 0.04820461
## H7_P 0.123819533 0.30251464 -0.078304641 -0.07605460 0.06605872
## H8_P -0.190562968 0.24469159 0.107955319 -0.00360906 0.02319528
## H9_P -0.059126946 0.21728409 0.213927800 -0.14173910 -0.41153476
## P1_P 0.168430703 0.26889143 -0.177552675 -0.02974207 0.05174605
## P2_P 0.165456355 0.22673702 -0.256498642 -0.13659272 0.14032002
## P3_P 0.142321750 0.11972796 -0.339394803 -0.27210814 0.05631510
## P4_P 0.129075480 0.28201993 -0.211558299 -0.02697543 0.06016054
## PC6 PC7 PC8 PC9 PC10
## A1_P -0.110937190 -0.034572780 0.378204647 -0.081878184 0.0114244633
## A2_P 0.193218965 0.296513027 0.256199409 0.029642873 -0.1060430290
## A3_P 0.182494948 -0.277691939 0.218927838 0.079335960 -0.0234923160
## A4_P -0.070570942 0.084618242 -0.009594626 -0.368690769 -0.0007068658
## B1_P 0.200161092 -0.187313499 0.039887043 -0.092704563 0.2736457218
## B2_P -0.278689452 0.059144001 0.154275761 -0.378944970 -0.1814515136
## B3_P 0.267027427 -0.121485218 0.074787287 0.059683421 -0.0608621664
## B4_P -0.317250394 0.133479971 -0.138191598 0.235500310 -0.0633628047
## C2_P 0.051778062 -0.021935334 0.469504284 -0.178864813 0.3675666262
## D1_P 0.024616275 -0.469540774 0.094859702 0.165875100 -0.5371144431
## D2_P -0.098214645 0.172996710 0.409142912 0.502564590 -0.0685312090
## D3_P -0.008196978 -0.091798491 -0.098872034 -0.072131389 -0.0622036158
## D4_P -0.026161466 0.131741841 -0.108349890 0.188093642 0.1669066046
## D5_P 0.309217758 0.468055084 -0.027699143 0.131169901 -0.2443098801
## H1_P 0.054029690 -0.135422536 -0.121256649 0.258521834 0.4378149549
## H2_P -0.444923012 -0.103794455 0.156400764 0.063002927 0.0130666444
## H3A_P 0.137419804 -0.017546786 0.093670696 0.079649857 -0.0410319047
## H3B_P 0.165229834 0.002100843 0.021988030 -0.072880887 -0.2654572509
## H3C_P 0.298545246 -0.210132897 -0.034577442 0.027152064 0.0085515054
## H4_P 0.035523805 -0.183946161 -0.241576574 0.112953583 0.0559438344
## H5_P 0.207885806 -0.138929839 -0.015577205 -0.162162297 -0.1954362207
## H6_P 0.026877566 -0.104444757 0.054245368 -0.139848731 0.1247425120
## H7_P -0.102570977 -0.001253024 -0.201112473 0.187579379 -0.0240712481
## H8_P 0.154699994 -0.106813572 0.052087441 0.216670351 0.1452471641
## H9_P -0.226047101 -0.200243516 -0.203125047 0.028056344 -0.0723197756
## P1_P -0.031443249 -0.108236354 0.051093983 -0.007096884 -0.0336017988
## P2_P -0.069319781 -0.020166952 -0.038213814 -0.029303265 -0.0170377092
## P3_P 0.188244492 0.156751352 -0.269009958 -0.203163474 0.0403981277
## P4_P -0.070140567 -0.184086140 0.045694352 -0.046860375 0.0437928104
## PC11 PC12 PC13 PC14 PC15
## A1_P -0.343469595 0.11640143 -0.10143615 0.168875739 0.120718919
## A2_P 0.121568054 -0.34979800 -0.33026684 0.132230610 0.006640825
## A3_P -0.257717418 0.03574748 -0.29325428 0.213417894 -0.299225581
## A4_P -0.086717849 -0.09567560 0.02175925 0.001659139 -0.137062813
## B1_P 0.132805240 0.28403092 -0.14590917 0.071517723 0.063753969
## B2_P 0.032025662 -0.16773106 0.03870547 0.275355042 -0.190510253
## B3_P 0.239202459 -0.05017170 0.16041504 -0.259432002 -0.137961263
## B4_P -0.238122190 0.34119862 -0.08059716 -0.214373517 -0.187506461
## C2_P 0.120526061 -0.18741290 0.06487690 -0.045966416 0.100857025
## D1_P 0.027345133 -0.21052710 0.14568270 -0.008618885 0.084506495
## D2_P -0.065752792 -0.05538500 0.13696813 -0.085329498 -0.256467616
## D3_P 0.171403355 0.14070856 0.28928388 0.292139484 -0.148854780
## D4_P 0.125192128 -0.08999613 -0.14751401 0.146754961 0.262649615
## D5_P 0.015013636 0.11662147 0.12089049 0.209631247 -0.076150827
## H1_P 0.130633720 -0.13534117 0.01732387 0.082142168 -0.116410300
## H2_P 0.087579159 0.11660295 -0.25234140 -0.004715721 0.230473202
## H3A_P 0.134179644 -0.05037619 -0.07668861 -0.289357950 0.189335075
## H3B_P 0.101456573 0.13740503 -0.08495808 -0.184125125 0.417948065
## H3C_P -0.298454130 -0.10520512 -0.22755587 -0.107789272 -0.033875747
## H4_P 0.039092080 -0.26306276 0.13892100 0.354495946 -0.064786682
## H5_P 0.127561793 0.35466922 -0.28730358 0.138699410 -0.154370234
## H6_P -0.007482911 0.02286841 0.14778535 -0.421947110 -0.340916042
## H7_P 0.254270270 -0.08182824 -0.49934027 0.023248043 -0.220771141
## H8_P -0.337624996 0.21275909 0.21697032 0.256974807 0.254099189
## H9_P -0.194501093 -0.34520884 -0.04711597 -0.055674226 0.150732227
## P1_P 0.105244669 0.07677385 0.08612174 -0.009705724 0.022944345
## P2_P 0.197637024 0.15732937 0.06172915 0.055400486 0.129012828
## P3_P -0.403269604 -0.16156674 -0.08710221 -0.148275339 0.016240853
## P4_P 0.081557181 0.11879031 -0.07002027 -0.012895247 -0.162548168
## PC16 PC17 PC18 PC19
## A1_P -0.09013328 0.017160928 -0.14820916 0.212019975
## A2_P -0.25278226 -0.097339136 0.02235069 -0.353992249
## A3_P -0.16955108 0.127988027 0.33776271 0.143758809
## A4_P -0.09190108 -0.257158446 0.31819278 -0.046467362
## B1_P 0.07920701 0.095259842 -0.18084417 -0.029738921
## B2_P 0.33350751 0.146941426 -0.19116815 0.321516242
## B3_P -0.24457956 -0.208725398 -0.08039521 0.483819393
## B4_P -0.11856540 -0.044339796 -0.37570022 -0.138959743
## C2_P 0.02534215 0.058722566 -0.24749633 -0.087690884
## D1_P 0.01711438 0.104429816 -0.16082026 -0.046441800
## D2_P 0.19070119 0.111943199 0.11852586 -0.111595834
## D3_P -0.27290317 -0.103327652 0.21545347 -0.085910105
## D4_P 0.24387259 0.128724207 0.03404028 0.107775292
## D5_P 0.01875358 -0.002135237 -0.14388034 0.325906054
## H1_P 0.04484245 0.001270653 0.01714284 0.142909120
## H2_P -0.26980611 -0.039728819 0.07812882 0.333982227
## H3A_P -0.29498258 0.312418437 -0.10797524 0.044443071
## H3B_P 0.19726632 0.013702023 0.29299128 0.032491669
## H3C_P 0.32298818 -0.527171913 -0.22718281 0.048775974
## H4_P -0.12753865 0.187392220 -0.25390787 -0.044884214
## H5_P 0.13494902 0.285677416 -0.11625324 -0.223676520
## H6_P 0.20893588 0.330112009 0.21327929 0.036620729
## H7_P 0.10679991 0.013174161 0.13260494 0.211281617
## H8_P 0.03074438 0.053433449 0.21475029 0.033689383
## H9_P 0.05555000 0.060425653 0.08457116 -0.079043522
## P1_P 0.03271090 -0.190882559 -0.04523574 0.071460153
## P2_P 0.02100106 0.014625502 -0.04108712 0.007259177
## P3_P -0.27374242 0.331299064 -0.08933812 0.178903709
## P4_P -0.22963243 -0.145981789 -0.11974727 -0.147646757
pca_scores <- pca_result$x
print(pca_scores)
## PC1 PC2 PC3 PC4 PC5 PC6
## [1,] 2.01626140 -1.3117972 2.0569104 -0.40524385 0.144611858 0.11690715
## [2,] -2.45751579 1.3264795 0.3938806 -3.52814620 0.333926627 1.27482597
## [3,] 1.92988702 -2.9253588 1.3132288 -0.06751008 0.105725452 -0.03899067
## [4,] 1.54145840 -1.1205420 -1.0272840 -1.04083079 -0.535039773 -0.44528663
## [5,] -0.31404345 2.1071456 -2.5730746 -1.89342712 0.916190493 -0.19367902
## [6,] 1.85234241 7.8355419 0.5946449 1.38272618 -2.708595248 1.13057879
## [7,] -5.54982765 -1.7756187 -1.4157004 0.14478113 -2.451531340 -0.01532043
## [8,] 0.05655776 1.0779630 -1.0769165 0.25258515 0.486694908 -1.10522613
## [9,] -4.86794927 -0.1297912 2.1373147 -0.53268855 -0.004339196 0.77405105
## [10,] 0.50033182 2.9181434 2.4175775 0.25074315 1.294257581 -2.04570507
## [11,] -0.01132788 -1.4798246 -0.2957122 0.12945381 -0.676283680 -1.04351009
## [12,] 3.40965672 -4.2661756 -0.3879596 0.52310410 0.946097671 1.81237142
## [13,] 2.92140323 -3.2158932 -0.2495083 1.75685170 -0.483621927 0.95552218
## [14,] 1.76433385 -0.2676727 3.0179157 -1.50384358 -0.327796923 0.13792239
## [15,] -0.40470772 -2.4703966 -0.7094904 0.84594281 -1.718259031 -0.94206040
## [16,] 6.60842467 2.9601643 -1.1341559 0.74759445 1.303764045 0.25973871
## [17,] -7.24397386 1.2560931 0.5751780 2.75333129 2.420006254 0.86759430
## [18,] -0.93034010 0.6734392 -4.3109883 -0.30407150 0.793466298 -0.02302702
## [19,] -0.82097155 -1.1918995 0.6741394 0.48864789 0.160725931 -1.47670651
## PC7 PC8 PC9 PC10 PC11
## [1,] -0.21668538 -0.591444868 0.30574819 0.52933023 0.0178062978
## [2,] 0.66704555 0.875599882 1.04033298 -0.24616697 -0.2390348554
## [3,] 0.19680973 0.305030467 -0.43084599 0.64867192 -0.0785395195
## [4,] 0.16247084 0.592714100 0.58872160 0.47060876 0.0009765367
## [5,] 0.27677089 -0.845943489 -0.89548619 0.46294148 -0.1532784260
## [6,] 0.34330329 -0.006319592 -0.20851731 0.11463392 -0.4272194572
## [7,] -1.75448111 -0.816365555 0.43331281 -0.36315797 0.0426013148
## [8,] 0.43314064 0.148315419 0.45824001 -0.81231067 -0.0512350363
## [9,] 0.25217730 0.604218173 -1.30125742 -0.80228294 0.8041078762
## [10,] 0.01774921 -0.065965393 -0.19038845 -0.50440204 -0.6992882007
## [11,] -0.15445306 -0.396438935 0.30604980 -0.46552860 0.1328410755
## [12,] -1.18121610 0.124385579 -0.45504733 -0.61235413 -1.0063092174
## [13,] 2.43761890 -0.661906136 0.36568322 -0.36680961 0.3194940098
## [14,] -0.54627793 -1.042603584 -0.05164923 0.54930986 0.3631999279
## [15,] -0.03691899 1.576502712 -0.41185706 0.70490048 -0.0903511229
## [16,] -1.56898748 0.572473179 0.32768907 -0.19880921 0.9791569124
## [17,] -0.18418076 0.018750740 0.56091277 0.74725845 -0.0316064790
## [18,] 0.50626948 -0.343438120 -0.50465821 0.21440677 0.0768640673
## [19,] 0.34984498 -0.047564581 0.06301675 -0.07023974 0.0398142959
## PC12 PC13 PC14 PC15 PC16 PC17
## [1,] -0.06888670 0.86088664 -0.25444701 -0.110820122 0.234183599 0.03767299
## [2,] -0.23930748 -0.14303507 0.03733118 0.154976955 0.087169406 0.03372846
## [3,] 0.60651879 -0.36280743 -0.13120301 0.164173624 -0.148649884 -0.30526816
## [4,] 0.76410513 0.02245136 0.08769664 -0.476630455 0.024109814 0.03652202
## [5,] 0.19093545 0.09656717 0.07395211 0.147247591 -0.334687858 0.33801636
## [6,] -0.01779764 -0.05900122 -0.08287841 -0.165712269 -0.043368001 -0.03283016
## [7,] 0.30085276 0.05419559 0.28098897 0.184637307 0.064910209 -0.04809626
## [8,] -0.08997669 0.40444278 -0.15512003 0.079318118 -0.544025403 -0.31027229
## [9,] 0.19496668 0.14981079 -0.05185524 -0.153761302 0.045042574 0.01380177
## [10,] 0.29053558 -0.04508967 0.18771798 0.255801685 0.369996223 0.02657094
## [11,] -0.09773438 -0.43813782 -0.75131531 -0.056680960 0.055373358 0.23647091
## [12,] -0.17255459 -0.02443721 0.01907914 -0.145536099 -0.063330098 0.03504450
## [13,] 0.10509357 -0.01279334 0.23092706 0.232587961 0.074595012 0.11261927
## [14,] -0.49747572 -0.26684021 0.10224691 0.087830190 -0.134730437 -0.15981492
## [15,] -0.48678387 0.16240773 0.02444873 0.326564376 0.005037615 0.12116061
## [16,] 0.03091136 -0.06470508 0.13247977 0.119361426 0.048486275 0.07774047
## [17,] 0.02686424 -0.08515724 -0.08144679 0.002476426 -0.089439063 0.05116034
## [18,] -0.32268285 -0.05843786 -0.08499120 -0.103686248 0.466315765 -0.33252768
## [19,] -0.51758363 -0.19031989 0.41638852 -0.542148205 -0.116989107 0.06830083
## PC18 PC19
## [1,] 0.162382898 2.394984e-15
## [2,] 0.119979142 1.121731e-15
## [3,] 0.291390291 2.896413e-15
## [4,] -0.284191814 2.312846e-15
## [5,] 0.050551294 1.638041e-15
## [6,] 0.044673190 1.779717e-15
## [7,] 0.079184615 1.183898e-15
## [8,] -0.074466006 2.094358e-15
## [9,] -0.052205259 1.156540e-15
## [10,] -0.082314760 2.533827e-15
## [11,] 0.002429587 2.087653e-15
## [12,] -0.072302008 2.636583e-15
## [13,] -0.042899923 2.345578e-15
## [14,] -0.288114682 3.211997e-15
## [15,] -0.092704244 2.488035e-15
## [16,] 0.081495643 3.076341e-15
## [17,] -0.068061386 1.863274e-15
## [18,] -0.027200887 1.838719e-15
## [19,] 0.252374309 2.633181e-15
dist_matrix <- dist(svi[, -1])
mds_result <- cmdscale(dist_matrix, k = 3) # k = number of dimensions
village_names <- svi[, 1]
plot(mds_result, type = "n", main = "Classical MDS")
text(mds_result, labels = village_names, cex = 0.7)
pc_subset <- pca_scores[, 1:3] # First 3 principal components
pc_dist <- dist(pc_subset)
hc <- hclust(pc_dist, method = "ward.D2") # You can also try "complete", "average", etc.
plot(hc, labels = svi[, 1], main = "Hierarchical Clustering Dendrogram", xlab = "", sub = "", cex = 0.7)
Compare the 2D MDS map and the hierarchical clustering structure.
• Are the clusters consistent between MDS and hierarchical clustering?
• Do the villages group similarly across methods?
Answering both questions: When comparing the 2D Classical MDS map and the hierarchical clustering dendrogram, we see a strong degree of consistency in how the villages group based on their social vulnerability characteristics. Both methods aim to identify patterns of similarity, but they visualize them differently: MDS preserves the pairwise distances between villages in a spatial layout, while hierarchical clustering builds a tree based on how villages group together incrementally.
A clear example of agreement between the two methods is seen with the southern villages of Inarajan, Merizo, and Umatac. These villages are tightly clustered together on the left side of the MDS map and also form a distinct branch on the far left of the dendrogram, indicating high similarity and early merging. Similarly, Tamuning and Hagåtña stand out in both visualizations. They appear far from the rest of the villages in the MDS plot, suggesting they are quite distinct, and in the dendrogram, they merge at a higher level (i.e., greater dissimilarity), which reinforces their uniqueness.
There is also a mid-group of central or northern villages, such as Yigo, Dededo, Mangilao, and M-T-M, that cluster together in both methods. On the MDS map, these villages appear relatively close in space, and in the dendrogram, they form a tight sub-cluster, showing consistency in how the methods identify shared characteristics. A similar consistency is observed among central villages like Yona, Talofofo, Ordot-Chalan Pago, Agana Heights, and Sinajana, which are grouped in the middle of the MDS space and are also grouped together in the middle section of the dendrogram.
While the overall patterns are similar, there are slight differences in how some villages are grouped depending on the method. For instance, Agat appears spatially closer to the southern villages in the MDS map, but in the dendrogram, it joins a cluster with more urbanized areas like M-T-M and Mangilao, suggesting some ambiguity in its grouping. These differences are expected due to the different mathematical criteria used by each method.
In summary, both the MDS and hierarchical clustering methods reveal broadly consistent clusters among villages, especially for the southernmost villages, urban centers, and mid-region communities. Although the specific distances and cluster boundaries may vary slightly, the overall structure of the groupings aligns well across the two approaches.
species <- data.frame('D' = c(1000, 100, 940, 2040),
'E' = c(100, 10, 110, 220),
'F' = c(10, 1, 60, 71))
#Setting index (rownames).
rownames(species) <- c('Sample 1', 'Sample 2', 'Sample 3', 'Total')
samples_only <- species[1:3, ] #Only the first 3 rows.
species_distribution <- prop.table(as.matrix(samples_only), margin = 1)
round(species_distribution, 3)
## D E F
## Sample 1 0.901 0.090 0.009
## Sample 2 0.901 0.090 0.009
## Sample 3 0.847 0.099 0.054
Sample 1 and Sample 2 have very similar distributions: they are both highly dominated by species D (both about 90%), with small portions of E (both at 9%) and very little F (both at 0.9%) .
Sample 3, although still dominated by D (with a proportion of about 85%), has a slightly higher proportion of species F (around 5.4%) and a slightly more balanced distribution in comparison to Samples 1 and 2.
Thus, the samples that are close (based off distribution) are Samples 1 and 2.
dDE<-((abs(species[1,1] - species[1,2]) + abs(species[2,1] - species[2,2]) + abs(species[3,1] - species[3,2]))/ ((species[1,1] + species[1,2]) + (species[2,1] + species[2,2]) + (species[3,1] + species[3,2])))
print(dDE)
## [1] 0.8053097
dDF<-((abs(species[1,1] - species[1,3]) + abs(species[2,1] - species[2,3]) + abs(species[3,1] - species[3,3]))/ ((species[1,1] + species[1,3]) + (species[2,1] + species[2,3]) + (species[3,1] + species[3,3])))
print(dDF)
## [1] 0.9327333
dEF<-((abs(species[1,2] - species[1,3]) + abs(species[2,2] - species[2,3]) + abs(species[3,2] - species[3,3]))/ ((species[1,2] + species[1,3]) + (species[2,2] + species[2,3]) + (species[3,2] + species[3,3])))
print(dEF)
## [1] 0.5120275
# Data transformation
transformed_data <- wisconsin(sqrt(species[,]))
transformed_data
## D E F
## Sample 1 0.4001639 0.3853378 0.2144983
## Sample 2 0.4001639 0.3853378 0.2144983
## Sample 3 0.2944701 0.3067449 0.3987850
## Total 0.3333333 0.3333333 0.3333333
species[,]
## D E F
## Sample 1 1000 100 10
## Sample 2 100 10 1
## Sample 3 940 110 60
## Total 2040 220 71
dDE<-((abs(transformed_data[1,1] - transformed_data[1,2]) + abs(transformed_data[2,1] - transformed_data[2,2]) + abs(transformed_data[3,1] - transformed_data[3,2]))/ ((transformed_data[1,1] + transformed_data[1,2]) + (transformed_data[2,1] + transformed_data[2,2]) + (transformed_data[3,1] + transformed_data[3,2])))
print(dDE)
## [1] 0.01930144
dDF<-((abs(transformed_data[1,1] - transformed_data[1,3]) + abs(transformed_data[2,1] - transformed_data[2,3]) + abs(transformed_data[3,1] - transformed_data[3,3]))/ ((transformed_data[1,1] + transformed_data[1,3]) + (transformed_data[2,1] + transformed_data[2,3]) + (transformed_data[3,1] + transformed_data[3,3])))
print(dDF)
## [1] 0.2474
dEF<-((abs(transformed_data[1,2] - transformed_data[1,3]) + abs(transformed_data[2,2] - transformed_data[2,3]) + abs(transformed_data[3,2] - transformed_data[3,3]))/ ((transformed_data[1,2] + transformed_data[1,3]) + (transformed_data[2,2] + transformed_data[2,3]) + (transformed_data[3,2] + transformed_data[3,3])))
print(dEF)
## [1] 0.22765