Cluster Analysis Pt. 3

## This code will not be evaluated automatically.
## (Notice the eval = FALSE declaration in the options section of the
## code chunk)

# CRAN packages installed by this (manually run) chunk.
# "tidycensus" is listed because get_acs() is called further down in this
# document.
my_packages <- c("tidyverse", "broom", "coefplot", "cowplot",
                 "gapminder", "GGally", "ggrepel", "ggridges", "gridExtra",
                 "here", "interplot", "margins", "maps", "mapproj",
                 "mapdata", "MASS", "quantreg", "rlang", "scales",
                 "survey", "srvyr", "viridis", "viridisLite", "devtools",
                 "tidycensus")

# Only install what is not already present, so re-running the chunk does not
# reinstall every package, and fetch over HTTPS rather than plain HTTP.
missing_packages <- setdiff(my_packages, rownames(installed.packages()))

if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "https://cran.rstudio.com")
}

Set Up Project and Load Libraries

To begin, we must load the libraries we will be using. If we do not load them, R will not be able to find the functions they contain. The tidyverse includes ggplot2 and other tools. We also load the socviz and gapminder libraries.

Look at Social Vulnerability Data

# Read the Social Vulnerability Index CSV into `svi`.
# NOTE(review): absolute path into one user's home directory; this will not
# resolve on another machine. Consider a project-relative path.
svi <- read_csv(
  file = "/Users/riyasharma/Documents/GitHub/24Spr_RSharma_DATS4001/data/bigdata/Social_Vulnerability_Index_VA_Data.csv"
)

## Rows: 1900 Columns: 127
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (5): the_geom, STATE, ST_ABBR, COUNTY, LOCATION
## dbl (122): ST, STCNTY, FIPS, AREA_SQMI, E_TOTPOP, M_TOTPOP, E_HU, M_HU, E_HH...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Derive a locality key (LOCATION2) from LOCATION: the text after the first
# ", " and before the last ",".
# NOTE(review): str_extract_all() returns a list, so LOCATION2 is a
# list-column; str_extract() would give a plain character column, but the
# va_county key built further down would need the same change.
svi$LOCATION2 <- str_extract_all(svi$LOCATION, "(?<=, ).+(?=,)")
svi_df <- as.data.frame(svi)

# Keep the identifier columns, total population, and the EP_* columns only
# (this leaves out the margin-of-error, theme, and other derived columns).
svi_pc_1 <- svi_df %>%
  select(
    the_geom, COUNTY, LOCATION, LOCATION2, E_TOTPOP,
    EP_POV, EP_NOHSDP, EP_AGE65, EP_AGE17, EP_DISABL,
    EP_SNGPNT, EP_MINRTY, EP_LIMENG, EP_MUNIT, EP_MOBILE,
    EP_CROWD, EP_NOVEH, EP_GROUPQ, EP_UNINSUR, EP_UNEMP
  )

# Drop the geometry/name columns that are not needed for clustering.
# LOCATION2 is kept on purpose: it is the key used for joining and grouping.
svi_pc_clean_1 <- svi_pc_1 %>%
  select(-c(the_geom, COUNTY, LOCATION))

# County-level ACS pull for Virginia with geometries attached, so results can
# be grouped by county and mapped with county shapes.
# The estimate/moe columns for B19013_001 are dropped again after the join
# further down; the call is mainly a way to obtain names and geometries.
va_county <- get_acs(
  state = "VA",
  geography = "county",
  year = 2020,
  variables = "B19013_001",
  geometry = TRUE
)

## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |=============================================================         |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================| 100%

# Build the same locality key on the county table: everything in NAME before
# the last comma (the empty lookbehind "(?<=)" has no effect on the match).
# NOTE(review): as with the SVI key, str_extract_all() yields a list-column.
va_county$LOCATION2 <- str_extract_all(va_county$NAME, "(?<=).+(?=,)")

# Attach the county rows to every tract row via the locality key.
svi_county_pc <- left_join(svi_pc_clean_1, va_county, by = "LOCATION2")

# Remove the columns that came from tidycensus, plus total population, so only
# the locality key and the EP_* percentage columns remain.
svi_county_pc_clean <- svi_county_pc %>%
  select(-c("variable", "estimate", "moe", "GEOID", "geometry", "NAME", "E_TOTPOP"))

# Collapse tract rows to one row per locality using the median of each column.
# NOTE(review): median() is called without na.rm, so any NA in a locality
# makes that locality's value NA; confirm how missing values are coded.
svi_county_pc_median <- summarise_all(
  group_by(svi_county_pc_clean, LOCATION2),
  median
)

Load Health Opportunity Index Data

I’ll join the HOI column to the SVI data frame for use in the cluster analysis.

# Read the Health Opportunity Index CSV into `hoi`.
# NOTE(review): absolute path into one user's home directory; this will not
# resolve on another machine. Consider a project-relative path.
hoi <- read_csv(
  file = "/Users/riyasharma/Documents/GitHub/24Spr_RSharma_DATS4001/data/bigdata/VA HOI Data/Health_Opportunity_Index_20240203.csv"
)

## Rows: 1875 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Rural~Urban
## dbl (19): Census Tract, Access to Care, Employment Accessibility, Affordabil...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Tract-level ACS pull for Virginia (variable B19013_001, year 2020) with
# tract geometries attached; the NAME and GEOID columns identify each tract.
va_income <- get_acs(
  state = "VA",
  geography = "tract",
  year = 2020,
  variables = "B19013_001",
  geometry = TRUE
)

## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%

# Build locality-level median Health Opportunity Index (HOI) values and merge
# them with the locality-level SVI medians used for PCA and clustering.

# Rename the tract identifier so it can serve as the GEOID join key.
hoi_county <- hoi %>%
  rename(GEOID = `Census Tract`)

# Coerce both join keys to character so their types agree.
hoi_county$GEOID <- as.character(hoi_county$GEOID)
va_income$GEOID <- as.character(va_income$GEOID)

hoi_county <- va_income %>%
  left_join(hoi_county, by = "GEOID")

hoi_county <- select(hoi_county, -c("variable", "moe", "estimate"))
# Replace spaces in column names with underscores.
colnames(hoi_county) <- gsub(" ", "_", colnames(hoi_county))

hoi_county$LOCATION <- hoi_county$NAME

# Text after the first ", " and before the last "," of NAME.
# str_extract_all() returns a list, so LOCATION2 is a list column.
# NOTE(review): presumably the county/city part of the tract name - confirm.
hoi_county$LOCATION2 <- str_extract_all(hoi_county$NAME, "(?<=, ).+(?=,)")

# NOTE(review): as.data.frame() presumably strips a spatial (sf) class so the
# geometry column can be dropped - confirm against how va_income is built.
hoi_county <- select(
  as.data.frame(hoi_county),
  -c("GEOID", "NAME", "LOCATION", "geometry", `Rural~Urban`)
)

# One row per LOCATION2: the median of every remaining column, ignoring NAs.
hoi_county <- hoi_county %>%
  group_by(LOCATION2) %>%
  summarise(across(everything(), ~ median(.x, na.rm = TRUE)))

# hoi_county = na.omit(hoi_county)

hoi_county <- as.data.frame(hoi_county)

hoi_county <- hoi_county %>%
  left_join(va_county, by = "LOCATION2")

hoi_county <- as.data.frame(hoi_county)
hoi_county <- select(
  hoi_county,
  -c("variable", "moe", "estimate", "NAME", "GEOID", "geometry")
)

svi_county_pc_median <- hoi_county %>%
  left_join(svi_county_pc_median, by = "LOCATION2")

# remove all other columns from HOI dataset besides median HOI
svi_county_pc_median <- select(
  svi_county_pc_median,
  -c(
    "Access_to_Care", "Affordability", "Employment_Accessibility",
    "Air_Quality", "Population_Churning", "Education", "Food_Accessibility",
    "Income_Inequality", "Job_Participation", "Population_Density",
    "Segregation", "Material_Deprivation", "Walkability",
    "Community_Environment_Profile", "Consumer_Opportunity_Profile",
    "Economic_Opportunity_Profile", "Wellness_Disparity_Profile"
  )
)

# Complete cases only. `svi_county_pc_128` keeps LOCATION2 (used later for
# plot labels); `svi_county_pc_numeric` is the same rows without it.
svi_county_pc_128 <- na.omit(svi_county_pc_median)
svi_county_pc_numeric <- select(svi_county_pc_128, -c("LOCATION2"))

# divide all columns (except HOI) by 100 so they're all on the same scale
# (selected by name rather than by position so a column reorder cannot
# silently rescale the wrong variables)
svi_county_pc_numeric <- svi_county_pc_numeric %>%
  mutate(across(-Health_Opportunity_Index, ~ .x / 100))

PCA

# Principal component analysis on the numeric locality-level table.
# The rotation matrix printed below holds each variable's loading on every
# principal component.
pca_res <- prcomp(svi_county_pc_numeric)
pca_res[["rotation"]]

##                                   PC1         PC2          PC3          PC4
## Health_Opportunity_Index -0.084098991  0.28571097 -0.361615876  0.011521564
## EP_POV                    0.041903254 -0.40847464  0.452592375 -0.264020974
## EP_NOHSDP                 0.009998088 -0.40945180 -0.036410996 -0.127373003
## EP_AGE65                 -0.121117306 -0.19653380 -0.135950362 -0.004092889
## EP_AGE17                  0.044770473  0.07585217 -0.139973365 -0.172500966
## EP_DISABL                -0.077404604 -0.31314136  0.007410657 -0.188925287
## EP_SNGPNT                 0.084529203 -0.07698988  0.031291969 -0.251818933
## EP_MINRTY                 0.939755415 -0.14220393 -0.241221019  0.136844530
## EP_LIMENG                 0.023866472  0.03041623  0.013409972  0.017728105
## EP_MUNIT                  0.150437484  0.22531333  0.695852297  0.448782295
## EP_MOBILE                -0.213671211 -0.55957297 -0.194637791  0.673065421
## EP_CROWD                  0.008542874  0.00244183  0.016855845  0.016345873
## EP_NOVEH                  0.074536597 -0.14247742  0.193080154 -0.193226379
## EP_GROUPQ                 0.018296289 -0.04162013  0.050037543  0.110340171
## EP_UNINSUR                0.029853581 -0.13786514  0.029140125 -0.198270528
## EP_UNEMP                  0.049088829 -0.09707200  0.050798887 -0.150014420
##                                   PC5         PC6         PC7           PC8
## Health_Opportunity_Index -0.302227349  0.05518448 -0.56747235  0.2187314356
## EP_POV                   -0.023727434 -0.43704271 -0.23324151 -0.1893570741
## EP_NOHSDP                -0.278981168  0.43573932  0.33605499  0.0706027328
## EP_AGE65                  0.721707351  0.45117465 -0.19377808  0.0429136600
## EP_AGE17                 -0.394806365  0.17492042  0.05746962  0.0925027736
## EP_DISABL                -0.061418738  0.15283591 -0.42690612  0.1633666998
## EP_SNGPNT                -0.123065042  0.02468668  0.04827142  0.2399148659
## EP_MINRTY                 0.070670899 -0.02048722 -0.09265913 -0.0414606244
## EP_LIMENG                -0.088421604  0.10654178  0.09333488  0.0050705284
## EP_MUNIT                 -0.124851747  0.39362845 -0.15246310  0.0957415060
## EP_MOBILE                -0.229250995 -0.10085439 -0.07816388 -0.0115200182
## EP_CROWD                 -0.041195668  0.08136020  0.05183996 -0.0008072342
## EP_NOVEH                 -0.006871349  0.12117682 -0.28037595  0.2611076614
## EP_GROUPQ                 0.175378536 -0.25819536  0.30767753  0.8214103630
## EP_UNINSUR               -0.116638692  0.29956312  0.22565114 -0.1314924623
## EP_UNEMP                 -0.086896872 -0.05799628 -0.10805206  0.2131726763
##                                   PC9        PC10         PC11         PC12
## Health_Opportunity_Index -0.500774196 -0.11454303  0.145602703 -0.112897782
## EP_POV                   -0.330569879 -0.12175059  0.296487360 -0.070132163
## EP_NOHSDP                -0.234217689  0.31178737  0.361327023 -0.183135221
## EP_AGE65                 -0.026140345 -0.24332679  0.249944158 -0.085340400
## EP_AGE17                  0.309333513 -0.14901221  0.208294901  0.001804473
## EP_DISABL                 0.171270092  0.42784589 -0.130025488  0.603943010
## EP_SNGPNT                 0.326955655 -0.23816423  0.118355260 -0.246531067
## EP_MINRTY                -0.034696311  0.02420696  0.003377656  0.041703544
## EP_LIMENG                -0.157996144 -0.04329142  0.038878029  0.044987739
## EP_MUNIT                 -0.008253917 -0.05039382  0.119916471  0.069439917
## EP_MOBILE                 0.111014025 -0.19299703 -0.114909933 -0.096646932
## EP_CROWD                 -0.005802071 -0.06302912 -0.015684092  0.131640780
## EP_NOVEH                  0.106564107  0.10030712 -0.539809116 -0.575331038
## EP_GROUPQ                -0.252389883  0.03590189 -0.012275708  0.132752195
## EP_UNINSUR               -0.410649467 -0.43645994 -0.532054967  0.250059892
## EP_UNEMP                  0.264961852 -0.55280243  0.139943637  0.263765592
##                                 PC13          PC14         PC15          PC16
## Health_Opportunity_Index  0.05369174 -0.1017549460  0.066889168  0.0243066893
## EP_POV                   -0.22945158  0.0673289953  0.013130056  0.0272411645
## EP_NOHSDP                 0.32122281  0.0501273493  0.082464334  0.0205046545
## EP_AGE65                 -0.17580574  0.0610244908 -0.005964000 -0.0089827929
## EP_AGE17                 -0.59990937  0.4586301232  0.081005971 -0.0805450008
## EP_DISABL                -0.08499917 -0.1333307565 -0.043515123 -0.0565675742
## EP_SNGPNT                -0.12345814 -0.7694256878  0.003735493 -0.0348168294
## EP_MINRTY                -0.01935502  0.0003816526  0.008961285 -0.0030862976
## EP_LIMENG                -0.11315465 -0.0293843044 -0.962322519  0.0003483533
## EP_MUNIT                 -0.02584089 -0.0556347583  0.086010228 -0.0712284707
## EP_MOBILE                -0.08614106 -0.0362388180 -0.018515860 -0.0125928221
## EP_CROWD                 -0.10516198 -0.0463856563  0.042085080  0.9756032520
## EP_NOVEH                  0.06496694  0.2622558426 -0.101616539  0.1042464364
## EP_GROUPQ                -0.15732878  0.0749230025  0.053074192 -0.0224783041
## EP_UNINSUR               -0.11771139 -0.0910281083  0.152034533 -0.1356133384
## EP_UNEMP                  0.59402417  0.2619924389 -0.090188925  0.0214330540

# Standard deviation, proportion of variance and cumulative proportion of
# variance for each principal component
summary(pca_res)

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     0.1964 0.1231 0.09332 0.05709 0.04687 0.03577 0.03188
## Proportion of Variance 0.5235 0.2056 0.11816 0.04423 0.02981 0.01736 0.01379
## Cumulative Proportion  0.5235 0.7291 0.84725 0.89148 0.92128 0.93864 0.95243
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.03096 0.02952 0.02240 0.01898 0.01612 0.01542 0.01404
## Proportion of Variance 0.01300 0.01182 0.00681 0.00489 0.00353 0.00323 0.00267
## Cumulative Proportion  0.96544 0.97726 0.98407 0.98896 0.99249 0.99571 0.99839
##                           PC15     PC16
## Standard deviation     0.00848 0.006843
## Proportion of Variance 0.00098 0.000640
## Cumulative Proportion  0.99936 1.000000

# Eigenvalue, percent of variance and cumulative percent for each dimension
get_eig(pca_res) # Dim.1 and Dim.2 have the highest variance.percent

##          eigenvalue variance.percent cumulative.variance.percent
## Dim.1  3.857919e-02      52.35029375                    52.35029
## Dim.2  1.515043e-02      20.55847838                    72.90877
## Dim.3  8.707779e-03      11.81607998                    84.72485
## Dim.4  3.259323e-03       4.42276082                    89.14761
## Dim.5  2.196596e-03       2.98068517                    92.12830
## Dim.6  1.279183e-03       1.73579587                    93.86409
## Dim.7  1.016338e-03       1.37912682                    95.24322
## Dim.8  9.583723e-04       1.30046974                    96.54369
## Dim.9  8.712809e-04       1.18229047                    97.72598
## Dim.10 5.018349e-04       0.68096832                    98.40695
## Dim.11 3.603315e-04       0.48895433                    98.89590
## Dim.12 2.599320e-04       0.35271654                    99.24862
## Dim.13 2.378604e-04       0.32276634                    99.57139
## Dim.14 1.971301e-04       0.26749702                    99.83888
## Dim.15 7.190936e-05       0.09757790                    99.93646
## Dim.16 4.682432e-05       0.06353857                   100.00000

# PC1 explains 52.35% of the total variance, and within PC1 percent minority (EP_MINRTY) has by far the largest loading (0.94)

# Visualize how correlated variables are with PC1 and PC2.
# Reading of the plot (author's note): median percent minority and median per
# capita income are loosely correlated with nearly all other variables, with
# EP_AGE17, EP_LIMENG, EP_CROWD, EP_GROUPQ, EP_SNGPNT, EP_UNINSUR and all
# others highly correlated with one another.
# NOTE(review): no income variable appears among the loadings printed for
# pca_res (Health_Opportunity_Index is the only non-EP_ column) - the income
# reference may be left over from an earlier analysis; confirm.
fviz_pca_var(pca_res, col.var = "steelblue") +
  theme_minimal()

# Visualize Dim2 vs. Dim1 for individual points, labelled by locality.
# `show.legend` is a layer argument, not an aesthetic, so it must sit outside
# aes(); inside aes() it is ignored with an "unknown aesthetics" warning.
fviz_pca_ind(pca_res, title = "Individuals Factor Map (PCA)", geom = "point") +
  geom_point(show.legend = FALSE) +
  geom_text_repel(
    aes(label = svi_county_pc_128$LOCATION2),
    show.legend = FALSE
  )

## Warning in geom_text_repel(aes(label = (svi_county_pc_128$LOCATION2),
## show.legend = FALSE)): Ignoring unknown aesthetics: show.legend

## Warning: ggrepel: 70 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Cluster Analysis

#############################################################
# find optimal number of clusters with a scree (elbow) plot #
#############################################################

# Largest number of clusters to try
n_clusters <- 10

set.seed(123)

# Total within-cluster sum of squares for k = 1, ..., n_clusters,
# each k-means fit using 20 random starts
wss <- vapply(
  seq_len(n_clusters),
  function(k) {
    kmeans(svi_county_pc_numeric, centers = k, nstart = 20)$tot.withinss
  },
  numeric(1)
)

# Table backing the scree plot: one row per candidate cluster count
wss_df <- tibble(clusters = seq_len(n_clusters), wss = wss)

scree_plot <- ggplot(
  data = wss_df,
  mapping = aes(x = clusters, y = wss, group = 1)
) +
  geom_point(size = 4) +
  geom_line() +
  scale_x_continuous(breaks = seq(2, 10, by = 2)) +
  xlab("Number of clusters") +
  theme_economist()

# Dashed reference line at every observed within-cluster sum of squares.
# We see that within cluster sum of squares sharply reduces up to 6 clusters
# but the variability doesn't change much past that point. For this reason,
# we will do cluster analysis with 6 clusters.
scree_plot +
  geom_hline(yintercept = wss, linetype = "dashed")

# Fit the final k-means model with 6 clusters (20 random starts, fixed seed
# for reproducibility) and print the fitted object
set.seed(134)
km_svi_pc_county <- kmeans(
  x = svi_county_pc_numeric,
  centers = 6,
  nstart = 20
)
km_svi_pc_county

## K-means clustering with 6 clusters of sizes 36, 16, 22, 30, 11, 13
## 
## Cluster means:
##   Health_Opportunity_Index     EP_POV  EP_NOHSDP  EP_AGE65  EP_AGE17  EP_DISABL
## 1                0.4431281 0.08562500 0.10705556 0.1907917 0.2044444 0.13198611
## 2                0.3569403 0.18525000 0.17568750 0.1565000 0.2130625 0.16118750
## 3                0.3890466 0.18297727 0.17350000 0.2211818 0.1869773 0.21500000
## 4                0.3895936 0.15175000 0.16840000 0.2123667 0.1944667 0.16168333
## 5                0.3495323 0.17227273 0.07727273 0.1172273 0.1769545 0.09336364
## 6                0.4627851 0.05815385 0.07865385 0.1203846 0.2418462 0.10161538
##    EP_SNGPNT  EP_MINRTY   EP_LIMENG   EP_MUNIT   EP_MOBILE   EP_CROWD
## 1 0.06316667 0.15733333 0.005138889 0.02368056 0.056013889 0.01058333
## 2 0.11859375 0.62518750 0.010406250 0.07306250 0.050625000 0.01303125
## 3 0.06790909 0.05290909 0.001636364 0.02000000 0.202886364 0.01038636
## 4 0.07458333 0.33540000 0.006916667 0.01495000 0.145650000 0.01241667
## 5 0.06936364 0.32559091 0.018272727 0.26531818 0.003272727 0.01768182
## 6 0.07600000 0.39203846 0.018230769 0.04257692 0.006423077 0.01065385
##     EP_NOVEH    EP_GROUPQ EP_UNINSUR   EP_UNEMP
## 1 0.04016667 0.0049583333 0.08444444 0.04256944
## 2 0.10937500 0.0140312500 0.11534375 0.08775000
## 3 0.06940909 0.0105909091 0.09943182 0.06006818
## 4 0.05848333 0.0214666667 0.10746667 0.04930000
## 5 0.08431818 0.0236818182 0.07572727 0.04809091
## 6 0.02669231 0.0002307692 0.07488462 0.04776923
## 
## Clustering vector:
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   6   2   1   6   4   4   3   5   5   1   2   5   5   6   3   1   6   6   6   6 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   5   5   2   2   2   2   5   1   3   1   1   1   1   3   1   4   2   1   4   6 
##  41  42  43  44  45  46  47  48  49  50  51  53  54  55  56  57  58  59  60  61 
##   1   1   2   6   5   3   6   4   4   6   5   2   4   4   4   1   4   5   4   5 
##  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  81  82 
##   2   2   3   6   1   1   1   4   4   4   3   1   3   3   4   1   4   4   4   3 
##  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 
##   1   3   2   3   1   4   4   3   3   4   3   4   1   1   6   3   4   4   3   2 
## 103 104 105 106 107 108 109 110 111 112 114 115 116 117 118 119 120 121 122 123 
##   3   1   3   1   1   1   1   1   1   4   1   4   1   4   4   2   3   1   1   1 
## 124 125 127 128 129 130 131 133 
##   2   4   1   1   4   3   2   3 
## 
## Within cluster sum of squares by cluster:
## [1] 0.6326554 0.5939735 0.4994669 0.6559988 0.5329950 0.1551270
##  (between_SS / total_SS =  67.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

#####################################
# initial visualization of clusters #
#####################################

fviz_cluster(km_svi_pc_county, svi_county_pc_numeric)

# Add cluster column to main data frame.
# The cluster vector was fitted on the complete-case rows, so drop incomplete
# rows here too and check that the two line up before assigning.
svi_county_pc_median <- na.omit(svi_county_pc_median)
stopifnot(nrow(svi_county_pc_median) == length(km_svi_pc_county$cluster))
svi_county_pc_median$cluster <- km_svi_pc_county$cluster

# map of counties and clusters
svi_county_pc_median <- svi_county_pc_median %>%
  left_join(va_county, by = "LOCATION2")

svi_county_pc_median <- select(
  svi_county_pc_median,
  -c("variable", "estimate", "moe", "GEOID")
)
svi_county_pc_median$cluster <- as.factor(svi_county_pc_median$cluster)
svi_county_pc_median$LOCATION2 <- as.character(svi_county_pc_median$LOCATION2)

svi_county_pc_median_sf <- st_as_sf(svi_county_pc_median)

# HTML hover label for each locality: name in bold plus its cluster
mylabel1 <- glue::glue("<strong>{svi_county_pc_median$LOCATION2}</strong><br />
                      Cluster: {svi_county_pc_median$cluster}<br />") %>%
  lapply(htmltools::HTML)

# One colour per cluster (6 clusters, 6 palette entries)
cluster_map <- mapview(
  st_as_sf(svi_county_pc_median),
  zcol = "cluster",
  col.regions = RColorBrewer::brewer.pal(6, "Accent"),
  alpha.regions = 1,
  label = mylabel1,
  legend = FALSE
)
cluster_map

Visualize by Variables

# Percent below the poverty line (EP_POV) by cluster, drawn as one violin per
# cluster with locality labels; according to the PCA, percent below poverty
# is very influential
pov_plot <- ggplot(
  data = svi_county_pc_median,
  mapping = aes(x = cluster, y = EP_POV, colour = factor(cluster))
) +
  geom_violin(show.legend = TRUE) +
  geom_text_repel(
    mapping = aes(label = LOCATION2),
    colour = "black",
    show.legend = FALSE
  ) +
  labs(
    x = "Cluster",
    y = "Median % Living Below Poverty Line",
    title = "Scatterplot for Poverty & Cluster"
  ) +
  theme_economist_white()
pov_plot

## Warning: ggrepel: 110 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

More Visualizations by Variables with High Influence

# Percent minority (EP_MINRTY) by cluster: one violin per cluster, with
# locality labels drawn in black
minrty_plot <- ggplot(
  data = svi_county_pc_median,
  mapping = aes(x = cluster, y = EP_MINRTY, colour = factor(cluster))
) +
  geom_violin(show.legend = TRUE) +
  geom_text_repel(
    mapping = aes(label = LOCATION2),
    colour = "black",
    show.legend = FALSE
  ) +
  labs(
    x = "Cluster",
    y = "Median % Minority",
    title = "Scatterplot for Percent Minority & Cluster"
  ) +
  theme_economist_white()
minrty_plot

## Warning: ggrepel: 114 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Percent minority map.
# HTML hover label for each locality: name in bold plus its median percent
# minority value.
mylabel2 <- glue::glue("<strong>{svi_county_pc_median$LOCATION2}</strong><br />
                      Median % Minority: {svi_county_pc_median$EP_MINRTY}<br />") %>%
  lapply(htmltools::HTML)

# Localities coloured by EP_MINRTY; the 5-colour palette is interpolated by
# mapview to cover all distinct values.
minrty_map <- mapview(
  st_as_sf(svi_county_pc_median),
  zcol = "EP_MINRTY",
  col.regions = RColorBrewer::brewer.pal(5, "YlGnBu"),
  alpha.regions = 1,
  label = mylabel2,
  legend = FALSE,
  layer.name = "Median % Minority of VA Localities"
)

## Warning: Found less unique colors (5) than unique zcol values (126)! 
## Interpolating color vector to match number of zcol values.

# Display the percent-minority map
minrty_map

# Percent living in mobile homes (EP_MOBILE) by cluster: one violin per
# cluster, with locality labels drawn in black
mobile_plot <- ggplot(
  data = svi_county_pc_median,
  mapping = aes(x = cluster, y = EP_MOBILE, colour = factor(cluster))
) +
  geom_violin(show.legend = TRUE) +
  geom_text_repel(
    mapping = aes(label = LOCATION2),
    colour = "black",
    show.legend = FALSE
  ) +
  labs(
    x = "Cluster",
    y = "Median % Living in Mobile Homes",
    title = "Scatterplot for Percent Living in Mobile Homes & Cluster"
  ) +
  theme_economist_white()
mobile_plot

## Warning: ggrepel: 90 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Percent mobile home map.
# HTML hover label for each locality: name in bold plus its median percent
# living in mobile homes. (The label text previously read "Median % Minority"
# although the value shown is EP_MOBILE.)
mylabel3 <- glue::glue("<strong>{svi_county_pc_median$LOCATION2}</strong><br />
                      Median % Living in Mobile Homes: {svi_county_pc_median$EP_MOBILE}<br />") %>%
  lapply(htmltools::HTML)

# Localities coloured by EP_MOBILE; the 5-colour palette is interpolated by
# mapview to cover all distinct values.
mobile_map <- mapview(
  st_as_sf(svi_county_pc_median),
  zcol = "EP_MOBILE",
  col.regions = RColorBrewer::brewer.pal(5, "RdPu"),
  alpha.regions = 1,
  label = mylabel3,
  legend = FALSE,
  layer.name = "Median % Living in Mobile Homes per VA Locality"
)

## Warning: Found less unique colors (5) than unique zcol values (91)! 
## Interpolating color vector to match number of zcol values.

# Display the mobile-home map
mobile_map

# Median Health Opportunity Index by cluster: one violin per cluster, with
# locality labels drawn in black
HOI_plot <- ggplot(
  data = svi_county_pc_median,
  mapping = aes(
    x = cluster,
    y = Health_Opportunity_Index,
    colour = factor(cluster)
  )
) +
  geom_violin(show.legend = TRUE) +
  geom_text_repel(
    mapping = aes(label = LOCATION2),
    colour = "black",
    show.legend = FALSE
  ) +
  labs(
    x = "Cluster",
    y = "Median HOI",
    title = "Scatterplot for Health Opportunity Index & Cluster"
  ) +
  theme_economist_white()
HOI_plot

## Warning: ggrepel: 100 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# HOI map.
# HTML hover label for each locality: name in bold plus its median Health
# Opportunity Index.
mylabel4 <- glue::glue("<strong>{svi_county_pc_median$LOCATION2}</strong><br />
                      Median Health Opportunity Index (HOI): {svi_county_pc_median$Health_Opportunity_Index}<br />") %>%
  lapply(htmltools::HTML)

# Localities coloured by Health_Opportunity_Index; the 4-colour palette is
# interpolated by mapview to cover all distinct values.
HOI_map <- mapview(
  st_as_sf(svi_county_pc_median),
  zcol = "Health_Opportunity_Index",
  col.regions = RColorBrewer::brewer.pal(4, "YlOrRd"),
  alpha.regions = 1,
  label = mylabel4,
  legend = FALSE,
  layer.name = "Median HOI per VA Locality"
)

## Warning: Found less unique colors (4) than unique zcol values (128)! 
## Interpolating color vector to match number of zcol values.

# Display the Health Opportunity Index map
HOI_map

# Percent without a high school diploma (EP_NOHSDP) by cluster: one violin
# per cluster, with locality labels drawn in black
HSD_plot <- ggplot(
  data = svi_county_pc_median,
  mapping = aes(x = cluster, y = EP_NOHSDP, colour = factor(cluster))
) +
  geom_violin(show.legend = TRUE) +
  geom_text_repel(
    mapping = aes(label = LOCATION2),
    colour = "black",
    show.legend = FALSE
  ) +
  labs(
    x = "Cluster",
    y = "Median % Without High School Diploma",
    title = "Scatterplot for High School Diploma & Cluster"
  ) +
  theme_economist_white()
HSD_plot

## Warning: ggrepel: 86 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# High school diploma map (percent without a diploma, EP_NOHSDP).
# HTML hover label for each locality: name in bold plus its median percent
# without a high school diploma.
mylabel5 <- glue::glue("<strong>{svi_county_pc_median$LOCATION2}</strong><br />
                      Median % Without High School Diploma: {svi_county_pc_median$EP_NOHSDP}<br />") %>%
  lapply(htmltools::HTML)

# Localities coloured by EP_NOHSDP; the 4-colour palette is interpolated by
# mapview to cover all distinct values.
HSD_map <- mapview(
  st_as_sf(svi_county_pc_median),
  zcol = "EP_NOHSDP",
  col.regions = RColorBrewer::brewer.pal(4, "PuRd"),
  alpha.regions = 1,
  label = mylabel5,
  legend = FALSE,
  layer.name = "Median % without HS Diploma per VA Locality"
)

## Warning: Found less unique colors (4) than unique zcol values (108)! 
## Interpolating color vector to match number of zcol values.

# Display the high-school-diploma map
HSD_map