PCA COA Index

Author

CRG

Principal Components Analysis - COA Check

For reference, I am working off of Dr. Sparks’s notes here.

I am using the ArcGIS export file from KW’s composite index.

library(car)
Warning: package 'car' was built under R version 4.4.2
Loading required package: carData
Warning: package 'carData' was built under R version 4.4.2
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(survey)
Loading required package: grid
Loading required package: Matrix
Loading required package: survival

Attaching package: 'survey'
The following object is masked from 'package:graphics':

    dotchart
library(ggplot2)
library(pander)
Warning: package 'pander' was built under R version 4.4.3
library(dplyr)

Attaching package: 'dplyr'
The following object is masked from 'package:car':

    recode
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(knitr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
✔ readr     2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ dplyr::recode() masks car::recode()
✖ purrr::some()   masks car::some()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# #note: I imported csv directly via enviornment pane 
# index <- IndexLayer_0
# 
# library(dplyr)
# 
# #convers first row to character so I can turn it into a header 
# index_fixed <- index %>%
#   rename_with(~ as.character(unlist(index[1, ]))) %>%
#   slice(-1)
#                      # drop the header row
# 
# 
# str(index_fixed)
# 
# 
# # View(index)
# 
# #select variables --- not necessary, but doing this on raw just to get workflow
# vars <- c(
#   "uninsured_est","est_child_pov","disability_est","EAL_VALP_x100",
#   "med_inc_hh_est","persistent_poverty","est_underemp_perc","eviction_filing_rate",
#   "below_pov_est","Energy_Burden____income_","limited_english_hh_est","est_no_internet_perc",
#   "est_65plus_ambulatory","less_than_highschool_est","low_physical_activity_est",
#   "hh_support_risk_score"
# )
# 
# #convert to numeric
# index_num <- index_fixed %>%
#   mutate(across(all_of(vars), ~as.numeric(.)))
# 
# 
# 
# #need to scale. Dr. Sparks used mutate_as, but can use new command (across)
# index_scaled <- index_num %>%
#   mutate(across(all_of(vars), scale))
# 
# #PCA
# pca_res <- prcomp(index_scaled[vars], center = TRUE, scale. = FALSE)
# 
# #Screeplot
# screeplot(pca_res, type = "l", main = "Scree Plot")
# abline(h=1)
# summary(pca_res)
# 
# loadings <- pca_res$rotation
# round(loadings, 3)
# 
# 
# 
# corr_mat <- cor(index_num[vars], use = "pairwise.complete.obs")
# round(corr_mat, 2)
# 
# library(reshape2)
# library(ggplot2)
# 
# corr_df <- melt(corr_mat)
# 
# ggplot(corr_df, aes(Var1, Var2, fill = value)) +
#   geom_tile(color = "white") +
#   scale_fill_gradient2(
#     low = "blue", high = "red", mid = "white",
#     midpoint = 0, limit = c(-1,1)
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   labs(
#     title = "Correlation Heatmap of PCA Variables",
#     x = "",
#     y = "",
#     fill = "Corr"
#   )
# 
# ggplot(corr_df, aes(Var1, Var2, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value,2)), size = 3) +
#   scale_fill_gradient2(
#     low = "blue", high = "red", mid = "white",
#     midpoint = 0, limit = c(-1,1)
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   labs(
#     title = "Correlation Heatmap with Labels",
#     x = "",
#     y = "",
#     fill = "Corr"
#   )

PCA on Pre-processed Variables

KW ran PCA on raw variables

# str(index)
index2 <- readr::read_csv("IndexLayer_0.csv")
Rows: 261 Columns: 41
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (41): OBJECTID, SOURCE_ID, uninsured_est, est_child_pov, disability_est,...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
class(index2)
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
names(index2)
 [1] "OBJECTID"                                        
 [2] "SOURCE_ID"                                       
 [3] "uninsured_est"                                   
 [4] "est_child_pov"                                   
 [5] "disability_est"                                  
 [6] "EAL_VALP_x100"                                   
 [7] "med_inc_hh_est"                                  
 [8] "persistent_poverty"                              
 [9] "est_underemp_perc"                               
[10] "eviction_filing_rate"                            
[11] "below_pov_est"                                   
[12] "Energy_Burden____income_"                        
[13] "limited_english_hh_est"                          
[14] "est_no_internet_perc"                            
[15] "est_65plus_ambulatory"                           
[16] "less_than_highschool_est"                        
[17] "low_physical_activity_est"                       
[18] "hh_support_risk_score"                           
[19] "uninsured_est (Reversed, Percentile)"            
[20] "est_child_pov (Reversed, Percentile)"            
[21] "disability_est (Reversed, Percentile)"           
[22] "EAL_VALP_x100 (Reversed, Percentile)"            
[23] "med_inc_hh_est (Percentile)"                     
[24] "persistent_poverty (Reversed, Percentile)"       
[25] "est_underemp_perc (Reversed, Percentile)"        
[26] "eviction_filing_rate (Reversed, Percentile)"     
[27] "below_pov_est (Reversed, Percentile)"            
[28] "Energy_Burden____income_ (Reversed, Percentile)" 
[29] "limited_english_hh_est (Reversed, Percentile)"   
[30] "est_no_internet_perc (Reversed, Percentile)"     
[31] "est_65plus_ambulatory (Reversed, Percentile)"    
[32] "less_than_highschool_est (Reversed, Percentile)" 
[33] "low_physical_activity_est (Reversed, Percentile)"
[34] "hh_support_risk_score (Reversed, Percentile)"    
[35] "Index - Mean (Raw)"                              
[36] "Index - Mean"                                    
[37] "Index - Mean (Rank)"                             
[38] "Index - Mean (Percentile)"                       
[39] "Index - Mean (Equal Interval Classes)"           
[40] "Index - Mean (Quantile Classes)"                 
[41] "Index - Mean (Standard Deviation Classes)"       
# identify percentile-normalized columns
rev_cols <- grep(" \\((Reversed, )?Percentile\\)$", 
                 names(index2), value = TRUE)

rev_cols
 [1] "uninsured_est (Reversed, Percentile)"            
 [2] "est_child_pov (Reversed, Percentile)"            
 [3] "disability_est (Reversed, Percentile)"           
 [4] "EAL_VALP_x100 (Reversed, Percentile)"            
 [5] "med_inc_hh_est (Percentile)"                     
 [6] "persistent_poverty (Reversed, Percentile)"       
 [7] "est_underemp_perc (Reversed, Percentile)"        
 [8] "eviction_filing_rate (Reversed, Percentile)"     
 [9] "below_pov_est (Reversed, Percentile)"            
[10] "Energy_Burden____income_ (Reversed, Percentile)" 
[11] "limited_english_hh_est (Reversed, Percentile)"   
[12] "est_no_internet_perc (Reversed, Percentile)"     
[13] "est_65plus_ambulatory (Reversed, Percentile)"    
[14] "less_than_highschool_est (Reversed, Percentile)" 
[15] "low_physical_activity_est (Reversed, Percentile)"
[16] "hh_support_risk_score (Reversed, Percentile)"    
[17] "Index - Mean (Percentile)"                       
#take out mean
rev_cols <- rev_cols[rev_cols != "Index - Mean (Percentile)"]
rev_cols
 [1] "uninsured_est (Reversed, Percentile)"            
 [2] "est_child_pov (Reversed, Percentile)"            
 [3] "disability_est (Reversed, Percentile)"           
 [4] "EAL_VALP_x100 (Reversed, Percentile)"            
 [5] "med_inc_hh_est (Percentile)"                     
 [6] "persistent_poverty (Reversed, Percentile)"       
 [7] "est_underemp_perc (Reversed, Percentile)"        
 [8] "eviction_filing_rate (Reversed, Percentile)"     
 [9] "below_pov_est (Reversed, Percentile)"            
[10] "Energy_Burden____income_ (Reversed, Percentile)" 
[11] "limited_english_hh_est (Reversed, Percentile)"   
[12] "est_no_internet_perc (Reversed, Percentile)"     
[13] "est_65plus_ambulatory (Reversed, Percentile)"    
[14] "less_than_highschool_est (Reversed, Percentile)" 
[15] "low_physical_activity_est (Reversed, Percentile)"
[16] "hh_support_risk_score (Reversed, Percentile)"    
#create \- still use across
pca_df <- index2 %>%
  dplyr::select(all_of(rev_cols)) %>%
  mutate(across(everything(), as.numeric))



#correlation matrix on normalized variables
cor_mat <- cor(pca_df, use = "pairwise.complete.obs", method = "pearson")
cor_mat
                                                 uninsured_est (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                       1.00000000
est_child_pov (Reversed, Percentile)                                       0.45904347
disability_est (Reversed, Percentile)                                      0.31797702
EAL_VALP_x100 (Reversed, Percentile)                                       0.08229485
med_inc_hh_est (Percentile)                                                0.67061602
persistent_poverty (Reversed, Percentile)                                  0.09288380
est_underemp_perc (Reversed, Percentile)                                  -0.07639159
eviction_filing_rate (Reversed, Percentile)                                0.43182597
below_pov_est (Reversed, Percentile)                                       0.55460363
Energy_Burden____income_ (Reversed, Percentile)                            0.57423148
limited_english_hh_est (Reversed, Percentile)                              0.56000811
est_no_internet_perc (Reversed, Percentile)                                0.42794426
est_65plus_ambulatory (Reversed, Percentile)                               0.37397707
less_than_highschool_est (Reversed, Percentile)                            0.70567416
low_physical_activity_est (Reversed, Percentile)                           0.72101873
hh_support_risk_score (Reversed, Percentile)                               0.14390056
                                                 est_child_pov (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                       0.45904347
est_child_pov (Reversed, Percentile)                                       1.00000000
disability_est (Reversed, Percentile)                                      0.29091928
EAL_VALP_x100 (Reversed, Percentile)                                       0.02152931
med_inc_hh_est (Percentile)                                                0.42823955
persistent_poverty (Reversed, Percentile)                                  0.12462145
est_underemp_perc (Reversed, Percentile)                                   0.01504475
eviction_filing_rate (Reversed, Percentile)                                0.28137023
below_pov_est (Reversed, Percentile)                                       0.73295177
Energy_Burden____income_ (Reversed, Percentile)                            0.44839974
limited_english_hh_est (Reversed, Percentile)                              0.32817863
est_no_internet_perc (Reversed, Percentile)                                0.37196001
est_65plus_ambulatory (Reversed, Percentile)                               0.20640518
less_than_highschool_est (Reversed, Percentile)                            0.49292832
low_physical_activity_est (Reversed, Percentile)                           0.59528158
hh_support_risk_score (Reversed, Percentile)                               0.25785645
                                                 disability_est (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                        0.31797702
est_child_pov (Reversed, Percentile)                                        0.29091928
disability_est (Reversed, Percentile)                                       1.00000000
EAL_VALP_x100 (Reversed, Percentile)                                       -0.07528297
med_inc_hh_est (Percentile)                                                 0.38623981
persistent_poverty (Reversed, Percentile)                                   0.10529111
est_underemp_perc (Reversed, Percentile)                                   -0.02436843
eviction_filing_rate (Reversed, Percentile)                                 0.31829389
below_pov_est (Reversed, Percentile)                                        0.33796562
Energy_Burden____income_ (Reversed, Percentile)                             0.39629374
limited_english_hh_est (Reversed, Percentile)                               0.24665584
est_no_internet_perc (Reversed, Percentile)                                 0.40489720
est_65plus_ambulatory (Reversed, Percentile)                                0.46994681
less_than_highschool_est (Reversed, Percentile)                             0.39850104
low_physical_activity_est (Reversed, Percentile)                            0.50611354
hh_support_risk_score (Reversed, Percentile)                                0.13731236
                                                 EAL_VALP_x100 (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                      0.082294853
est_child_pov (Reversed, Percentile)                                      0.021529314
disability_est (Reversed, Percentile)                                    -0.075282972
EAL_VALP_x100 (Reversed, Percentile)                                      1.000000000
med_inc_hh_est (Percentile)                                              -0.070970094
persistent_poverty (Reversed, Percentile)                                 0.029263232
est_underemp_perc (Reversed, Percentile)                                  0.023781615
eviction_filing_rate (Reversed, Percentile)                               0.120459000
below_pov_est (Reversed, Percentile)                                     -0.073903357
Energy_Burden____income_ (Reversed, Percentile)                           0.073521989
limited_english_hh_est (Reversed, Percentile)                             0.057549906
est_no_internet_perc (Reversed, Percentile)                              -0.002783473
est_65plus_ambulatory (Reversed, Percentile)                             -0.007243160
less_than_highschool_est (Reversed, Percentile)                           0.079839980
low_physical_activity_est (Reversed, Percentile)                          0.052752372
hh_support_risk_score (Reversed, Percentile)                             -0.016899184
                                                 med_inc_hh_est (Percentile)
uninsured_est (Reversed, Percentile)                              0.67061602
est_child_pov (Reversed, Percentile)                              0.42823955
disability_est (Reversed, Percentile)                             0.38623981
EAL_VALP_x100 (Reversed, Percentile)                             -0.07097009
med_inc_hh_est (Percentile)                                       1.00000000
persistent_poverty (Reversed, Percentile)                         0.16651190
est_underemp_perc (Reversed, Percentile)                         -0.05052851
eviction_filing_rate (Reversed, Percentile)                       0.39919715
below_pov_est (Reversed, Percentile)                              0.65133487
Energy_Burden____income_ (Reversed, Percentile)                   0.56303361
limited_english_hh_est (Reversed, Percentile)                     0.56951178
est_no_internet_perc (Reversed, Percentile)                       0.41696433
est_65plus_ambulatory (Reversed, Percentile)                      0.30403315
less_than_highschool_est (Reversed, Percentile)                   0.52852639
low_physical_activity_est (Reversed, Percentile)                  0.60120293
hh_support_risk_score (Reversed, Percentile)                      0.16529451
                                                 persistent_poverty (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                           0.092883797
est_child_pov (Reversed, Percentile)                                           0.124621446
disability_est (Reversed, Percentile)                                          0.105291109
EAL_VALP_x100 (Reversed, Percentile)                                           0.029263232
med_inc_hh_est (Percentile)                                                    0.166511905
persistent_poverty (Reversed, Percentile)                                      1.000000000
est_underemp_perc (Reversed, Percentile)                                       0.103653420
eviction_filing_rate (Reversed, Percentile)                                   -0.027385061
below_pov_est (Reversed, Percentile)                                           0.315942715
Energy_Burden____income_ (Reversed, Percentile)                                0.041816858
limited_english_hh_est (Reversed, Percentile)                                 -0.024913010
est_no_internet_perc (Reversed, Percentile)                                    0.150720963
est_65plus_ambulatory (Reversed, Percentile)                                   0.043414154
less_than_highschool_est (Reversed, Percentile)                                0.101888948
low_physical_activity_est (Reversed, Percentile)                               0.171990164
hh_support_risk_score (Reversed, Percentile)                                  -0.009067479
                                                 est_underemp_perc (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                          -0.07639159
est_child_pov (Reversed, Percentile)                                           0.01504475
disability_est (Reversed, Percentile)                                         -0.02436843
EAL_VALP_x100 (Reversed, Percentile)                                           0.02378161
med_inc_hh_est (Percentile)                                                   -0.05052851
persistent_poverty (Reversed, Percentile)                                      0.10365342
est_underemp_perc (Reversed, Percentile)                                       1.00000000
eviction_filing_rate (Reversed, Percentile)                                   -0.15795828
below_pov_est (Reversed, Percentile)                                           0.06088119
Energy_Burden____income_ (Reversed, Percentile)                                0.03687662
limited_english_hh_est (Reversed, Percentile)                                 -0.06937118
est_no_internet_perc (Reversed, Percentile)                                   -0.05527089
est_65plus_ambulatory (Reversed, Percentile)                                  -0.15946614
less_than_highschool_est (Reversed, Percentile)                               -0.10955032
low_physical_activity_est (Reversed, Percentile)                              -0.01435140
hh_support_risk_score (Reversed, Percentile)                                   0.08114248
                                                 eviction_filing_rate (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                              0.43182597
est_child_pov (Reversed, Percentile)                                              0.28137023
disability_est (Reversed, Percentile)                                             0.31829389
EAL_VALP_x100 (Reversed, Percentile)                                              0.12045900
med_inc_hh_est (Percentile)                                                       0.39919715
persistent_poverty (Reversed, Percentile)                                        -0.02738506
est_underemp_perc (Reversed, Percentile)                                         -0.15795828
eviction_filing_rate (Reversed, Percentile)                                       1.00000000
below_pov_est (Reversed, Percentile)                                              0.26378210
Energy_Burden____income_ (Reversed, Percentile)                                   0.46666053
limited_english_hh_est (Reversed, Percentile)                                     0.39198397
est_no_internet_perc (Reversed, Percentile)                                       0.25093263
est_65plus_ambulatory (Reversed, Percentile)                                      0.27515035
less_than_highschool_est (Reversed, Percentile)                                   0.53165949
low_physical_activity_est (Reversed, Percentile)                                  0.53856357
hh_support_risk_score (Reversed, Percentile)                                      0.12636878
                                                 below_pov_est (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                       0.55460363
est_child_pov (Reversed, Percentile)                                       0.73295177
disability_est (Reversed, Percentile)                                      0.33796562
EAL_VALP_x100 (Reversed, Percentile)                                      -0.07390336
med_inc_hh_est (Percentile)                                                0.65133487
persistent_poverty (Reversed, Percentile)                                  0.31594271
est_underemp_perc (Reversed, Percentile)                                   0.06088119
eviction_filing_rate (Reversed, Percentile)                                0.26378210
below_pov_est (Reversed, Percentile)                                       1.00000000
Energy_Burden____income_ (Reversed, Percentile)                            0.40592282
limited_english_hh_est (Reversed, Percentile)                              0.40601689
est_no_internet_perc (Reversed, Percentile)                                0.42250289
est_65plus_ambulatory (Reversed, Percentile)                               0.24975244
less_than_highschool_est (Reversed, Percentile)                            0.46742452
low_physical_activity_est (Reversed, Percentile)                           0.59290035
hh_support_risk_score (Reversed, Percentile)                               0.21265667
                                                 Energy_Burden____income_ (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                                  0.57423148
est_child_pov (Reversed, Percentile)                                                  0.44839974
disability_est (Reversed, Percentile)                                                 0.39629374
EAL_VALP_x100 (Reversed, Percentile)                                                  0.07352199
med_inc_hh_est (Percentile)                                                           0.56303361
persistent_poverty (Reversed, Percentile)                                             0.04181686
est_underemp_perc (Reversed, Percentile)                                              0.03687662
eviction_filing_rate (Reversed, Percentile)                                           0.46666053
below_pov_est (Reversed, Percentile)                                                  0.40592282
Energy_Burden____income_ (Reversed, Percentile)                                       1.00000000
limited_english_hh_est (Reversed, Percentile)                                         0.49066678
est_no_internet_perc (Reversed, Percentile)                                           0.36756488
est_65plus_ambulatory (Reversed, Percentile)                                          0.34306769
less_than_highschool_est (Reversed, Percentile)                                       0.61273380
low_physical_activity_est (Reversed, Percentile)                                      0.70847711
hh_support_risk_score (Reversed, Percentile)                                          0.29649531
                                                 limited_english_hh_est (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                                0.56000811
est_child_pov (Reversed, Percentile)                                                0.32817863
disability_est (Reversed, Percentile)                                               0.24665584
EAL_VALP_x100 (Reversed, Percentile)                                                0.05754991
med_inc_hh_est (Percentile)                                                         0.56951178
persistent_poverty (Reversed, Percentile)                                          -0.02491301
est_underemp_perc (Reversed, Percentile)                                           -0.06937118
eviction_filing_rate (Reversed, Percentile)                                         0.39198397
below_pov_est (Reversed, Percentile)                                                0.40601689
Energy_Burden____income_ (Reversed, Percentile)                                     0.49066678
limited_english_hh_est (Reversed, Percentile)                                       1.00000000
est_no_internet_perc (Reversed, Percentile)                                         0.37167446
est_65plus_ambulatory (Reversed, Percentile)                                        0.19546507
less_than_highschool_est (Reversed, Percentile)                                     0.59021451
low_physical_activity_est (Reversed, Percentile)                                    0.55652765
hh_support_risk_score (Reversed, Percentile)                                        0.15658170
                                                 est_no_internet_perc (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                             0.427944264
est_child_pov (Reversed, Percentile)                                             0.371960006
disability_est (Reversed, Percentile)                                            0.404897199
EAL_VALP_x100 (Reversed, Percentile)                                            -0.002783473
med_inc_hh_est (Percentile)                                                      0.416964327
persistent_poverty (Reversed, Percentile)                                        0.150720963
est_underemp_perc (Reversed, Percentile)                                        -0.055270886
eviction_filing_rate (Reversed, Percentile)                                      0.250932627
below_pov_est (Reversed, Percentile)                                             0.422502888
Energy_Burden____income_ (Reversed, Percentile)                                  0.367564883
limited_english_hh_est (Reversed, Percentile)                                    0.371674460
est_no_internet_perc (Reversed, Percentile)                                      1.000000000
est_65plus_ambulatory (Reversed, Percentile)                                     0.234683700
less_than_highschool_est (Reversed, Percentile)                                  0.532183618
low_physical_activity_est (Reversed, Percentile)                                 0.496388433
hh_support_risk_score (Reversed, Percentile)                                     0.193434409
                                                 est_65plus_ambulatory (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                               0.37397707
est_child_pov (Reversed, Percentile)                                               0.20640518
disability_est (Reversed, Percentile)                                              0.46994681
EAL_VALP_x100 (Reversed, Percentile)                                              -0.00724316
med_inc_hh_est (Percentile)                                                        0.30403315
persistent_poverty (Reversed, Percentile)                                          0.04341415
est_underemp_perc (Reversed, Percentile)                                          -0.15946614
eviction_filing_rate (Reversed, Percentile)                                        0.27515035
below_pov_est (Reversed, Percentile)                                               0.24975244
Energy_Burden____income_ (Reversed, Percentile)                                    0.34306769
limited_english_hh_est (Reversed, Percentile)                                      0.19546507
est_no_internet_perc (Reversed, Percentile)                                        0.23468370
est_65plus_ambulatory (Reversed, Percentile)                                       1.00000000
less_than_highschool_est (Reversed, Percentile)                                    0.42572679
low_physical_activity_est (Reversed, Percentile)                                   0.41203998
hh_support_risk_score (Reversed, Percentile)                                       0.14224776
                                                 less_than_highschool_est (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                                  0.70567416
est_child_pov (Reversed, Percentile)                                                  0.49292832
disability_est (Reversed, Percentile)                                                 0.39850104
EAL_VALP_x100 (Reversed, Percentile)                                                  0.07983998
med_inc_hh_est (Percentile)                                                           0.52852639
persistent_poverty (Reversed, Percentile)                                             0.10188895
est_underemp_perc (Reversed, Percentile)                                             -0.10955032
eviction_filing_rate (Reversed, Percentile)                                           0.53165949
below_pov_est (Reversed, Percentile)                                                  0.46742452
Energy_Burden____income_ (Reversed, Percentile)                                       0.61273380
limited_english_hh_est (Reversed, Percentile)                                         0.59021451
est_no_internet_perc (Reversed, Percentile)                                           0.53218362
est_65plus_ambulatory (Reversed, Percentile)                                          0.42572679
less_than_highschool_est (Reversed, Percentile)                                       1.00000000
low_physical_activity_est (Reversed, Percentile)                                      0.83289599
hh_support_risk_score (Reversed, Percentile)                                          0.18663194
                                                 low_physical_activity_est (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                                   0.72101873
est_child_pov (Reversed, Percentile)                                                   0.59528158
disability_est (Reversed, Percentile)                                                  0.50611354
EAL_VALP_x100 (Reversed, Percentile)                                                   0.05275237
med_inc_hh_est (Percentile)                                                            0.60120293
persistent_poverty (Reversed, Percentile)                                              0.17199016
est_underemp_perc (Reversed, Percentile)                                              -0.01435140
eviction_filing_rate (Reversed, Percentile)                                            0.53856357
below_pov_est (Reversed, Percentile)                                                   0.59290035
Energy_Burden____income_ (Reversed, Percentile)                                        0.70847711
limited_english_hh_est (Reversed, Percentile)                                          0.55652765
est_no_internet_perc (Reversed, Percentile)                                            0.49638843
est_65plus_ambulatory (Reversed, Percentile)                                           0.41203998
less_than_highschool_est (Reversed, Percentile)                                        0.83289599
low_physical_activity_est (Reversed, Percentile)                                       1.00000000
hh_support_risk_score (Reversed, Percentile)                                           0.24557865
                                                 hh_support_risk_score (Reversed, Percentile)
uninsured_est (Reversed, Percentile)                                              0.143900555
est_child_pov (Reversed, Percentile)                                              0.257856448
disability_est (Reversed, Percentile)                                             0.137312357
EAL_VALP_x100 (Reversed, Percentile)                                             -0.016899184
med_inc_hh_est (Percentile)                                                       0.165294509
persistent_poverty (Reversed, Percentile)                                        -0.009067479
est_underemp_perc (Reversed, Percentile)                                          0.081142478
eviction_filing_rate (Reversed, Percentile)                                       0.126368777
below_pov_est (Reversed, Percentile)                                              0.212656670
Energy_Burden____income_ (Reversed, Percentile)                                   0.296495307
limited_english_hh_est (Reversed, Percentile)                                     0.156581704
est_no_internet_perc (Reversed, Percentile)                                       0.193434409
est_65plus_ambulatory (Reversed, Percentile)                                      0.142247761
less_than_highschool_est (Reversed, Percentile)                                   0.186631945
low_physical_activity_est (Reversed, Percentile)                                  0.245578647
hh_support_risk_score (Reversed, Percentile)                                      1.000000000
library(ggplot2)
library(tidyr)
library(dplyr)

cor_long <- as.data.frame(cor_mat) %>%
  tibble::rownames_to_column("var1") %>%
  pivot_longer(-var1, names_to = "var2", values_to = "r")

ggplot(cor_long, aes(var1, var2, fill = r)) +
  geom_tile() +
  scale_fill_gradient2(limits = c(-1,1)) +
  theme_minimal() +
  coord_fixed() +
  theme(axis.text.x = element_text(angle=45, hjust=1))

# No correlations > |0.7| → no multicollinearity

PCA and Testing

A good reference here

#Run PCA
pca <- prcomp(pca_df, center = TRUE, scale. = TRUE)
summary(pca)
Importance of components:
                          PC1     PC2     PC3     PC4     PC5     PC6     PC7
Standard deviation     2.4987 1.18613 1.07638 1.02974 1.00024 0.91754 0.85057
Proportion of Variance 0.3902 0.08793 0.07241 0.06627 0.06253 0.05262 0.04522
Cumulative Proportion  0.3902 0.47815 0.55056 0.61683 0.67936 0.73198 0.77720
                           PC8     PC9    PC10    PC11    PC12    PC13    PC14
Standard deviation     0.83055 0.79625 0.73378 0.66626 0.65063 0.58665 0.45483
Proportion of Variance 0.04311 0.03963 0.03365 0.02774 0.02646 0.02151 0.01293
Cumulative Proportion  0.82031 0.85994 0.89359 0.92133 0.94779 0.96930 0.98223
                         PC15    PC16
Standard deviation     0.4021 0.35023
Proportion of Variance 0.0101 0.00767
Cumulative Proportion  0.9923 1.00000
loadings <- as.data.frame(pca$rotation) %>%
  tibble::rownames_to_column("variable")
loadings
                                           variable         PC1         PC2
1              uninsured_est (Reversed, Percentile)  0.32449936 -0.06850199
2              est_child_pov (Reversed, Percentile)  0.26873488  0.27721111
3             disability_est (Reversed, Percentile)  0.22849269 -0.03137502
4              EAL_VALP_x100 (Reversed, Percentile)  0.01558623 -0.18288076
5                       med_inc_hh_est (Percentile)  0.30955086  0.08060139
6         persistent_poverty (Reversed, Percentile)  0.07052055  0.49620772
7          est_underemp_perc (Reversed, Percentile) -0.02661286  0.48451873
8       eviction_filing_rate (Reversed, Percentile)  0.23804187 -0.33851621
9              below_pov_est (Reversed, Percentile)  0.29075455  0.39487274
10  Energy_Burden____income_ (Reversed, Percentile)  0.30439454 -0.07412874
11    limited_english_hh_est (Reversed, Percentile)  0.26998581 -0.14669103
12      est_no_internet_perc (Reversed, Percentile)  0.24749143  0.08227005
13     est_65plus_ambulatory (Reversed, Percentile)  0.20015641 -0.22235009
14  less_than_highschool_est (Reversed, Percentile)  0.33880886 -0.14888920
15 low_physical_activity_est (Reversed, Percentile)  0.36109548 -0.01236693
16     hh_support_risk_score (Reversed, Percentile)  0.12361333  0.14315980
           PC3          PC4         PC5          PC6         PC7          PC8
1  -0.10928824 -0.131924812 -0.13022695  0.082846170 -0.06328888  0.055981702
2  -0.07702109  0.056669663 -0.11905520 -0.347403434 -0.34134950 -0.450581677
3   0.42587888  0.088158677  0.37333466  0.213142645  0.08365028 -0.232990627
4  -0.63249041 -0.266171885  0.51444119 -0.243148503  0.01211809 -0.194787087
5   0.03590283 -0.093940488 -0.25397999  0.145399741 -0.04895117  0.207760915
6   0.10046013 -0.509264106  0.30898371 -0.110709924  0.09506278  0.530456318
7  -0.32065698  0.290649220  0.25176761  0.639192026 -0.02664965 -0.112849808
8  -0.11869234 -0.018025176  0.05968208  0.047825016 -0.17419282  0.213315380
9   0.03938396 -0.103161488 -0.18226112 -0.169539144 -0.24368150 -0.158277940
10 -0.14613761  0.194430383  0.07977947  0.159299142 -0.07947654  0.153987775
11 -0.21696740 -0.010645770 -0.31458463  0.173440063  0.24297306  0.139890088
12  0.12201857 -0.039248819  0.03808748 -0.116638083  0.76299349 -0.320319746
13  0.41613740  0.047864061  0.41431719  0.001602407 -0.29514744  0.013994074
14 -0.06641580 -0.065523280  0.02559762  0.025668452  0.11732655 -0.010499594
15 -0.05142764 -0.005542484  0.06935934  0.060897788 -0.04286444 -0.006585638
16 -0.08555697  0.700766788  0.14418712 -0.470605271  0.14225779  0.384110613
           PC9        PC10        PC11          PC12        PC13         PC14
1   0.28799957  0.16192307  0.07546236  3.078928e-01  0.53050245  0.575896309
2  -0.15528682  0.08466952 -0.08229062 -1.664042e-01 -0.14619922  0.138690837
3  -0.18252132 -0.47300402 -0.26485129 -1.996232e-01  0.34289956  0.088851410
4   0.19547188 -0.28829778 -0.02222927  3.999708e-02  0.02259895 -0.091741126
5   0.19943698 -0.45683566  0.04165428  3.800331e-01 -0.03252702 -0.469836149
6  -0.10884481  0.08048530 -0.07116560 -1.437175e-01 -0.06111391  0.123758674
7   0.01421610  0.09068345  0.27786289 -2.765131e-03 -0.02192328 -0.005970361
8  -0.68191521 -0.11958981  0.46701073  7.899189e-02 -0.10593117  0.132449002
9   0.04188610 -0.20107558  0.19021458  1.537889e-05 -0.03602821 -0.054094335
10 -0.05040452  0.08387809 -0.62216637  2.824208e-01 -0.46783612  0.173967943
11  0.23604446 -0.21786821  0.04836016 -6.752615e-01 -0.21348453  0.201557353
12 -0.05806976  0.08043636  0.20673012  2.865727e-01 -0.25393405  0.092914530
13  0.46992493  0.16052536  0.32864649 -4.661273e-02 -0.33518063  0.023501093
14 -0.03191610  0.46312652  0.01073853 -1.843861e-01  0.20003766 -0.477870532
15 -0.11864386  0.26987751 -0.17560339 -1.001222e-01  0.21394569 -0.260083604
16  0.07264724 -0.08700434  0.08647488 -2.600699e-02  0.18505799 -0.012293918
           PC15         PC16
1  -0.067782138 -0.048563490
2  -0.528609460  0.040036128
3  -0.032237850 -0.134219740
4   0.034826053  0.001241791
5  -0.365696601  0.078663520
6  -0.133968281 -0.013082791
7  -0.073680168 -0.015835749
8  -0.008630850 -0.012117410
9   0.688546090 -0.210383798
10  0.153112235 -0.178640069
11 -0.011655823  0.053981292
12  0.004275305  0.088850858
13 -0.030410760  0.073926532
14 -0.075940411 -0.561592136
15  0.228003199  0.750472297
16 -0.013049707 -0.014963412
plot(pca, type = "l")

library(factoextra)
Warning: package 'factoextra' was built under R version 4.4.3
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#scree plot (fancier)
fviz_eig(pca, addlabels = TRUE, barfill = "steelblue", barcolor = "black") +
  ggtitle("Scree Plot: Variance Explained by Principal Components")
Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
Ignoring empty aesthetic: `width`.

PCA1 explains 39% of all variation in the normalized indicators. PCA2 explains 8.8%, and PCSs after that each explain 3.7%. After PCA1, the drop off in variance is steep, meaning that most of the shared structure among the variables is captures in one dominant dimension (here we are not referring to the dimensions of the index, fyi). The indicators share a general vulnerability disadvantage direction.

They are not perfectly collinear (otherwise one PC would explain >80%) but they are co-mocing in the expected direction.

The steep elbow suggests the dataset behaves exactly like a typical SES index with one strong unifying dimension and many smaller domain specific nuances.

#loading plot 
fviz_pca_var(
  pca,
  col.var = "cos2",
  gradient.cols = c("darkorchid4", "gold", "darkorange"),
  repel = TRUE
) +
  ggtitle("PCA Variable Loadings (Correlation Circle)")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the ggpubr package.
  Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
ℹ The deprecated feature was likely used in the factoextra package.
  Please report the issue at <https://github.com/kassambara/factoextra/issues>.

#the fact they don’t collapse into a single arrow means they are not multicollinear...This plot shows that the indicators all point in a similar direction—which means the index is measuring a coherent construct. However, no two indicators overlap, which confirms that none are redundant or duplicative.


#contributions of PCA1 and 2
library(gridExtra)
Warning: package 'gridExtra' was built under R version 4.4.3

Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':

    combine
p1 <- fviz_contrib(pca, choice = "var", axes = 1) +
  ggtitle("Contribution of Variables to PC1")

p2 <- fviz_contrib(pca, choice = "var", axes = 2) +
  ggtitle("Contribution of Variables to PC2")

grid.arrange(p1, p2, ncol = 2)

#biplot - variables and observations
fviz_pca_biplot(
  pca,
  repel = TRUE,
  col.var = "firebrick",
  col.ind = "grey60",
  alpha.ind = 0.6
) +
  ggtitle("PCA Biplot: Variables and Observations")

library(corrplot)
Warning: package 'corrplot' was built under R version 4.4.3
corrplot 0.95 loaded
corr_matrix <- cor(pca_df, use = "pairwise.complete.obs")
corrplot(
  corr_matrix,
  method = "color",
  type = "full",
  addCoef.col = "black",
  tl.col = "black",
  tl.cex = 0.7,
  number.cex = 0.6,
  col = colorRampPalette(c("navy", "white", "firebrick"))(200)
)

fviz_cos2(pca, choice = "var", axes = 1:2) +
  ggtitle("Variable Representation Quality (cos²) on PC1–PC2")

Biplot - Arrows = variables and points = census tracts