Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
library(car)
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
Rows: 3143 Columns: 126
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (65): StateAbbr, StateDesc, CountyName, CountyFIPS, ACCESS2_Crude95CI, A...
dbl (61): TotalPopulation, ACCESS2_CrudePrev, ACCESS2_AdjPrev, ARTHRITIS_Cru...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(pacman)options(tigris_class ="sf")# Load variables for 2010 censusV10 <-load_variables(2010, "pl", cache =TRUE)# Load variables for 2020 censusv20 <-load_variables(2020, "pl", cache =TRUE)# 2010 Total Population variables = P001001# 2020 Total Population variables = P1_001N# 2010 Total Hisp/Latino Population = P002002# 2020 Total Hisp/Latino Population = P2_002N# 2010 Total Population for US countiesus_2010 <-get_decennial(geography ="county",variables ="P001001",year =2010)
Getting data from the 2010 decennial Census
Using Census Summary File 1
# 2020 Total Population for US countiesus_2020 <-get_decennial(geography ="county",variables ="P1_001N",year =2020)
Getting data from the 2020 decennial Census
Using the PL 94-171 Redistricting Data summary file
Note: 2020 decennial Census data use differential privacy, a technique that
introduces errors into data to preserve respondent confidentiality.
ℹ Small counts should be interpreted with caution.
ℹ See https://www.census.gov/library/fact-sheets/2021/protecting-the-confidentiality-of-the-2020-census-redistricting-data.html for additional guidance.
This message is displayed once per session.
# Merge data for 2010 and 2020 using GEOIDus_pop <-inner_join(us_2010,us_2020, by =c("GEOID"))# Order by GEOIDus_total <- us_pop[order(us_pop$GEOID),]# Rename variablesus_percentchg <- us_total %>%rename(county_2010 = NAME.x, county_2020 = NAME.y,variable_2010 = variable.x, variable_2020 = variable.y,total_2010 = value.x, total_2020 = value.y)# Calculate percent change from 2010 to 2020us_percentchg <-mutate(us_percentchg, percent_change = (total_2020 - total_2010) / total_2010)# Format number as percentage with scales package#us_percentchg$percent_change <- percent(us_percentchg$percent_change, accuracy=0.01)# 2010 Hispanic Population for US countiesus_2010_h <-get_decennial(geography ="county",variables ="P002002",year =2010)
Getting data from the 2010 decennial Census
Using Census Summary File 1
# 2020 Hispanic Population for US countiesus_2020_h <-get_decennial(geography ="county",variables ="P2_002N",year =2020)
Getting data from the 2020 decennial Census
Using the PL 94-171 Redistricting Data summary file
# Merge data for 2010 and 2020 using GEOIDus_pop_h <-inner_join(us_2010_h,us_2020_h, by =c("GEOID"))# Order by GEOIDus_total_h <- us_pop_h[order(us_pop$GEOID),]# Rename variablesus_percentchg_h <- us_total_h %>%rename(county_2010 = NAME.x, county_2020 = NAME.y,variable_2010 = variable.x, variable_2020 = variable.y,hisp_pop_2010 = value.x, hisp_pop_2020 = value.y)# Calculate percent change from 2010 to 2020us_percentchg_h <-mutate(us_percentchg_h, percent_change_h = (hisp_pop_2020 - hisp_pop_2010) / hisp_pop_2010)
Loading ACS data and merging with previous data sources
# I want to include SNAP benefits data but they're not available at county leveldat$prop_pov <- (dat$B06012_002E / dat$B06012_001E) *100dat$prop_lths <- (dat$B06009_002E / dat$B06009_001E) *100dat$prop_hs <- (dat$B06009_003E / dat$B06009_001E) *100dat$prop_sm_col <- (dat$B06009_004E / dat$B06009_001E) *100dat$prop_bach <- (dat$B06009_005E / dat$B06009_001E) *100dat$prop_grad_dg <- (dat$B06009_006E / dat$B06009_001E) *100dat$med_hh_inc <- dat$B07411_001Edat$med_age <- dat$B07402_001E# B06012_001 = total estimate pop# B06012_002 = est pop living under 100% pov# B06009_001 = tot est pop# B06009_002 = est pop less than hs# B06009_003 = est pop hs grad# B06009_004 = est pop some coll# B06009_005 = est pop bach deg# B06009_006 = est pop grad deg# B07411_001 = est median hh income# B07402_001 = est median agedog <-merge(dat, us_percentchg, by ="GEOID")us_counties <-merge(dog, plcs, by ="GEOID")data_final <-merge(us_counties, us_percentchg_h, by ="GEOID")data_final$hisp_prop_20 <- (data_final$hisp_pop_2020/data_final$total_2020) *100data_final$hisp_prop_10 <- (data_final$hisp_pop_2010/data_final$total_2010) *100data_final$hisp_maj <-if_else(data_final$hisp_prop_20>=50, "hisp_majority", "hisp_non_majority")tabyl(data_final$hisp_maj)
data_final$hisp_maj n percent
hisp_majority 101 0.03217585
hisp_non_majority 3038 0.96782415
Characterizing counties by Hispanic population growth; established, high growth, and other
# adding the extra 2 counties to the established groupdata_final$hisp_cat <- car::recode(data_final$hisp_check, "'0 0 1' = 'other'; '0 1 0' = 'high growth'; '1 0 0' = 'established'; '1 1 0' = 'established'")tabyl(data_final$hisp_cat)
data_final$hisp_cat n percent
established 711 0.2265053
high growth 718 0.2287353
other 1710 0.5447595
Food insecurity data
library(readr)food_acc <-read_csv("C:/Users/maman/OneDrive - University of Texas at San Antonio/MHM Fellowship/2019 Food Access Research Atlas Data/Food Access Research Atlas.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
Rows: 72531 Columns: 147
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (113): State, County, MedianFamilyIncome, LAPOP1_10, LAPOP05_10, LAPOP1_...
dbl (34): CensusTract, Urban, Pop2010, OHU2010, GroupQuartersFlag, NUMGQTRS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# food_acc is by census tract, might use laterlibrary(readxl)mg_data <-read_excel("C:/Users/maman/OneDrive - University of Texas at San Antonio/MHM Fellowship/Food Security Data/meal_gap_data_2022.xlsx")mg_2020 <- mg_data %>%filter(Year ==2020)# merging food insecurity data with the restdata_final$FIPS <-substr(data_final$GEOID, 2,5)data_final1 <-merge(data_final,mg_2020, by ="FIPS")# having an issue merging these data with the final data
Creating my descriptive table
# subsetting to remove Hawaii and Alaskadata_final <- data_final[-(545:549),] #deleting rows by rangedata_final <- data_final[-(68:94),] table_data <- data_final %>%st_drop_geometry()library(gtsummary)table1 <- table_data %>%select(hisp_cat, percent_change, med_age, uninsured, ann_chck, diabetes, obesity, prop_pov, prop_lths, prop_hs, prop_bach, prop_grad_dg) %>%tbl_summary(by = hisp_cat,label =list(percent_change ~"percent population change", med_age ~"median age", uninsured ~"uninsured rate", ann_chck ~"annual check-up rate", diabetes ~"diabetes rate", obesity ~"obesity rate", prop_pov ~"proportion under poverty threshold", prop_lths ~"proportion with less than hs educ", prop_hs ~"proportion with hs educ", prop_bach ~"proportion with bachelor's degree", prop_grad_dg ~"proportion with grad degree"),percent ="column",missing ="always",missing_text ="Missing" )table1
Characteristic
established, N = 7081
high growth, N = 7021
other, N = 1,6971
percent population change
0.05 (-0.01, 0.11)
-0.05 (-0.09, 0.00)
0.00 (-0.05, 0.05)
Missing
0
0
0
median age
38.7 (35.9, 41.6)
45.2 (41.8, 49.0)
41.8 (39.4, 44.3)
Missing
0
2
0
uninsured rate
18.5 (13.8, 25.4)
14.0 (11.7, 18.4)
13.3 (10.9, 17.2)
Missing
0
0
0
annual check-up rate
72.6 (69.6, 75.9)
75.9 (73.3, 78.0)
76.9 (75.0, 78.6)
Missing
0
0
0
diabetes rate
12.00 (10.50, 13.80)
13.40 (11.80, 15.17)
12.20 (10.80, 14.20)
Missing
0
0
0
obesity rate
35.1 (30.9, 37.9)
36.6 (34.1, 39.3)
36.9 (34.0, 39.3)
Missing
0
0
0
proportion under poverty threshold
13.5 (10.7, 16.6)
13.5 (9.9, 18.9)
13.8 (10.4, 17.8)
Missing
0
0
0
proportion with less than hs educ
12.9 (9.6, 18.2)
11.4 (7.4, 17.4)
10.4 (7.6, 14.4)
Missing
0
0
0
proportion with hs educ
30 (25, 34)
36 (31, 41)
35 (31, 40)
Missing
0
0
0
proportion with bachelor's degree
14.8 (11.6, 20.2)
13.0 (9.4, 16.8)
13.4 (10.4, 17.5)
Missing
0
0
0
proportion with grad degree
7.5 (5.5, 11.6)
5.4 (4.2, 6.9)
7.0 (5.4, 10.0)
Missing
0
0
0
1 Median (IQR)
# Still want to include variables for SNAP benefits and food insecurity (ex: prop of the pop living greater than 1 mile from a supermarket)
Making maps showing pop change, obesity rates, and Hispanic growth
library(tmap)tmap_mode("plot")
tmap mode set to plotting
map_1 <-tm_shape(data_final)+tm_polygons(c("percent_change"), title=c("% pop change"), palette="RdBu", breaks=c(-.5, -.25, -.1, -.05, -.01, .01, .05, .1, .25, .5),style="fixed",midpoint=0, border.alpha =0)+tmap_options(max.categories =9)+tm_format("World", legend.outside=T, title.size =4)+tm_layout(main.title="2010-2020 Percentage Population Change by County", title.size =8, bg.color ="grey85", legend.frame =TRUE, title.position =c('right', 'top'))+tm_credits("Data source: 2010 and 2020 US Decennial Census", position =c("left","bottom"), size =1)map_1
Warning: Values have found that are higher than the highest break
Warning: `qplot()` was deprecated in ggplot2 3.4.0.
library(spdep)
Loading required package: sp
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
# testing for autocorrelation using Moran's Imoran.test(data_final$obesity, listw=q_list,zero.policy =TRUE)
Moran I test under randomisation
data: data_final$obesity
weights: q_list n reduced by no-neighbour observations
Moran I statistic standard deviate = 58.32, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
0.6275177511 -0.0003222688 0.0001158946
# Moran's I value of 0.62 suggests counties with high obesity rates are clustered around each other.
Calculating local Moran’s I and plotting Local Indicator of Spatial Autocorrelation (LISA) map
locali<-localmoran(data_final$obesity, listw = q_list, zero.policy =TRUE, alternative ="two.sided" )data_final$locali<-locali[,1]data_final$localp<-locali[,5]data_final$sobese<-scale(data_final$obesity)data_final$lag_obese<-lag.listw(var=data_final$sobese, x = q_list)
Warning in lag.listw(var = data_final$sobese, x = q_list): NAs in lagged values
data_final$quad_sig <-NAdata_final$quad_sig[(data_final$sobese >=0& data_final$lag_obese >=0) & (data_final$localp <=0.05)] <-"H-H"#high highdata_final$quad_sig[(data_final$sobese <=0& data_final$lag_obese <=0) & (data_final$localp <=0.05)] <-"L-L"#low lowdata_final$quad_sig[(data_final$sobese >=0& data_final$lag_obese <=0) & (data_final$localp <=0.05)] <-"H-L"#high lowdata_final$quad_sig[(data_final$sobese <=0& data_final$lag_obese >=0) & (data_final$localp <=0.05)] <-"L-H"#low high#WE ASSIGN A # Set the breaks for the thematic map classesbreaks <-seq(1, 5, 1)# Set the corresponding labels for the thematic map classeslabels <-c("High-High", "Low-Low", "High-Low", "Low-High", "Not Clustered")# see ?findInterval - This is necessary for making a mapnp <-findInterval(data_final$quad_sig, breaks)
Warning in findInterval(data_final$quad_sig, breaks): NAs introduced by coercion
# Assign colors to each map classcolors <-c("red", "blue", "lightpink", "skyblue2", "white")