#Loading libraries
Code
# Loading libraries
librarian:: shelf (knitr, car, foreign, dplyr, janitor, ggplot2, ggpubr, usmapdata, usmap, gtsummary, gt, lme4, emmeans, summarytools, data.table, cobalt, WeightIt, margins, marginaleffects, readxl, nlme, multilevel, MASS, survey)
The 'cran_repo' argument in shelf() was not set, so it will use
cran_repo = 'https://cran.r-project.org' by default.
To avoid this message, set the 'cran_repo' argument to a CRAN
mirror URL (see https://cran.r-project.org/mirrors.html) or set
'quiet = TRUE'.
Code
opts_chunk$ set (echo = TRUE )
setwd ("/Users/briansurratt/Library/CloudStorage/OneDrive-UniversityofTexasatSanAntonio/PAA 2025/R files" )
options (scipen = 999 ) # This turns off scientific notation.
I’m going to add libraries as needed.
library(stargazer) library(webshot2)
Reading in the data
Importing the data for Household Pulse Survey phase 3.2 to 3.4.
Code
alldata <- read.csv ("./data/alldata.csv" )
nrow (alldata)
Selecting variables from alldata for a working dataframe titled ‘allhh’. Remember that rent amount was not in the initial survey and isn’t in every wave. Started asking phase 3.5, week 46 (I think).
Code
allhh <- alldata %>%
dplyr:: select (week, income, tenure, evict, rentcur, anxious, worry, interest, down, tbirth_year, genid_describe, rhispanic, rrace, ms, thhld_numadlt, thhld_numper, thhld_numkid, eeduc, hweight, pweight, est_st, state, abbv, score, est_msa, wrklossrv, anywork)
nrow (allhh)
Cleaning and exploring the data
Check distribution of tenure and clear NAs.
Check the distribution of tenure.
TENURE 1) Owned by you or someone in this household free and clear? 2) Owned by your or someone in this household with a mortgage or loan (including home equity loans)? 3) Rented? 4) Occupied without payment of rent? -99) Question seen but category not selected -88) Missing / Did not report
Code
allhh$tenure n percent
-99 6498 0.008083045
-88 117839 0.146583241
1 194842 0.242369434
2 316637 0.393873654
3 159649 0.198591873
4 8440 0.010498753
Code
# Remove NAs
allhh <- allhh %>%
filter (tenure %in% c (1 : 4 ))
tabyl (allhh$ tenure)
allhh$tenure n percent
1 194842 0.28671450
2 316637 0.46593866
3 159649 0.23492719
4 8440 0.01241965
Code
15% of the respondents did not report on housing tenure!
Create a dichotomous variable ‘renting’
Code
allhh <- allhh %>%
mutate (renting = case_when (tenure == 3 ~ 1 ,
TRUE ~ 0 ))
tabyl (allhh$ renting)
allhh$renting n percent
0 519919 0.7650728
1 159649 0.2349272
Code
Create a dichotomous variable ‘owner’
Code
allhh <- allhh %>%
mutate (owner = case_when (tenure %in% c (1 : 2 ) ~ 1 ,
TRUE ~ 0 ))
tabyl (allhh$ owner)
allhh$owner n percent
0 168089 0.2473468
1 511479 0.7526532
Code
Clean and recode ‘evict’
Checking distribution of evict.
EVICT 1) Very likely 2) Somewhat likely 3) Not very likely 4) Not likely at all -99) Question seen but category not selected -88) Missing / Did not report
Only asked if rentcur = 2 which is “no”.
Code
allhh$evict n percent
-99 143 0.0002104278
-88 661780 0.9738245474
1 2509 0.0036920514
2 4986 0.0073370141
3 5275 0.0077622843
4 4875 0.0071736750
This needs to be recoded so that 0 is current on rent (not at risk of eviction due to late on rent), 1 is not likely at all, 2 is not very likely, 3 is somewhat likely, and 4 is very likely.
Code ‘evictrisk’ = 0 when rentcur = 1. Change other codes as well.
Code
allhh <- allhh %>%
mutate (evictrisk = case_when (tenure != 3 ~ 0 , # Homeowners are not at risk of eviction.
rentcur == 1 ~ 0 , # Current renters not at risk.
evict == 4 ~ 1 , # Late renters not likely at all to be evicted.
evict == 3 ~ 2 , # Late renters not very likely to be evicted.
evict == 2 ~ 3 , # Late renters somewhat likely to be evicted.
evict == 1 ~ 4 # Late renters very likely to be evicted.
)
)
tabyl (allhh$ evictrisk)
allhh$evictrisk n percent valid_percent
0 660858 0.972467803 0.973994220
1 4875 0.007173675 0.007184935
2 5275 0.007762284 0.007774468
3 4986 0.007337014 0.007348531
4 2509 0.003692051 0.003697847
NA 1065 0.001567172 NA
Code
Filer out NAs.
Code
allhh <- allhh %>%
filter (evictrisk %in% c (0 : 4 ))
tabyl (allhh$ evictrisk)
allhh$evictrisk n percent
0 660858 0.973994220
1 4875 0.007184935
2 5275 0.007774468
3 4986 0.007348531
4 2509 0.003697847
Code
Create a dichotomous variable called “highrisk” for renters who are very or somewhat likely to be evicted in the next 2 months.
Code
allhh <- allhh %>%
mutate (highrisk = case_when (evictrisk %in% c (3 : 4 ) ~ 1 ,
TRUE ~ 0 ))
tabyl (allhh$ highrisk)
allhh$highrisk n percent
0 671008 0.98895362
1 7495 0.01104638
Code
Creating dummy variables for eviction risk
Code
allhh$ evict0 <- car:: recode (allhh$ evictrisk, "0=1; else=0" )
tabyl (allhh$ evict0)
allhh$evict0 n percent
0 17645 0.02600578
1 660858 0.97399422
Code
allhh$ evict1 <- car:: recode (allhh$ evictrisk, "1=1; else=0" )
tabyl (allhh$ evict1)
allhh$evict1 n percent
0 673628 0.992815065
1 4875 0.007184935
Code
allhh$ evict2 <- car:: recode (allhh$ evictrisk, "2=1; else=0" )
tabyl (allhh$ evict2)
allhh$evict2 n percent
0 673228 0.992225532
1 5275 0.007774468
Code
allhh$ evict3 <- car:: recode (allhh$ evictrisk, "3=1; else=0" )
tabyl (allhh$ evict3)
allhh$evict3 n percent
0 673517 0.992651469
1 4986 0.007348531
Code
allhh$ evict4 <- car:: recode (allhh$ evictrisk, "4=1; else=0" )
tabyl (allhh$ evict4)
allhh$evict4 n percent
0 675994 0.996302153
1 2509 0.003697847
Check distribution of state policy variable
Code
score n percent
0.00 69106 0.101850692
0.23 17733 0.026135478
0.30 5200 0.007663931
0.38 19289 0.028428762
0.53 11170 0.016462713
0.60 9545 0.014067734
0.63 6668 0.009827517
0.75 5471 0.008063339
0.78 17846 0.026302021
0.83 26963 0.039738955
1.03 12318 0.018154673
1.15 12241 0.018041188
1.23 11491 0.016935813
1.28 50548 0.074499302
1.33 52349 0.077153675
1.43 12465 0.018371326
1.58 31366 0.046228241
1.84 12119 0.017861380
1.98 6508 0.009591704
2.08 12776 0.018829688
2.29 7469 0.011008057
2.30 9824 0.014478934
2.48 7584 0.011177548
2.65 9019 0.013292498
2.73 18098 0.026673427
2.78 10430 0.015372076
3.00 11432 0.016848857
3.08 14155 0.020862104
3.13 17260 0.025438355
3.15 7190 0.010596858
3.25 12727 0.018757470
3.33 18639 0.027470770
3.38 17830 0.026278439
3.73 11128 0.016400812
3.75 27522 0.040562827
3.78 12054 0.017765581
3.80 14936 0.022013167
3.88 7970 0.011746448
4.03 19823 0.029215788
4.30 11395 0.016794325
4.63 8846 0.013037525
Clean up covariates
Clean income
INCOME: Total household income (before taxes). 1) Less than $25,000
2) $25,000 - $34,999
3) $35,000 - $49,999
4) $50,000 - $74,999
5) $75,000 - $99,999
6) $100,000 - $149,999
7) $150,000 - $199,999 8) $200,000 and above -99) Question seen but category not selected -88) Missing / Did not report
Check distribution of income.
Code
allhh$income n percent
-99 17392 0.02563290
-88 19977 0.02944276
1 70996 0.10463624
2 56929 0.08390383
3 69383 0.10225894
4 109065 0.16074358
5 91249 0.13448577
6 116218 0.17128590
7 56936 0.08391415
8 70358 0.10369593
Remove NAs
Code
allhh <- allhh %>%
filter (.$ income %in% c (1 : 8 ))
tabyl (allhh$ income)
allhh$income n percent
1 70996 0.11073504
2 56929 0.08879423
3 69383 0.10821919
4 109065 0.17011264
5 91249 0.14232438
6 116218 0.18126944
7 56936 0.08880515
8 70358 0.10973993
Code
Histogram of income levels.
Create a dichotomous variable of less than $75,000 income, ‘less75k’.
Code
allhh$income n percent
1 70996 0.11073504
2 56929 0.08879423
3 69383 0.10821919
4 109065 0.17011264
5 91249 0.14232438
6 116218 0.18126944
7 56936 0.08880515
8 70358 0.10973993
Code
allhh <- allhh %>%
mutate (less75k = case_when (income %in% c (1 ,2 ,3 ,4 ) ~ 1 ,
TRUE ~ 0
)
)
tabyl (allhh$ less75k)
allhh$less75k n percent
0 334761 0.5221389
1 306373 0.4778611
Code
Recode income with 3 levels
Code
allhh <- allhh %>%
mutate (inclvl = case_when (income == 1 ~ "lessthan25" ,
income %in% c (2 ,3 ) ~ "25to49" ,
TRUE ~ "50andover"
)
)
allhh$ inclvl <- factor (allhh$ inclvl, levels= c ("25to49" , "lessthan25" , "50andover" ))
tabyl (allhh$ inclvl)
allhh$inclvl n percent
25to49 126312 0.1970134
lessthan25 70996 0.1107350
50andover 443826 0.6922515
Clean age
Creating a new variable ‘age’ by subtracting birth year from the year the data were collected. Data were collected in 2021 and 2022, but I am using 2021.
Code
allhh$ age <- 2021 - allhh$ tbirth_year
tabyl (allhh$ age)
allhh$age n percent
17 152 0.0002370799
18 806 0.0012571475
19 1099 0.0017141502
20 1485 0.0023162085
21 1976 0.0030820390
22 2599 0.0040537548
23 3052 0.0047603153
24 4024 0.0062763790
25 4880 0.0076115133
26 5340 0.0083289921
27 6003 0.0093630973
28 6825 0.0106452005
29 7363 0.0114843387
30 8082 0.0126057891
31 8672 0.0135260336
32 8968 0.0139877155
33 9390 0.0146459243
34 9649 0.0150498960
35 10195 0.0159015120
36 10928 0.0170447987
37 11205 0.0174768457
38 11601 0.0180945013
39 12249 0.0191052105
40 12368 0.0192908191
41 12697 0.0198039723
42 12061 0.0188119800
43 11733 0.0183003865
44 11817 0.0184314044
45 11400 0.0177809943
46 11226 0.0175096002
47 11213 0.0174893236
48 10925 0.0170401195
49 11286 0.0176031844
50 11988 0.0186981193
51 12886 0.0200987625
52 12238 0.0190880534
53 11532 0.0179868795
54 11332 0.0176749322
55 11871 0.0185156301
56 12094 0.0188634513
57 12964 0.0202204219
58 13232 0.0206384313
59 13488 0.0210377238
60 13769 0.0214760097
61 14241 0.0222122052
62 14064 0.0219361319
63 14259 0.0222402805
64 14732 0.0229780358
65 14965 0.0233414544
66 15296 0.0238577271
67 15165 0.0236534016
68 14367 0.0224087320
69 14071 0.0219470501
70 12942 0.0201861077
71 12052 0.0187979424
72 11427 0.0178231072
73 10667 0.0166377076
74 10800 0.0168451525
75 8846 0.0137974277
76 6737 0.0105079437
77 5892 0.0091899665
78 5705 0.0088982958
79 4884 0.0076177523
80 3826 0.0059675512
81 3108 0.0048476606
82 2584 0.0040303587
83 2161 0.0033705902
84 1630 0.0025423702
85 1378 0.0021493167
86 1079 0.0016829555
87 1864 0.0029073485
88 1729 0.0026967841
Clean genid_describe
genid_describe is defined as current gender identity.
Male
Female
Transgender
None of these -99) Question seen but category not selected -88) Missing / Did not report
Checking the distribution of gender identity.
Code
tabyl (allhh$ genid_describe)
allhh$genid_describe n percent
-99 3868 0.006033060
1 258539 0.403252674
2 370652 0.578119395
3 1979 0.003086718
4 6096 0.009508153
Creating dummy variables for gender.
Code
allhh$ male <- car:: recode (allhh$ genid_describe, "1=1; else=0" ) #gen1 = male
tabyl (allhh$ male)
allhh$male n percent
0 382595 0.5967473
1 258539 0.4032527
Code
allhh$ female <- car:: recode (allhh$ genid_describe, "2=1; else=0" ) #gen2 = female
tabyl (allhh$ female)
allhh$female n percent
0 270482 0.4218806
1 370652 0.5781194
Code
allhh$ transgender <- car:: recode (allhh$ genid_describe, "3=1; else=0" ) #gen3 = transgender
tabyl (allhh$ transgender)
allhh$transgender n percent
0 639155 0.996913282
1 1979 0.003086718
Code
allhh$ gen_none <- car:: recode (allhh$ genid_describe, "4=1; else=0" ) #gen4 = None of these
tabyl (allhh$ gen_none)
allhh$gen_none n percent
0 635038 0.990491847
1 6096 0.009508153
Code
allhh$ gen_notsel <- car:: recode (allhh$ genid_describe, "-99=1; else=0" ) #gen5 = not selected
tabyl (allhh$ gen_notsel)
allhh$gen_notsel n percent
0 637266 0.99396694
1 3868 0.00603306
Code
Clean race_eth
race_eth: Categorical variable derived from ‘rhispanic’ and ‘rrace’.
rhispanic categories
No, not of Hispanic, Latino, or Spanish origin
Yes, of Hispanic, Latino, or Spanish origin
rrace categories
White, Alone
Black, Alone
Asian, Alone
Any other race alone, or race in combination
Combining race/ethnicity into a race_eth variable.
Code
allhh$rrace n percent
1 534307 0.83337805
2 46232 0.07210973
3 31961 0.04985073
4 28634 0.04466149
Code
allhh$rhispanic n percent
1 586791 0.91523925
2 54343 0.08476075
Code
allhh <- allhh %>%
mutate (race_eth = case_when (.$ rhispanic == 1 & .$ rrace == 1 ~ "nh_white" ,
.$ rhispanic == 1 & .$ rrace == 2 ~ "nh_black" ,
.$ rhispanic == 1 & .$ rrace == 3 ~ "nh_asian" ,
.$ rhispanic == 1 & .$ rrace == 4 ~ "other" ,
.$ rhispanic == 2 & .$ rrace %in% c (1 : 4 ) ~ "hispanic"
)
)
tabyl (allhh$ race_eth)
allhh$race_eth n percent
hispanic 54343 0.08476075
nh_asian 30413 0.04743626
nh_black 43677 0.06812460
nh_white 490036 0.76432696
other 22665 0.03535142
Creating dummy variables for race_eth.
Code
allhh$ hispanic <- car:: recode (allhh$ race_eth, "'hispanic'=1; else=0" )
tabyl (allhh$ hispanic)
allhh$hispanic n percent
0 586791 0.91523925
1 54343 0.08476075
Code
allhh$ nh_white <- car:: recode (allhh$ race_eth, "'nh_white'=1; else=0" )
tabyl (allhh$ nh_white)
allhh$nh_white n percent
0 151098 0.235673
1 490036 0.764327
Code
allhh$ nh_black <- car:: recode (allhh$ race_eth, "'nh_black'=1; else=0" )
tabyl (allhh$ nh_black)
allhh$nh_black n percent
0 597457 0.9318754
1 43677 0.0681246
Code
allhh$ nh_asian <- car:: recode (allhh$ race_eth, "'nh_asian'=1; else=0" )
tabyl (allhh$ nh_asian)
allhh$nh_asian n percent
0 610721 0.95256374
1 30413 0.04743626
Code
allhh$ other <- car:: recode (allhh$ race_eth, "'other'=1; else=0" )
tabyl (allhh$ other)
allhh$other n percent
0 618469 0.96464858
1 22665 0.03535142
Code
Relevel race_eth
Code
allhh$ race_eth <- factor (allhh$ race_eth, levels = c ("nh_white" , "nh_black" , "hispanic" , "nh_asian" , "other" ))
tabyl (allhh$ race_eth)
allhh$race_eth n percent
nh_white 490036 0.76432696
nh_black 43677 0.06812460
hispanic 54343 0.08476075
nh_asian 30413 0.04743626
other 22665 0.03535142
Clean single_adult
thhld_numadlt: Recode for the number of Adults in the household 1-40) number of people (whole number)
Creating dummy variable for single vs. multiple adults in household. Single adult is coded as “1”, multiple adults is coded as “0”.
Code
tabyl (allhh$ thhld_numadlt)
allhh$thhld_numadlt n percent
1 154302 0.2406704371
2 357826 0.5581142164
3 83572 0.1303502856
4 32097 0.0500628574
5 9346 0.0145772959
6 2332 0.0036373051
7 687 0.0010715389
8 286 0.0004460846
9 389 0.0006067374
10 297 0.0004632417
Code
allhh$ single_adult <- recode (allhh$ thhld_numadlt, "1=1; else=0" )
Warning: Unreplaced values treated as NA as `.x` is not compatible.
Please specify replacements exhaustively or supply `.default`.
Code
tabyl (allhh$ single_adult)
allhh$single_adult n percent valid_percent
1=1; else=0 154302 0.2406704 1
<NA> 486832 0.7593296 NA
Code
Clean thhld_numper
thhld_numper: Total number of people in household
Distribution of number of people in household. I’m leaving this as an integer.
Code
tabyl (allhh$ thhld_numper)
allhh$thhld_numper n percent
1 125064 0.195066866
2 257737 0.402001766
3 105072 0.163884617
4 88549 0.138113093
5 38548 0.060124717
6 15586 0.024310051
7 5380 0.008391382
8 2316 0.003612349
9 922 0.001438077
10 1960 0.003057083
Code
Clean child
thhld_numkid: Total number of people under 18-years-old in household.
Check distribution of ‘thhld_numkid’.
Code
tabyl (allhh$ thhld_numkid)
allhh$thhld_numkid n percent
0 444838 0.693829995
1 89460 0.139534013
2 69547 0.108474983
3 24980 0.038962214
4 8095 0.012626066
5 4214 0.006572729
Create dummy variables for number of children.
Code
allhh$ child0 <- car:: recode (allhh$ thhld_numkid, "0=1; else=0" )
tabyl (allhh$ child0)
allhh$child0 n percent
0 196296 0.30617
1 444838 0.69383
Code
allhh$ child1 <- car:: recode (allhh$ thhld_numkid, "1=1; else=0" )
tabyl (allhh$ child1)
allhh$child1 n percent
0 551674 0.860466
1 89460 0.139534
Code
allhh$ child2 <- car:: recode (allhh$ thhld_numkid, "2=1; else=0" )
tabyl (allhh$ child2)
allhh$child2 n percent
0 571587 0.891525
1 69547 0.108475
Code
allhh$ child3 <- car:: recode (allhh$ thhld_numkid, "3=1; else=0" )
tabyl (allhh$ child3)
allhh$child3 n percent
0 616154 0.96103779
1 24980 0.03896221
Code
allhh$ child4 <- car:: recode (allhh$ thhld_numkid, "4=1; else=0" )
tabyl (allhh$ child4)
allhh$child4 n percent
0 633039 0.98737393
1 8095 0.01262607
Code
allhh$ child5 <- car:: recode (allhh$ thhld_numkid, "5=1; else=0" )
tabyl (allhh$ child5)
allhh$child5 n percent
0 636920 0.993427271
1 4214 0.006572729
Code
Dummy variable for children vs. no children in household.
Code
allhh$ children <- car:: recode (allhh$ thhld_numkid, "0=0; else=1" )
tabyl (allhh$ children)
allhh$children n percent
0 444838 0.69383
1 196296 0.30617
Clean eeduc
eeduc 1) Less than high school 2) Some high school 3) High school graduate or equivalent (for example GED) 4) Some college, but degree not received or is in progress 5) Associate’s degree (for example AA, AS) 6) Bachelor’s degree (for example BA, BS, AB) 7) Graduate degree (for example master’s, professional, doctorate)
For educational attainment, create ‘education’ variable with values “less than high school”, “high school”, “some college”, “bachelor’s or higher”.
Code
allhh$eeduc n percent
1 3311 0.005164287
2 7164 0.011173951
3 67551 0.105361750
4 130047 0.202839032
5 66213 0.103274822
6 190114 0.296527715
7 176734 0.275658443
Code
Make a new categorical variable for education.
Code
allhh <- allhh %>%
mutate (education = case_when (eeduc %in% c (1 ,2 ) ~ '1lessthanhs' ,
eeduc == 3 ~ '2highschool' ,
eeduc %in% c (4 ,5 ) ~ '3somecollege' ,
eeduc %in% c (6 ,7 ) ~ '4BAorhigher'
)
)
tabyl (allhh$ education)
allhh$education n percent
1lessthanhs 10475 0.01633824
2highschool 67551 0.10536175
3somecollege 196260 0.30611385
4BAorhigher 366848 0.57218616
Code
Creating dummy variables for educational attainment.
Code
allhh$ hsorless <- car:: recode (allhh$ eeduc, "1=1; 2=1; 3=1; else=0" )
tabyl (allhh$ hsorless)
allhh$hsorless n percent
0 563108 0.8783
1 78026 0.1217
Code
allhh$ somecollege <- car:: recode (allhh$ eeduc, "4=1; 5=1; else=0" )
tabyl (allhh$ somecollege)
allhh$somecollege n percent
0 444874 0.6938861
1 196260 0.3061139
Code
allhh$ baorhigher <- car:: recode (allhh$ eeduc, "6=1; 7=1; else=0" )
tabyl (allhh$ baorhigher)
allhh$baorhigher n percent
0 274286 0.4278138
1 366848 0.5721862
Code
Recode education with 3 levels
Code
allhh$eeduc n percent
1 3311 0.005164287
2 7164 0.011173951
3 67551 0.105361750
4 130047 0.202839032
5 66213 0.103274822
6 190114 0.296527715
7 176734 0.275658443
Code
allhh <- allhh %>%
mutate (educ = case_when (eeduc %in% c (1 : 3 ) ~ "HSorless" ,
eeduc == 4 ~ "somecollege" ,
TRUE ~ "BAorhigher"
)
)
allhh$ educ <- factor (allhh$ educ, levels= c ("somecollege" , "HSorless" , "BAorhigher" ))
tabyl (allhh$ educ)
allhh$educ n percent
somecollege 130047 0.202839
HSorless 78026 0.121700
BAorhigher 433061 0.675461
Clean wrklossrv
wrklossrv: Recent household job loss. 1) Yes 2) No -99) Question seen but category not selected -88) Missing / Did not report
Code
allhh$wrklossrv n percent
-99 804 0.001254028
1 69013 0.107642084
2 571317 0.891103888
Remove 99s
Code
allhh <- allhh %>%
filter (wrklossrv %in% c (1 : 2 ))
tabyl (allhh$ wrklossrv)
allhh$wrklossrv n percent
1 69013 0.1077772
2 571317 0.8922228
Code
Create dummy variable ‘workloss’ so 1 is Yes and 0 is No.
Code
allhh <- allhh %>%
mutate (workloss = (case_when (wrklossrv == 1 ~ 1 ,
TRUE ~ 0 )
)
)
tabyl (allhh$ workloss)
allhh$workloss n percent
0 571317 0.8922228
1 69013 0.1077772
Code
Clean marital status
ms: Marital status 1) Now married 2) Widowed 3) Divorced 4) Separated 5) Never married -99) Question seen but category not selected -88) Missing / Did not report
Code
allhh$ms n percent
-99 2455 0.003833961
1 367908 0.574559993
2 36916 0.057651523
3 101558 0.158602596
4 10846 0.016938141
5 120647 0.188413787
Remove 99s
Code
allhh <- allhh %>%
filter (ms %in% c (1 : 5 ))
tabyl (allhh$ ms)
allhh$ms n percent
1 367908 0.57677131
2 36916 0.05787341
3 101558 0.15921301
4 10846 0.01700333
5 120647 0.18913894
Code
How should marital status be coded? Dummy variable of married/unmarried? or Dichotomized classes?
Recode married
Create a dummy marital status variable ‘married’.
Code
allhh <- allhh %>%
mutate (married = (case_when (ms == 1 ~ 1 ,
TRUE ~ 0 )
)
)
tabyl (allhh$ married)
allhh$married n percent
0 269967 0.4232287
1 367908 0.5767713
Code
Create a dummy marital status variable ‘unmarried’.
Code
allhh <- allhh %>%
mutate (unmarried = (case_when (ms != 1 ~ 1 ,
TRUE ~ 0 )
)
)
tabyl (allhh$ unmarried)
allhh$unmarried n percent
0 367908 0.5767713
1 269967 0.4232287
Code
Recode rentcur
rentcur: Caught up on rent? 1) Yes 2) No -99) Question seen but category not selected -88) Missing / Did not report
Code
allhh$rentcur n percent
-88 487485 0.76423280
1 133730 0.20964923
2 16660 0.02611797
Creating new dummy variable ‘laterent’, 1 is yes, 0 is no.
Code
allhh <- allhh %>%
mutate (laterent = (case_when (rentcur == 2 ~ 1 ,
rentcur == 1 ~ 0 ,
TRUE ~ NA )
)
)
tabyl (allhh$ laterent)
allhh$laterent n percent valid_percent
0 133730 0.20964923 0.8892214
1 16660 0.02611797 0.1107786
NA 487485 0.76423280 NA
Code
Filter and write csv files for allhh, ownerhh, renterhh, currenthh, latehh
Write out csv for allhh
Code
# Export to csv files
fwrite (allhh, "./data/allhh.csv" )
Filter for only owners
Code
ownerhh <- allhh %>%
filter (owner == 1 )
nrow (ownerhh)
Write out csv for ownerhh
Code
# Export to csv files
fwrite (ownerhh, "./data/ownerhh.csv" )
Filter for only renters
Code
renterhh <- allhh %>%
filter (renting == 1 )
nrow (renterhh)
Write out csv for renterhh
Code
# Export to csv files
fwrite (renterhh, "./data/renterhh.csv" )
Filter for only current renters
Code
renterhh$laterent n percent
0 133730 0.8892214
1 16660 0.1107786
Code
currenthh <- renterhh %>%
filter (laterent == 0 )
nrow (currenthh)
Write out csv for currenthh
Code
# Export to csv files
fwrite (currenthh, "./data/currenthh.csv" )
Filter for only late renters
Code
latehh <- renterhh %>%
filter (laterent == 1 )
nrow (latehh)
Write out csv for latehh.
Code
# Export to csv files
fwrite (latehh, "./data/latehh.csv" )
Descriptive tables, no weights
PAA: change these variables to match models
Questions: Weights or no weights? Household or person? HPS or matching?
Descriptive statistics for all households
Code
# Summary statistics of all households
table_allhh <- allhh %>%
tbl_summary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Households with Children"
)
) %>%
bold_labels () %>%
modify_caption ("**Descriptive Statistics**" ) # %>%
# as_gt() # %>% # convert to gt table
# gtsave(filename = "all_households.png") # save table as image
table_allhh
Descriptive Statistics
Characteristic
N = 637,875
Age
54 (41, 66)
Female
58%
Non-Hispanic Black
6.8%
Hispanic
8.5%
Unmarried
42%
Highschool or less
12%
Income < $75,000
48%
Households with Children
31%
Descriptive statistics for owner households
Code
# Summary statistics of owner households
table_ownerhh <- ownerhh %>%
tbl_summary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Households with Children"
)
) %>%
bold_labels () %>%
modify_caption ("**Descriptive Statistics**" ) # %>%
# as_gt() # %>% # convert to gt table
# gtsave(filename = "owner_households.png") # save table as image
table_ownerhh
Descriptive Statistics
Characteristic
N = 479,624
Age
57 (44, 67)
Female
57%
Non-Hispanic Black
5.1%
Hispanic
7.1%
Unmarried
34%
Highschool or less
10%
Income < $75,000
39%
Households with Children
31%
Descriptive statistics for all renters
Code
# Summary statistics of all renters
table_renters <- renterhh %>%
tbl_summary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Households with Children"
)
)%>%
bold_labels () #%>%
#modify_caption("**Renters**") # %>%
# as_gt() # %>% # convert to gt table
# gt::gtsave(filename = "renters.png") # save table as image
table_renters
Characteristic
N = 150,390
Age
44 (33, 58)
Female
61%
Non-Hispanic Black
12%
Hispanic
13%
Unmarried
69%
Highschool or less
18%
Income < $75,000
73%
Households with Children
29%
Descriptive statistics for current renters
Code
# Summary statistics of all current renters
table_currenthh <- currenthh %>%
tbl_summary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Households with Children"
)
)%>%
bold_labels () # %>%
# modify_caption("**Current Renters**") # %>%
# as_gt() #%>% # convert to gt table
#gt::gtsave(filename = "currentrenters.png") # save table as image
table_currenthh
Characteristic
N = 133,730
Age
44 (32, 59)
Female
60%
Non-Hispanic Black
10%
Hispanic
12%
Unmarried
69%
Highschool or less
16%
Income < $75,000
71%
Households with Children
27%
Descriptive statistics for late renters
Code
# Summary statistics of all late renters
table_latehh <- latehh %>%
tbl_summary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Households with Children"
)
)%>%
bold_labels () # %>%
# modify_caption("**Late Renters**") # %>%
# as_gt() #%>% # convert to gt table
#gt::gtsave(filename = "laterenters.png") # save table as image
table_latehh
Characteristic
N = 16,660
Age
44 (35, 54)
Female
66%
Non-Hispanic Black
23%
Hispanic
18%
Unmarried
70%
Highschool or less
28%
Income < $75,000
90%
Households with Children
47%
Code
[1] "tbl_summary" "gtsummary"
Code
table_compare <- tbl_merge (tbls = list (table_ownerhh, table_currenthh, table_latehh),
tab_spanner = c ("**Owner HH**" , "**Current HH**" , "**Late HH**" )
) %>%
as_gt ()
table_compare
Descriptive Statistics
Characteristic
Owner HH
Current HH
Late HH
N = 479,624
N = 133,730
N = 16,660
Age
57 (44, 67)
44 (32, 59)
44 (35, 54)
Female
57%
60%
66%
Non-Hispanic Black
5.1%
10%
23%
Hispanic
7.1%
12%
18%
Unmarried
34%
69%
70%
Highschool or less
10%
16%
28%
Income < $75,000
39%
71%
90%
Households with Children
31%
27%
47%
Code
gtsave (table_compare, filename = "./figures/table_compare.png" )
file:////var/folders/n0/x0mfrjpd1p75y8wj5zn84ztc0000gn/T//RtmpPUVwLF/file30dea90c554.html screenshot completed
Test for difference between Current and Late Renters
Code
renterhh <- renterhh %>%
mutate (late_label = case_when (laterent == 1 ~ "Late" ,
TRUE ~ "Current" ))
tabyl (renterhh$ late_label)
renterhh$late_label n percent
Current 133730 0.8892214
Late 16660 0.1107786
Code
#PAA: Change nh_white to nh_black. Change Bachelors to HS or less. Tinker with other things for PAA.
# Summary statistics of all renters by rentcur
renterhh %>%
tbl_summary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
by = late_label,
#by = laterent,
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Children in Household"
)
)%>%
bold_labels () %>%
modify_caption ("**Current vs. Late Renters**" ) %>%
add_p () %>% # test for a difference between groups
as_gt () # %>% # convert to gt table
Current vs. Late Renters
Characteristic
Current
N = 133,730
Late
N = 16,660
p-value
Age
44 (32, 59)
44 (35, 54)
0.012
Female
80,590 (60%)
11,008 (66%)
<0.001
Non-Hispanic Black
14,005 (10%)
3,854 (23%)
<0.001
Hispanic
16,285 (12%)
2,960 (18%)
<0.001
Unmarried
92,522 (69%)
11,686 (70%)
0.011
Highschool or less
21,811 (16%)
4,659 (28%)
<0.001
Income < $75,000
94,362 (71%)
15,051 (90%)
<0.001
Children in Household
36,136 (27%)
7,865 (47%)
<0.001
Code
#gt::gtsave(filename = "./figures/renters_by_laterent.png") # save table as image
Descriptive tables with weights
Descriptive statistics for owner households
Code
#allhh$pweight12 <- (allhh$pweight/12) # Dividing weights by number of weeks of data
svy_ownerhhw <- survey:: svydesign (
id = ~ 0 ,
data = allhh,
weights = ~ hweight,
)
table_ownerhhw <- svy_ownerhhw %>%
tbl_svysummary (include = c (score, age, female, nh_white, hispanic, nh_black, nh_asian, unmarried, hsorless, somecollege, baorhigher, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
score ~ "State Score" ,
age ~ "Age" ,
female ~ "Female" ,
nh_white ~ "Non-Hispanic White" ,
hispanic ~ "Hispanic" ,
nh_black ~ "Non-Hispanic Black" ,
nh_asian ~ "Non-Hispanic Asian" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or Less" ,
somecollege ~ "Some College" ,
baorhigher ~ "Bachelor's or Higher" ,
children ~ "Households with Children"
)
) %>%
bold_labels () %>%
modify_caption ("**Descriptive Statistics**" ) # %>%
# as_gt() # %>% # convert to gt table
# gtsave(filename = "all_households.png") # save table as image
table_ownerhhw
Descriptive Statistics
Characteristic
N = 1,090,516,562
State Score
1.58 (0.83, 3.13)
Age
50 (36, 65)
Female
51%
Non-Hispanic White
68%
Hispanic
13%
Non-Hispanic Black
11%
Non-Hispanic Asian
4.6%
Unmarried
48%
Highschool or Less
35%
Some College
30%
Bachelor's or Higher
35%
Households with Children
33%
Descriptive statistics for current renters
Code
#currenthh$pweight12 <- (renterhh$pweight/12)
svy_currenthh <- survey:: svydesign (
id = ~ 0 ,
weights = ~ hweight,
data = currenthh
)
table_currenthhw <- svy_currenthh %>%
tbl_svysummary (include = c (score, age, female, nh_white, hispanic, nh_black, nh_asian, unmarried, hsorless, somecollege, baorhigher, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
score ~ "State Score" ,
age ~ "Age" ,
female ~ "Female" ,
nh_white ~ "Non-Hispanic White" ,
hispanic ~ "Hispanic" ,
nh_black ~ "Non-Hispanic Black" ,
nh_asian ~ "Non-Hispanic Asian" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or Less" ,
somecollege ~ "Some College" ,
baorhigher ~ "Bachelor's or Higher" ,
children ~ "Households with Children"
)
) %>%
bold_labels () #%>%
#modify_caption("**Renters**") # %>%
# as_gt() # %>% # convert to gt table
# gt::gtsave(filename = "renters.png") # save table as image
table_currenthhw
Characteristic
N = 286,666,653
State Score
1.43 (1.15, 3.13)
Age
39 (30, 56)
Female
53%
Non-Hispanic White
58%
Hispanic
18%
Non-Hispanic Black
15%
Non-Hispanic Asian
4.3%
Unmarried
70%
Highschool or Less
39%
Some College
31%
Bachelor's or Higher
30%
Households with Children
30%
Descriptive statistics for late renters
Code
#latehh$hweight12 <- (latehh$hweight/12)
svy_latehhw <- survey:: svydesign (
id = ~ 0 ,
weights = ~ hweight,
data = latehh
)
table_latehhw <- svy_latehhw %>%
tbl_svysummary (include = c (score, age, female, nh_white, hispanic, nh_black, nh_asian, unmarried, hsorless, somecollege, baorhigher, children),
statistic = list (all_categorical () ~ "{p}%" ),
label = list (
score ~ "State Score" ,
age ~ "Age" ,
female ~ "Female" ,
nh_white ~ "Non-Hispanic White" ,
hispanic ~ "Hispanic" ,
nh_black ~ "Non-Hispanic Black" ,
nh_asian ~ "Non-Hispanic Asian" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or Less" ,
somecollege ~ "Some College" ,
baorhigher ~ "Bachelor's or Higher" ,
children ~ "Households with Children"
)
) %>%
bold_labels () # %>%
# modify_caption("**Late Renters**") # %>%
# as_gt() #%>% # convert to gt table
#gt::gtsave(filename = "laterenters.png") # save table as image
table_latehhw
Characteristic
N = 47,259,273
State Score
1.58 (1.28, 3.13)
Age
41 (32, 52)
Female
57%
Non-Hispanic White
36%
Hispanic
25%
Non-Hispanic Black
29%
Non-Hispanic Asian
4.8%
Unmarried
72%
Highschool or Less
55%
Some College
32%
Bachelor's or Higher
13%
Households with Children
50%
Merging descriptive statistics
Code
# class(table_allhh)
table_comparew <- tbl_merge (tbls = list (table_ownerhhw, table_currenthhw, table_latehhw),
tab_spanner = c ("**Owner HH**" , "**Current HH**" , "**Late HH**" )
) %>%
as_gt ()
table_comparew
Descriptive Statistics
Characteristic
Owner HH
Current HH
Late HH
N = 1,090,516,562
N = 286,666,653
N = 47,259,273
State Score
1.58 (0.83, 3.13)
1.43 (1.15, 3.13)
1.58 (1.28, 3.13)
Age
50 (36, 65)
39 (30, 56)
41 (32, 52)
Female
51%
53%
57%
Non-Hispanic White
68%
58%
36%
Hispanic
13%
18%
25%
Non-Hispanic Black
11%
15%
29%
Non-Hispanic Asian
4.6%
4.3%
4.8%
Unmarried
48%
70%
72%
Highschool or Less
35%
39%
55%
Some College
30%
31%
32%
Bachelor's or Higher
35%
30%
13%
Households with Children
33%
30%
50%
Code
gtsave (table_comparew, filename = "table_comparew.png" )
file:////var/folders/n0/x0mfrjpd1p75y8wj5zn84ztc0000gn/T//RtmpPUVwLF/file30de347c40f7.html screenshot completed
Test for difference between Current and Late renters with weights
Code
#PAA: Change nh_white to nh_black. Change Bachelors to HS or less. Tinker with other things for PAA.
# Summary statistics of all renters by rentcur
svy_renterhhw <- survey:: svydesign (
id = ~ 0 ,
weights = ~ hweight,
data = renterhh
)
tbl_renterhhw <- svy_renterhhw %>%
tbl_svysummary (include = c (age, female, nh_black, hispanic, unmarried, hsorless, less75k, children),
by = late_label,
#by = laterent,
label = list (
age ~ "Age" ,
female ~ "Female" ,
nh_black ~ "Non-Hispanic Black" ,
hispanic ~ "Hispanic" ,
unmarried ~ "Unmarried" ,
hsorless ~ "Highschool or less" ,
less75k ~ "Income < $75,000" ,
children ~ "Children in Household"
)
)%>%
bold_labels () %>%
modify_caption ("**Current vs. Late Renters**" ) %>%
add_p () %>% # test for a difference between groups
as_gt () # %>% # convert to gt table
#gt::gtsave(filename = "./figures/renters_by_laterent.png") # save table as image
tbl_renterhhw
Current vs. Late Renters
Characteristic
Current
N = 286,666,653
Late
N = 47,259,273
p-value
Age
39 (30, 56)
41 (32, 52)
0.090
Female
150,638,894 (53%)
27,032,320 (57%)
<0.001
Non-Hispanic Black
43,767,431 (15%)
13,873,755 (29%)
<0.001
Hispanic
52,990,154 (18%)
11,641,697 (25%)
<0.001
Unmarried
200,445,010 (70%)
33,824,257 (72%)
0.017
Highschool or less
111,272,101 (39%)
25,821,549 (55%)
<0.001
Income < $75,000
221,290,632 (77%)
44,135,243 (93%)
<0.001
Children in Household
86,618,470 (30%)
23,766,491 (50%)
<0.001
U.S. map of state scores
Code
# Get centroids
centroid_labels <- usmapdata:: centroid_labels ("states" )
# Create the dataframe with the state scores
statedat <- read.csv ("./data/state_policy.csv" )
statescore <- statedat %>%
dplyr:: select (abbv, score) %>%
rename (abbr = abbv)
# Join data to centroids
data_labels <- merge (centroid_labels, statescore, by = "abbr" )
map1dat <- data_labels %>%
dplyr:: select (fips, score)
Code
scoremap <- plot_usmap (data = map1dat, values = "score" ) +
labs (title= "Housing Policy Score By State" ,
caption = "Source: COVID 19 Housing Policy Score" ) +
theme (panel.background = element_rect (colour = "black" )) +
scale_fill_continuous (name = "State Housing Policy Score" ,
low = "white" ,
high = "darkgreen"
) +
theme (legend.position = "right" ) +
geom_sf_text (data = data_labels, ggplot2:: aes (label = scales:: number (score, scale = 1 , accuracy = .1 )), color = "black" ) # I'm having problems with this geom_text function. I am not sure geom_text is a good function? Fix for PAA or use old map.
scoremap
Code
ggsave ("./figures/scoremap.png" ) # saves last plot made