##Phase 1: April 23, 2020 - July 21, 2020
##Phase 2: August 19, 2020 - October 26, 2020
##Phase 3: October 28, 2020 - March 29, 2021
##Phase 3.1: April 14, 2021 - July 5, 2021
##Phase 3.2: July 21, 2021 – October 11, 2021
##Phase 3.3: December 1, 2021 – February 7, 2022
##Phase 3.4: March 2, 2022 – May 9, 2022
##Phase 3.5: June 1, 2022 – August 8, 2022
##Phase 3.6: September 14, 2022 – November 14, 2022
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(testit)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(stringr)
library(httr)
library(here)
## here() starts at C:/Users/Mahmuda Sultana/Desktop/COMPLETE/Fall'21/Dem5213/pop data
library(ggplot2)
library(dplyr)
In this part, we are going to perform 3 tasks. First, we we will read the CSV file. Then we will read the weighted file. Finally, we will merge these two file
url <- "https://www2.census.gov/programs-surveys/demo/datasets/hhp/2021/wk38/HPS_Week38_PUF_CSV.zip"
download.file(url, "data38.zip")
unzip("data1.zip")
## Warning in unzip("data1.zip"): error 1 in extracting from zip file
unzip("data2.zip")
## Warning in unzip("data2.zip"): error 1 in extracting from zip file
unzip("data3.zip")
## Warning in unzip("data3.zip"): error 1 in extracting from zip file
unzip("data4.zip")
## Warning in unzip("data4.zip"): error 1 in extracting from zip file
unzip("data5.zip")
## Warning in unzip("data5.zip"): error 1 in extracting from zip file
unzip("data6.zip")
## Warning in unzip("data6.zip"): error 1 in extracting from zip file
unzip("data7.zip")
## Warning in unzip("data7.zip"): error 1 in extracting from zip file
unzip("data8.zip")
## Warning in unzip("data8.zip"): error 1 in extracting from zip file
unzip("data9.zip")
## Warning in unzip("data9.zip"): error 1 in extracting from zip file
unzip("data10.zip")
## Warning in unzip("data10.zip"): error 1 in extracting from zip file
unzip("data11.zip")
## Warning in unzip("data11.zip"): error 1 in extracting from zip file
unzip("data12.zip")
## Warning in unzip("data12.zip"): error 1 in extracting from zip file
unzip("data13.zip")
## Warning in unzip("data13.zip"): error 1 in extracting from zip file
unzip("data14.zip")
## Warning in unzip("data14.zip"): error 1 in extracting from zip file
unzip("data15.zip")
## Warning in unzip("data15.zip"): error 1 in extracting from zip file
unzip("data16.zip")
## Warning in unzip("data16.zip"): error 1 in extracting from zip file
unzip("data17.zip")
## Warning in unzip("data17.zip"): error 1 in extracting from zip file
unzip("data18.zip")
## Warning in unzip("data18.zip"): error 1 in extracting from zip file
unzip("data19.zip")
## Warning in unzip("data19.zip"): error 1 in extracting from zip file
unzip("data20.zip")
## Warning in unzip("data20.zip"): error 1 in extracting from zip file
unzip("data21.zip")
## Warning in unzip("data21.zip"): error 1 in extracting from zip file
unzip("data22.zip")
## Warning in unzip("data22.zip"): error 1 in extracting from zip file
unzip("data23.zip")
## Warning in unzip("data23.zip"): error 1 in extracting from zip file
unzip("data24.zip")
## Warning in unzip("data24.zip"): error 1 in extracting from zip file
unzip("data25.zip")
## Warning in unzip("data25.zip"): error 1 in extracting from zip file
unzip("data26.zip")
## Warning in unzip("data26.zip"): error 1 in extracting from zip file
unzip("data27.zip")
## Warning in unzip("data27.zip"): error 1 in extracting from zip file
unzip("data28.zip")
## Warning in unzip("data28.zip"): error 1 in extracting from zip file
unzip("data29.zip")
## Warning in unzip("data29.zip"): error 1 in extracting from zip file
unzip("data30.zip")
## Warning in unzip("data30.zip"): error 1 in extracting from zip file
unzip("data31.zip")
## Warning in unzip("data31.zip"): error 1 in extracting from zip file
unzip("data32.zip")
## Warning in unzip("data32.zip"): error 1 in extracting from zip file
unzip("data33.zip")
## Warning in unzip("data33.zip"): error 1 in extracting from zip file
unzip("data34.zip")
unzip("data35.zip")
## Warning in unzip("data35.zip"): error 1 in extracting from zip file
unzip("data36.zip")
## Warning in unzip("data36.zip"): error 1 in extracting from zip file
unzip("data37.zip")
## Warning in unzip("data37.zip"): error 1 in extracting from zip file
unzip("data38.zip")
unzip("data39.zip")
## Warning in unzip("data39.zip"): error 1 in extracting from zip file
unzip("data40.zip")
## Warning in unzip("data40.zip"): error 1 in extracting from zip file
unzip("data38.zip")
unzip("data41.zip")
## Warning in unzip("data41.zip"): error 1 in extracting from zip file
unzip("data42.zip")
## Warning in unzip("data42.zip"): error 1 in extracting from zip file
unzip("data43.zip")
## Warning in unzip("data43.zip"): error 1 in extracting from zip file
unzip("data44.zip")
## Warning in unzip("data44.zip"): error 1 in extracting from zip file
unzip("data45.zip")
## Warning in unzip("data45.zip"): error 1 in extracting from zip file
unzip("data46.zip")
## Warning in unzip("data46.zip"): error 1 in extracting from zip file
unzip("data47.zip")
## Warning in unzip("data47.zip"): error 1 in extracting from zip file
unzip("data48.zip")
## Warning in unzip("data48.zip"): error 1 in extracting from zip file
unzip("data49.zip")
## Warning in unzip("data49.zip"): error 1 in extracting from zip file
unzip("data50.zip")
## Warning in unzip("data50.zip"): error 1 in extracting from zip file
unzip("data51.zip")
## Warning in unzip("data51.zip"): error 1 in extracting from zip file
unzip("data52.zip")
## Warning in unzip("data52.zip"): error 1 in extracting from zip file
unzip("data53.zip")
## Warning in unzip("data53.zip"): error 1 in extracting from zip file
We worked only with Week13 to Week 45
data13 <-read.csv("pulse2020_puf_13.csv")
data14 <-read.csv("pulse2020_puf_14.csv")
data15 <-read.csv("pulse2020_puf_15.csv")
data16 <-read.csv("pulse2020_puf_16.csv")
data17 <-read.csv("pulse2020_puf_17.csv")
data18 <-read.csv("pulse2020_puf_18.csv")
data19 <-read.csv("pulse2020_puf_19.csv")
data20 <-read.csv("pulse2020_puf_20.csv")
data21 <-read.csv("pulse2020_puf_21.csv")
data22 <-read.csv("pulse2021_puf_22.csv")
data23 <-read.csv("pulse2021_puf_23.csv")
data24 <-read.csv("pulse2021_puf_24.csv")
data25 <-read.csv("pulse2021_puf_25.csv")
data26 <-read.csv("pulse2021_puf_26.csv")
data27 <-read.csv("pulse2021_puf_27.csv")
data28 <-read.csv("pulse2021_puf_28.csv")
data29 <-read.csv("pulse2021_puf_29.csv")
data30 <-read.csv("pulse2021_puf_30.csv")
data31 <-read.csv("pulse2021_puf_31.csv")
data32 <-read.csv("pulse2021_puf_32.csv")
data33 <-read.csv("pulse2021_puf_33.csv")
data34 <-read.csv("pulse2021_puf_34.csv")
data35 <-read.csv("pulse2021_puf_35.csv")
data36 <-read.csv("pulse2021_puf_36.csv")
data37 <-read.csv("pulse2021_puf_37.csv")
data38 <-read.csv("pulse2021_puf_38.csv")
data39 <-read.csv("pulse2021_puf_39.csv")
data40 <-read.csv("pulse2021_puf_40.csv")
data41 <-read.csv("pulse2022_puf_41.csv")
data42 <-read.csv("pulse2022_puf_42.csv")
data43 <-read.csv("pulse2022_puf_43.csv")
data44 <-read.csv("pulse2022_puf_44.csv")
data45 <-read.csv("pulse2022_puf_45.csv")
w_data13 <- read.csv("pulse2020_repwgt_puf_13.csv")
w_data14 <- read.csv("pulse2020_repwgt_puf_14.csv")
w_data15 <- read.csv("pulse2020_repwgt_puf_15.csv")
w_data16 <- read.csv("pulse2020_repwgt_puf_16.csv")
w_data17 <- read.csv("pulse2020_repwgt_puf_17.csv")
w_data18 <- read.csv("pulse2020_repwgt_puf_18.csv")
w_data19 <- read.csv("pulse2020_repwgt_puf_19.csv")
w_data20 <- read.csv("pulse2020_repwgt_puf_20.csv")
w_data21 <- read.csv("pulse2020_repwgt_puf_21.csv")
w_data22 <- read.csv("pulse2021_repwgt_puf_22.csv")
w_data23 <- read.csv("pulse2021_repwgt_puf_23.csv")
w_data24 <- read.csv("pulse2021_repwgt_puf_24.csv")
w_data25 <- read.csv("pulse2021_repwgt_puf_25.csv")
w_data26 <- read.csv("pulse2021_repwgt_puf_26.csv")
w_data27 <- read.csv("pulse2021_repwgt_puf_27.csv")
w_data28 <- read.csv("pulse2021_repwgt_puf_28.csv")
w_data29 <- read.csv("pulse2021_repwgt_puf_29.csv")
w_data30 <- read.csv("pulse2021_repwgt_puf_30.csv")
w_data31 <- read.csv("pulse2021_repwgt_puf_31.csv")
w_data32 <- read.csv("pulse2021_repwgt_puf_32.csv")
w_data33 <- read.csv("pulse2021_repwgt_puf_33.csv")
w_data34 <- read.csv("pulse2021_repwgt_puf_34.csv")
w_data35 <- read.csv("pulse2021_repwgt_puf_35.csv")
w_data36 <- read.csv("pulse2021_repwgt_puf_36.csv")
w_data37 <- read.csv("pulse2021_repwgt_puf_37.csv")
w_data38 <- read.csv("pulse2021_repwgt_puf_38.csv")
w_data39 <- read.csv("pulse2021_repwgt_puf_39.csv")
w_data40 <- read.csv("pulse2021_repwgt_puf_40.csv")
w_data41 <- read.csv("pulse2022_repwgt_puf_41.csv")
w_data42 <- read.csv("pulse2022_repwgt_puf_42.csv")
w_data43 <- read.csv("pulse2022_repwgt_puf_43.csv")
w_data44 <- read.csv("pulse2022_repwgt_puf_44.csv")
w_data45 <- read.csv("pulse2022_repwgt_puf_45.csv")
–> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –>
data13<-merge(data13, w_data13, by = "SCRAM")
data14<-merge(data14, w_data14, by = "SCRAM")
data15<-merge(data15, w_data15, by = "SCRAM")
data16<-merge(data16, w_data16, by = "SCRAM")
data17<-merge(data17, w_data17, by = "SCRAM")
data18<-merge(data18, w_data18, by = "SCRAM")
data19<-merge(data19, w_data19, by = "SCRAM")
data20<-merge(data20, w_data20, by = "SCRAM")
data21<-merge(data21, w_data21, by = "SCRAM")
data22<-merge(data22, w_data22, by = "SCRAM")
data23<-merge(data23, w_data23, by = "SCRAM")
data24<-merge(data24, w_data24, by = "SCRAM")
data25<-merge(data25, w_data25, by = "SCRAM")
data26<-merge(data26, w_data26, by = "SCRAM")
data27<-merge(data27, w_data27, by = "SCRAM")
data28<-merge(data28, w_data28, by = "SCRAM")
data29<-merge(data29, w_data29, by = "SCRAM")
data30<-merge(data30, w_data30, by = "SCRAM")
data31<-merge(data31, w_data31, by = "SCRAM")
data32<-merge(data32, w_data32, by = "SCRAM")
data33<-merge(data33, w_data33, by = "SCRAM")
data34<-merge(data34, w_data34, by = "SCRAM")
data35<-merge(data35, w_data35, by = "SCRAM")
data36<-merge(data36, w_data36, by = "SCRAM")
data37<-merge(data37, w_data37, by = "SCRAM")
data38<-merge(data38, w_data38, by = "SCRAM")
data39<-merge(data39, w_data39, by = "SCRAM")
data40<-merge(data40, w_data40, by = "SCRAM")
data41<-merge(data41, w_data41, by = "SCRAM")
data42<-merge(data42, w_data42, by = "SCRAM")
data43<-merge(data43, w_data43, by = "SCRAM")
data44<-merge(data44, w_data44, by = "SCRAM")
data45<-merge(data45, w_data45, by = "SCRAM")
rm(w_data13, w_data14, w_data15, w_data16, w_data17, w_data18, w_data19)
rm(w_data20, w_data21, w_data22, w_data23, w_data24, w_data25, w_data26, w_data27, w_data28, w_data29, w_data30, w_data31, w_data32, w_data33, w_data34, w_data35, w_data36, w_data37, w_data38, w_data39, w_data40, w_data41, w_data42, w_data43, w_data44, w_data45)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3697037 197.5 11109673 593.4 21698578 1158.9
## Vcells 682445710 5206.7 1594937980 12168.5 1329000216 10139.5
WEEK 13 TO WEEK 45. Combine weeks with the same variables data6, data33, data45, data48, data51, data52 are single.
data13_21 <- rbind(data13, data14, data15, data16, data17, data18, data19, data20, data21)
data22_27<-rbind(data22, data23, data24, data25, data26, data27)
data28_33<-rbind(data28, data29, data30, data31, data32, data33)
data34_39 <-rbind(data34, data35, data36, data37, data38, data39)
data40_42 <-rbind(data40, data41, data42)
data43_45 <- rbind(data43, data44)
rm(data13, data14, data15, data16, data17, data18, data19, data20, data21)
rm(data22, data23, data24, data25, data26, data27, data28, data29, data30, data31, data32)
rm(data33, data34, data35, data36, data37, data39, data40, data41, data42,data43, data44, data45)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3626139 193.7 11109673 593.4 21698578 1158.9
## Vcells 713010626 5439.9 1594937980 12168.5 1594874459 12168.0
data13_21 <- data13_21%>%
select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK )
data22_27 <- data22_27 %>%
select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK )
data28_33 <- data28_33 %>%
select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK)
data34_39 <- data34_39 %>%
select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK)
data40_42 <- data40_42%>%
select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK )
data43_45 <- data43_45%>%
select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK)
data_all<-rbind(data13_21, data22_27, data28_33,data34_39, data40_42, data43_45)
#clean up
rm(data13_21, data22_27, data28_33,data34_39, data40_42, data43_45)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1293680 69.1 8887739 474.7 21698578 1158.9
## Vcells 54807998 418.2 1275950384 9734.8 1594874459 12168.0
saveRDS(data_all, file ="FINAL_allweeks.rds")
write.csv(data_all, 'final_pulse.csv')
data_all <- readRDS("C:/Users/Mahmuda Sultana/Desktop/COMPLETE/Fall'21/Dem5213/pop data/FINAL_allweeks.rds")
#data13_45 <- readRDS("C:/Users/Mahmuda Sultana/Desktop/COMPLETE/Fall'21/Dem5213/pop data/13_45.rds")
data_all$hisp<-ifelse(data_all$RHISPANIC==1, "Not_hispanic", "hispanic")
data_all$race <- car::Recode(data_all$RRACE, recodes = "1= 'White'; 2= 'Black'; 3 = 'Asian'; 4='Other'; else=NA")
data_all$race_eth<-ifelse(data_all$hisp==1, "hisp",data_all$race)
data_all$race_eth <- interaction(data_all$hisp, data_all$race)
data_all$race_ethnicity<- mutate(data_all, ifelse(hisp==0 & race==1, 1,
ifelse(hisp==0 & race==2, 2,
ifelse(hisp==1, 3,
ifelse(hisp==0 & race==3, 4,
ifelse(hisp==0 & race==4, 5, "NA"))))))
data_all$race_ethnicity<- ifelse(substr(data_all$race_eth, 1, 4)=="hisp", "hisp", as.character(data_all$race_eth))
MORTAGE TREND
#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report
#Output: What percentage of households are currently caught up on mortgage.
library(tidyverse)
data_all %>%
select(WEEK.x, MORTCUR) %>%
mutate( caught_up = case_when(MORTCUR< 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_Mort_caught_up = mean(caught_up, na.rm=TRUE)) %>%
ggplot(aes(x=WEEK.x, y=mean_Mort_caught_up*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
ylim(92, 96) +
labs(title = "Updated: %Household Currently Caught Up on Mortage From 19th August 2020 to 9th May 2022",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_GENERAL_MORTGAGE.png",height=8, width=10, dpi = "print" )
# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report
# Output: What percentage of households are currently caught up on mortgage.
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on mortgage for each wave before the peak of the pandemic
mort_caught_up_before <- data_peak_before %>%
select(WEEK.x, MORTCUR) %>%
mutate(caught_up = case_when(MORTCUR < 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_Mort_caught_up = mean(caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of households that are currently caught up on mortgage for each wave after the peak of the pandemic
mort_caught_up_after <- data_peak_after %>%
select(WEEK.x, MORTCUR) %>%
mutate(caught_up = case_when(MORTCUR < 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_Mort_caught_up = mean(caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
mort_caught_up_combined <- rbind(mort_caught_up_before, mort_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on mortgage before and after the peak of the pandemic
ggplot(mort_caught_up_combined, aes(x = WEEK.x, y = mean_Mort_caught_up * 100, color = Pandemic)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("% of Households Caught Up on Mortgage") +
ylim(92, 96) +
labs(title = "Trend of Households Caught Up on Mortgage Before and After the Peak of the Pandemic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_MORTGAGE_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report
# Output: What percentage of Hispanic households are currently caught up on mortgage.
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of Hispanic households that are currently caught up on mortgage for each wave before the peak of the pandemic
Hisp_mort_caught_up_before <- data_peak_before %>%
select(WEEK.x, MORTCUR, RHISPANIC) %>%
filter(RHISPANIC == 2) %>% # Select only Hispanic households
mutate(hisp_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(hisp_mean_Mort_caught_up = mean(hisp_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of Hispanic households that are currently caught up on mortgage for each wave after the peak of the pandemic
Hisp_mort_caught_up_after <- data_peak_after %>%
select(WEEK.x, MORTCUR, RHISPANIC) %>%
filter(RHISPANIC == 2) %>% # Select only Hispanic households
mutate(hisp_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(hisp_mean_Mort_caught_up = mean(hisp_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
Hisp_caught_up_combined <- rbind(Hisp_mort_caught_up_before, Hisp_mort_caught_up_after)
# Plot the trend of the percentage of Hispanic households that are currently caught up on mortgage before and after the peak of the pandemic
ggplot(Hisp_caught_up_combined, aes(x = WEEK.x, y = hisp_mean_Mort_caught_up * 100, color = Pandemic)) +
geom_line() +
theme_minimal() +
xlab("Month") +
ylab("% of Hispanic Households Caught Up on Mortgage") +
ylim(80, 100) +
labs(title = "Trend of Hispanic Households Caught Up on Mortgage Before and After the Peak of the Pandemic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_MORTGAGE_HISP_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report
#1=HISPANIC
#2=NON-Hispanic
#Output: What percentage of households are currently caught up on Mortgage
# Create a subset of data for before and after the peak of the pandemic
mort_data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
mort_data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
one_mort_caught_up_before <- data_peak_before %>%
select(WEEK.x, RHISPANIC, MORTCUR) %>%
mutate(mor_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x, RHISPANIC) %>%
summarize(mean_mort_caught_up = mean(mor_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of households that are currently caught up on Mortgage for each wave after the peak of the pandemic
one_mort_caught_up_after <- data_peak_after %>%
select(WEEK.x, RHISPANIC, MORTCUR) %>%
mutate(mor_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x, RHISPANIC) %>%
summarize(mean_mort_caught_up = mean(mor_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
one_mort_caught_up_combined <- rbind(one_mort_caught_up_before, one_mort_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic for all races
ggplot(one_mort_caught_up_combined, aes(x = WEEK.x, y = mean_mort_caught_up * 100, color = Pandemic)) +
geom_line() +
scale_color_manual(values = c("red", "blue"), labels = c("after", "before")) +
facet_wrap(~RHISPANIC) +
theme_minimal() +
xlab("HPS wave") +
ylab("% of Households Caught Up on Mortgage") +
ylim(85, 100) +
labs(title = "Trend of Households Caught Up on Mortgage Before and After the Peak of the Pandemic for Hispanic and Non-Hispanic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_MORTGAGE_HISP_NON_HISP.png",height=8, width=10, dpi = "print" )
library(dplyr)
library(ggplot2)
# Create a subset of data for each race/ethnicity
data_by_race <- data_all %>%
filter(race_ethnicity != "All Races") %>%
select(WEEK.x, race_ethnicity, MORTCUR)
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity and week
mortgage_caught_up_by_week <- data_by_race %>%
mutate(mortgage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR < 0, NA, 0))) %>%
group_by(race_ethnicity, WEEK.x) %>%
summarize(pct_mortgage_caught_up = weighted.mean(mortgage, na.rm = TRUE))
## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.
# Plot the trend of the percentage of households that are currently caught up on mortgage for different races/ethnicities over time
ggplot(mortgage_caught_up_by_week, aes(x = WEEK.x, y = pct_mortgage_caught_up * 100, color = race_ethnicity)) +
geom_line(size = 1) +
scale_color_manual(values = c("black", "red", "green", "blue", "yellow", "orange")) +
theme_minimal() +
xlab("Week") +
ylab("% of Households Caught Up on Mortgage") +
labs(title = "Trend of Households Caught Up on Mortgage by Race/Ethnicity",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
ggsave("TREND_MORTGAGE_RACE_ETHNICITY_4_TOGETHER.png")
## Saving 7 x 5 in image
****What percentage of households are currently caught up on Rent**** TREND
#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report
#Output: What percentage of households are currently caught up on Rent
data_all %>%
select(WEEK.x, RENTCUR) %>%
mutate( ren_caught_up = case_when(RENTCUR< 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm=TRUE)) %>%
ggplot(aes(x=WEEK.x, y=mean_RENT_caught_up*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
ylim(85, 90) +
labs(title = "Updated: %Household currently caught up on Rent Payment From 19th August 2020 to 9th May 2022",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_GENERAL_RENT.png",height=8, width=10, dpi = "print" )
# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
rent_caught_up_before <- data_peak_before %>%
select(WEEK.x, RENTCUR) %>%
mutate(rent_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_RENT_caught_up = mean(rent_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
rent_caught_up_after <- data_peak_after %>%
select(WEEK.x, RENTCUR) %>%
mutate(rent_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_RENT_caught_up = mean(rent_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
rent_caught_up_combined <- rbind(rent_caught_up_before, rent_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic
ggplot(rent_caught_up_combined, aes(x = WEEK.x, y = mean_RENT_caught_up * 100, color = Pandemic)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("% of Households Caught Up on Rent") +
ylim(85, 90) +
labs(title = "Trend of Households Caught Up on Rent During and After the Peak of the Pandemic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_RENT_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report
#Output: What percentage of households are currently caught up on Rent
# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
o_rent_caught_up_before <- data_peak_before %>%
select(WEEK.x, RHISPANIC, RENTCUR) %>%
mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x, RHISPANIC) %>%
summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
o_rent_caught_up_after <- data_peak_after %>%
select(WEEK.x, RHISPANIC, RENTCUR) %>%
mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x, RHISPANIC) %>%
summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
o_rent_caught_up_combined <- rbind(o_rent_caught_up_before, o_rent_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic for all races
ggplot(o_rent_caught_up_combined, aes(x = WEEK.x, y = mean_RENT_caught_up * 100, color = Pandemic)) +
geom_line() +
scale_color_manual(values = c("red", "blue"), labels = c("after", "before")) +
facet_wrap(~RHISPANIC) +
theme_minimal() +
xlab("HPS wave") +
ylab("% of Households Caught Up on Rent") +
ylim(75, 95) +
labs(title = "Trend of Households Caught Up on Rent During and After the Peak of the Pandemic for Hispanic and Non-Hispanic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_RENT_HISP_NON_HISP.png",height=8, width=10, dpi = "print" )
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
two_rent_caught_up_before <- data_peak_before %>%
select(WEEK.x, RRACE, RENTCUR) %>%
mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x, RRACE) %>%
summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
two_rent_caught_up_after <- data_peak_after %>%
select(WEEK.x, RRACE, RENTCUR) %>%
mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x, RRACE) %>%
summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
two_rent_caught_up_combined <- rbind(two_rent_caught_up_before, two_rent_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic for different races
ggplot(two_rent_caught_up_combined, aes(x = WEEK.x, y = mean_RENT_caught_up * 100, color = Pandemic)) +
geom_line() +
scale_color_manual(values = c("red", "blue"), labels = c("After", "Before")) +
facet_wrap(~ RRACE) +
theme_minimal() +
xlab("HPS wave") +
ylab("% of Households Caught Up on Rent") +
ylim(75, 95) +
labs(title = "Trend of Households Caught Up on Rent Before and After the Peak of the Pandemic for Different Races",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_RENT_FOR_ALL_RACE.png",height=8, width=10, dpi = "print" )
library(dplyr)
library(ggplot2)
# Create a subset of data for each race/ethnicity
rent_data_by_race <- data_all %>%
filter(race_ethnicity != "All Races") %>%
select(WEEK.x, race_ethnicity, RENTCUR)
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity and week
rent_caught_up_by_week <- rent_data_by_race %>%
mutate(rent = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR < 0, NA, 0))) %>%
group_by(race_ethnicity, WEEK.x) %>%
summarize(pct_rent_caught_up = weighted.mean(rent, na.rm = TRUE))
## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.
# Plot the trend of the percentage of households that are currently caught up on mortgage for different races/ethnicities over time
ggplot(rent_caught_up_by_week, aes(x = WEEK.x, y = pct_rent_caught_up * 100, color = race_ethnicity)) +
geom_line(size = 1) +
scale_color_manual(values = c("black", "red", "green", "blue", "yellow", "orange")) +
theme_minimal() +
xlab("Week") +
ylab("% of Households Caught Up on Rent Payment") +
labs(title = "Trend of Households Caught Up on Rent by Race/Ethnicity",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave("TREND_RENT_RACE_ETHNICITY_4_TOGETHER.png")
## Saving 7 x 5 in image
****What percent of household are Confident to pay next rent or mortgage****
#1) No confidence
#2) Slight confidence
#3) Moderate confidence
#4) High confidence
#5) Payment is/will be deferred
#(1,2,5)~1 means not confident enough.
data_all%>%
mutate(re_mort_conf = ifelse(MORTCONF %in% c(3,4),1, ifelse( c(1,2,5), 0)))%>%
group_by(WEEK.x)%>%
summarise(pct_re_mort_conf = mean(re_mort_conf, na.rm=T))%>%
ggplot(aes(x=WEEK.x, y=pct_re_mort_conf*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
labs(title = "Update: % Confident to pay mortgage or rent From 19th August 2020 to 9th May 2022",
subtitle = "Unweighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_GENERAL_MORTCONF.png",height=8, width=10, dpi = "print" )
# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
conf_caught_up_before <- data_peak_before %>%
select(WEEK.x, MORTCONF) %>%
mutate(CONF_caught_up = case_when(MORTCONF < 0 ~ NA_real_,
MORTCONF %in% c(1,2,5) ~ 0,
MORTCONF %in% c(3,4) ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_CONF_caught_up = mean(CONF_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
conf_caught_up_after <- data_peak_after %>%
select(WEEK.x, MORTCONF) %>%
mutate(CONF_caught_up = case_when(MORTCONF < 0 ~ NA_real_,
MORTCONF %in% c(1,2,5) ~ 0,
MORTCONF %in% c(3,4) ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_CONF_caught_up = mean(CONF_caught_up, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
conf_caught_up_combined <- rbind(conf_caught_up_before, conf_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic
ggplot(conf_caught_up_combined, aes(x = WEEK.x, y = mean_CONF_caught_up * 100, color = Pandemic)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("% of Households Confidence to pay rent or mortgage of next month") +
ylim(85, 90) +
labs(title = "Trend of Households Confident to pay Rent or Mortgage During and After the Peak of the Pandemic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
## Warning: Removed 3 rows containing missing values (`geom_line()`).
ggsave(filename = "TREND_CONF_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
## Warning: Removed 3 rows containing missing values (`geom_line()`).
library(dplyr)
library(ggplot2)
# Create a subset of data for each race/ethnicity
conf_data_by_race <- data_all %>%
filter(race_ethnicity != "All Races") %>%
select(WEEK.x, race_ethnicity, MORTCONF)
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity and week
mortconf_caught_up_by_week <- conf_data_by_race %>%
mutate(mortconf = ifelse(MORTCONF %in% c(3,4),1, ifelse( c(1,2,5), 0))) %>%
group_by(race_ethnicity, WEEK.x) %>%
summarize(pct_conf_caught_up = weighted.mean(mortconf, na.rm = TRUE))
## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.
# Plot the trend of the percentage of households that are currently caught up on mortgage for different races/ethnicities over time
ggplot(mortconf_caught_up_by_week, aes(x = WEEK.x, y = pct_conf_caught_up * 100, color = race_ethnicity)) +
geom_line(size = 1) +
scale_color_manual(values = c("black", "red", "green", "blue", "yellow", "orange")) +
theme_minimal() +
xlab("Week") +
ylab("% of Households confident to pay next rent or mortgage") +
labs(title = "Trend of Households confident to pay rent or mortgage by Race/Ethnicity",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave("TREND_CONF_RACE_ETHNICITY_4_TOGETHER.png")
## Saving 7 x 5 in image
What percent of people are Worry
#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day
#(2,3,4)~1 means worry
data_all%>%
mutate(re_worry = ifelse(WORRY %in% c(2,3,4),1, ifelse( c(1), 0)))%>%
group_by(WEEK.x)%>%
summarise(pct_re_worry = mean(re_worry, na.rm=T))%>%
ggplot(aes(x=WEEK.x, y=pct_re_worry*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
labs(title = "Update: % people stays worry for a week From 19th August 2020 to 9th May 2022",
subtitle = "Unweighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_GENERAL_worry.png",height=8, width=10, dpi = "print" )
# Input:
# 1) Not at all
# 2) Several days
# 3) More than half the days
# 4) Nearly every day
# Output: What percentage of Hispanic households stay worried for a week
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of Hispanic households that stay worried for a week for each wave before the peak of the pandemic
Hisp_worry_before <- data_peak_before %>%
select(WEEK.x, WORRY, RHISPANIC) %>%
filter(RHISPANIC == 2) %>% # Select only Hispanic households
mutate(hisp_worry = ifelse(WORRY %in% 2:4, 1, 0)) %>%
group_by(WEEK.x) %>%
summarize(hisp_mean_worry = mean(hisp_worry, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of Hispanic households that stay worried for a week for each wave after the peak of the pandemic
Hisp_worry_after <- data_peak_after %>%
select(WEEK.x, WORRY, RHISPANIC) %>%
filter(RHISPANIC == 2) %>% # Select only Hispanic households
mutate(hisp_worry = ifelse(WORRY %in% 2:4, 1, 0)) %>%
group_by(WEEK.x) %>%
summarize(hisp_mean_worry = mean(hisp_worry, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
Hisp_worry_combined <- rbind(Hisp_worry_before, Hisp_worry_after)
# Plot the trend of the percentage of Hispanic households that stay worried for a week before and after the peak of the pandemic
ggplot(Hisp_worry_combined, aes(x = WEEK.x, y = hisp_mean_worry * 100, color = Pandemic)) +
geom_line() +
theme_minimal() +
xlab("Month") +
ylab("% of Hispanic Households that Stay Worried for a Week") +
ylim(0, 100) +
labs(title = "Trend of Hispanic Households that Stay Worried for a Week Before and After the Peak of the Pandemic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_WORRY_HISP_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of people who stay worried for each wave before the peak of the pandemic by race
worry_by_race_before <- data_peak_before %>%
select(WEEK.x, WORRY, RRACE) %>%
mutate(re_worry = ifelse(WORRY %in% c(2,3,4),1,0)) %>%
group_by(WEEK.x, RRACE) %>%
summarise(pct_re_worry = mean(re_worry, na.rm=T)) %>%
mutate(Pandemic = "Before")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of people who stay worried for each wave after the peak of the pandemic by race
worry_by_race_after <- data_peak_after %>%
select(WEEK.x, WORRY, RRACE) %>%
mutate(re_worry = ifelse(WORRY %in% c(2,3,4),1,0)) %>%
group_by(WEEK.x, RRACE) %>%
summarise(pct_re_worry = mean(re_worry, na.rm=T)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
worry_by_race_combined <- rbind(worry_by_race_before, worry_by_race_after)
# Convert RRACE variable to factor
worry_by_race_combined$RRACE <- factor(worry_by_race_combined$RRACE)
# Plot the trend of the percentage of people who stay worried by race before and after the peak of the pandemic
ggplot(worry_by_race_combined, aes(x = WEEK.x, y = pct_re_worry * 100, color = Pandemic)) +
geom_line() +
facet_wrap(~ RRACE, ncol = 2) +
theme_minimal() +
xlab("Month") +
ylab("% of People Staying Worried") +
ylim(25, 60) +
labs(title = "Trend of People Staying Worried by Race Before and After the Peak of the Pandemic",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_worry_ALL_race_before_and_after.png", height = 8, width = 10, dpi = "print")
#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day
#(2,3,4)~1 means anxious
data_all%>%
mutate(re_anxious = ifelse(ANXIOUS %in% c(2,3,4),1, ifelse( c(1), 0)))%>%
group_by(WEEK.x)%>%
summarise(pct_re_anxious = mean(re_anxious, na.rm=T))%>%
ggplot(aes(x=WEEK.x, y=pct_re_anxious*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
labs(title = "Update: % people are anxious for a week From 19th August 2020 to 9th May 2022",
subtitle = "Unweighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_anxious.png",height=8, width=10, dpi = "print" )
#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day
# Output: Trend of the percentage of people who felt anxious for each wave, by race.
#WHITE 1
#BLACK 2
#ASIAN 3
#OTHER 4
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Recode ANXIOUS variable into a binary variable where 1 means anxious
data_peak_before <- data_peak_before %>%
mutate(anxious = ifelse(ANXIOUS %in% c(2, 3, 4), 1, 0))
data_peak_after <- data_peak_after %>%
mutate(anxious = ifelse(ANXIOUS %in% c(2, 3, 4), 1, 0))
# Calculate the percentage of people who felt anxious for each wave, by race, before the peak of the pandemic
anxious_by_race_before <- data_peak_before %>%
select(WEEK.x, RRACE, anxious) %>%
filter(RRACE != -88) %>% # Exclude missing values
group_by(WEEK.x, RRACE) %>%
summarize(mean_anxious = mean(anxious, na.rm = TRUE)) %>%
mutate(Pandemic = "During")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of people who felt anxious for each wave, by race, after the peak of the pandemic
anxious_by_race_after <- data_peak_after %>%
select(WEEK.x, RRACE, anxious) %>%
filter(RRACE != -88) %>% # Exclude missing values
group_by(WEEK.x, RRACE) %>%
summarize(mean_anxious = mean(anxious, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
anxious_by_race_combined <- rbind(anxious_by_race_before, anxious_by_race_after)
# Plot the trend of the percentage of people who felt anxious for each wave, by race, before and after the peak of the pandemic
ggplot(anxious_by_race_combined, aes(x = WEEK.x, y = mean_anxious * 100, color = Pandemic, linetype = as.factor(RRACE))) +
geom_line() +
scale_linetype_manual(values = c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash")) +
theme_minimal() +
xlab("Month") +
ylab("% of People Feeling Anxious") +
ylim(25, 70) +
labs(title = "Trend of the Percentage of People Feeling Anxious During and After the Peak of the Pandemic, by Race",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_ANXIOUS_ALL_RACE_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day
# Output: Trend of the percentage of people who felt down for each wave, by race.
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Recode DOWN variable into a binary variable where 1 means feeling down
data_peak_before <- data_peak_before %>%
mutate(down = ifelse(DOWN %in% c(2, 3, 4), 1, 0))
data_peak_after <- data_peak_after %>%
mutate(down = ifelse(DOWN %in% c(2, 3, 4), 1, 0))
# Calculate the percentage of people who felt down for each wave, by race, before the peak of the pandemic
down_by_race_before <- data_peak_before %>%
select(WEEK.x, RRACE, down) %>%
filter(RRACE != -88) %>% # Exclude missing values
group_by(WEEK.x, RRACE) %>%
summarize(mean_down = mean(down, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of people who felt down for each wave, by race, after the peak of the pandemic
down_by_race_after <- data_peak_after %>%
select(WEEK.x, RRACE, down) %>%
filter(RRACE != -88) %>% # Exclude missing values
group_by(WEEK.x, RRACE) %>%
summarize(mean_down = mean(down, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
down_by_race_combined <- rbind(down_by_race_before, down_by_race_after)
# Plot the trend of the percentage of people who felt down for each wave, by race, before and after the peak of the pandemic
ggplot(down_by_race_combined, aes(x = WEEK.x, y = mean_down * 100, color = Pandemic, linetype = as.factor(RRACE))) +
geom_line() +
scale_linetype_manual(values = c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash")) +
theme_minimal() +
xlab("Month") +
ylab("% of People Feeling Down") +
ylim(25, 55) +
labs(title = "Trend of the Percentage of People Feeling Down Before and After the Peak of the Pandemic, by Race",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_DOWN_ALL_RACE_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )
#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day
# Output: Trend of the percentage of people who experienced mental health concerns for each wave, by race.
library(ggplot2)
# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Recode mental health variables into a binary variable where 1 means experiencing mental health concerns
data_peak_before <- data_peak_before %>%
mutate(mental_health = ifelse(WORRY %in% c(2, 3, 4) | ANXIOUS %in% c(2, 3, 4) | DOWN %in% c(2, 3, 4), 1, 0))
data_peak_after <- data_peak_after %>%
mutate(mental_health = ifelse(WORRY %in% c(2, 3, 4) | ANXIOUS %in% c(2, 3, 4) | DOWN %in% c(2, 3, 4), 1, 0))
# Calculate the percentage of people who experienced mental health concerns for each wave, by race, before the peak of the pandemic
mental_health_by_race_before <- data_peak_before %>%
select(WEEK.x, RRACE, mental_health) %>%
filter(RRACE != -88) %>% # Exclude missing values
group_by(WEEK.x, RRACE) %>%
summarize(mean_mental_health = mean(mental_health, na.rm = TRUE)) %>%
mutate(Pandemic = "During")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Calculate the percentage of people who experienced mental health concerns for each wave, by race, after the peak of the pandemic
mental_health_by_race_after <- data_peak_after %>%
select(WEEK.x, RRACE, mental_health) %>%
filter(RRACE != -88) %>% # Exclude missing values
group_by(WEEK.x, RRACE) %>%
summarize(mean_mental_health = mean(mental_health, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Combine the two datasets
mental_health_by_race_combined <- rbind(mental_health_by_race_before, mental_health_by_race_after)
# Plot the trend of the percentage of people who experienced mental health concerns for each wave, by race, before and after the peak of the pandemic
ggplot(mental_health_by_race_combined, aes(x = WEEK.x, y = mean_mental_health * 100, color = Pandemic, linetype = as.factor(RRACE))) +
geom_line() +
scale_linetype_manual(values = c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash")) +
theme_minimal() +
xlab("Month") +
ylab("% of People Experiencing Mental Health Concerns") +
ylim(30, 70) +
labs(title = "Trend of the Percentage of People Experiencing Mental Health Concerns During and After the Peak of the Pandemic, by Race",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "TREND_MENTAL_HEALTH_RACE_ALL_BEFORE_AND_AFTER.png")
## Saving 7 x 5 in image
*********BAR CHART*********** ##############BAR CHART##########
#library(questionr)
data_all%>%
mutate(mortage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR <0, NA, 0))) %>%
group_by(race_ethnicity)%>%
summarise(pct_remote = weighted.mean(mortage, na.rm=T))%>%
ggplot(aes(x=race_ethnicity, y=pct_remote, fill=race_ethnicity,label = scales::percent(pct_remote)))+
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent)+
ggtitle("Percent of caught up in Mortage")+
ylab("Percent") +
xlab("Race/Ethnicity")+
theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))
ggsave(filename = "Bar_MORTCUR.png",height=8, width=10, dpi = "print" )
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity before the peak of the pandemic
mortgage_caught_up_before <- data_peak_before %>%
mutate(mortgage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR <0, NA, 0))) %>%
group_by(race_ethnicity) %>%
summarize(pct_mortgage_caught_up = weighted.mean(mortgage, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity after the peak of the pandemic
mortgage_caught_up_after <- data_peak_after %>%
mutate(mortgage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR <0, NA, 0))) %>%
group_by(race_ethnicity) %>%
summarize(pct_mortgage_caught_up = weighted.mean(mortgage, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
mortgage_caught_up_combined <- rbind(mortgage_caught_up_before, mortgage_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on mortgage before and after the peak of the pandemic for different races/ethnicities
ggplot(mortgage_caught_up_combined, aes(x = race_ethnicity, y = pct_mortgage_caught_up * 100, fill = Pandemic, label = scales::percent(pct_mortgage_caught_up))) +
geom_col(position = "dodge", width = 0.7) +
geom_text(position = position_dodge(width = 0.7), vjust = -0.5, size = 3) +
scale_fill_manual(values = c("blue", "red"), labels = c("Peak", "After")) +
coord_flip() +
theme_minimal() +
xlab("Race/Ethnicity") +
ylab("% of Households Caught Up on Mortgage") +
labs(title = "Bar Chart Households Caught Up on Mortgage During Peak and After the Peak of the Pandemic for Different Races/Ethnicities",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "BAR_CHART_MORTGAGE_FOR_ALL_BEFORE_AFTER_PANDEMIC.png",height=8, width=10, dpi = "print" )
#library(questionr)
data_all%>%
mutate(rent_caught = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR <0, NA, 0))) %>%
group_by(race_ethnicity)%>%
summarise(pct_remote_rent = weighted.mean(rent_caught, na.rm=T))%>%
ggplot(aes(x=race_ethnicity, y=pct_remote_rent, fill=race_ethnicity,label = scales::percent(pct_remote_rent)))+
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent)+
ggtitle("Percent of caught up in Rent")+
ylab("Percent") +
xlab("Race/Ethnicity")+
theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))
ggsave(filename = "BARCHART_RENT.png",height=8, width=10, dpi = "print" )
# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity before the peak of the pandemic
three_rent_caught_up_before <- data_peak_before %>%
mutate(rent = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR <0, NA, 0))) %>%
group_by(race_ethnicity) %>%
summarize(pct_rent_caught_up = weighted.mean(rent, na.rm = TRUE)) %>%
mutate(Pandemic = "Before")
# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity after the peak of the pandemic
three_rent_caught_up_after <- data_peak_after %>%
mutate(rent = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR <0, NA, 0))) %>%
group_by(race_ethnicity) %>%
summarize(pct_rent_caught_up = weighted.mean(rent, na.rm = TRUE)) %>%
mutate(Pandemic = "After")
# Combine the two datasets
three_rent_caught_up_combined <- rbind(three_rent_caught_up_before, three_rent_caught_up_after)
# Plot the trend of the percentage of households that are currently caught up on mortgage before and after the peak of the pandemic for different races/ethnicities
ggplot(three_rent_caught_up_combined, aes(x = race_ethnicity, y = pct_rent_caught_up * 100, fill = Pandemic, label = scales::percent(pct_rent_caught_up))) +
geom_col(position = "dodge", width = 0.7) +
geom_text(position = position_dodge(width = 0.7), vjust = -0.5, size = 3) +
scale_fill_manual(values = c("blue", "red"), labels = c("Peak", "After")) +
coord_flip() +
theme_minimal() +
xlab("Race/Ethnicity") +
ylab("% of Households Caught Up on Rent") +
labs(title = "Trend of Households Caught Up on rent Peak and After the Peak of the Pandemic for Different Races/Ethnicities",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
ggsave(filename = "BAR_CHART_RENT_FOR_ALL_BEFORE_AFTER_PANDEMIC.png",height=8, width=10, dpi = "print" )
#library(questionr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
data_all%>%
mutate(rent_mort_confidence = case_when(
MORTCONF %in% c(3,4)~1, MORTCONF %in% c(1,2,5) ~ 0, TRUE ~ NA_real_)) %>%
group_by(race_ethnicity)%>%
summarise(pct_remote_mort_rent = weighted.mean(rent_mort_confidence, na.rm=T))%>%
ggplot(aes(x=race_ethnicity, y=pct_remote_mort_rent, fill=race_ethnicity,label = scales::percent(pct_remote_mort_rent)))+
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent)+
ggtitle("Percent of confidence to pay rent or mortage")+
ylab("Percent") +
xlab("Race/Ethnicity")+
theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))
ggsave(filename = "BARCHART_CONFIDENCE.png",height=8, width=10, dpi = "print" )
We define a new variable Housing Affordability. This variable will show the portion of people who was caught up on rent, mortgage and were confident to pay their next mortgage and rent. As a result, these household has more housing affordability or less house rent hardship
# Filter the data for week 13 to week 45
data_filtered <- data_all
# Create a new variable for housing insecurity
data_housing_insecurity <- data_filtered %>%
mutate(rent_insecurity = ifelse(RENTCUR %in% c(2, 3), 1, 0),
mortgage_insecurity = ifelse(MORTCUR %in% c(2, 3), 1, 0),
mortgage_conf_insecurity = ifelse(MORTCONF %in% c(3,4), 1, 0),
housing_insecurity = pmax(rent_insecurity, mortgage_insecurity, mortgage_conf_insecurity)) %>%
group_by(race_ethnicity, WEEK.x) %>%
summarize(pct_housing_insecurity = mean(housing_insecurity, na.rm = TRUE))
## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.
# Separate the data for HISPANIC and NON-Hispanic
data_housing_insecurity_hisp <- data_housing_insecurity %>%
filter(race_ethnicity == "HISPANIC")
data_housing_insecurity_nonhisp <- data_housing_insecurity %>%
filter(race_ethnicity == "NON-Hispanic")
# Bar chart for housing insecurity by race/ethnicity
ggplot(data_housing_insecurity, aes(x = race_ethnicity, y = pct_housing_insecurity * 100, fill = race_ethnicity)) +
geom_col(position = "dodge", width = 0.7) +
scale_fill_manual(values = c("red", "blue", "green", "purple", "orange")) +
theme_minimal() +
xlab("") +
ylab("% of Households with Housing Affordibility") +
labs(title = "Housing affordibility by Race/Ethnicity in the U.S. during the COVID-19 Pandemic",
caption = "Source: Census Household Pulse Survey")
ggsave(filename = "BARCHART_MORT_RENT_CONF.png",height=8, width=10, dpi = "print" )
#library(questionr)
library(lubridate)
data_all%>%
mutate(pan_re_worry = case_when(
WORRY %in% c(2,3,4)~1, WORRY %in% c(1) ~ 0, TRUE ~ NA_real_)) %>%
group_by(race_ethnicity)%>%
summarise(pct_pan_re_worry = weighted.mean(pan_re_worry, na.rm=T))%>%
ggplot(aes(x=race_ethnicity, y=pct_pan_re_worry, fill=race_ethnicity,label = scales::percent(pct_pan_re_worry)))+
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent)+
ggtitle("Percent of people who are worried for a week")+
ylab("Percent") +
xlab("Race/Ethnicity")+
theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))
ggsave(filename = "BARCHART_WORRY.png",height=8, width=10, dpi = "print" )
#library(questionr)
library(lubridate)
data_all%>%
mutate(pan_re_anxious = case_when(
ANXIOUS %in% c(2,3,4)~1, ANXIOUS %in% c(1) ~ 0, TRUE ~ NA_real_)) %>%
group_by(race_ethnicity)%>%
summarise(pct_pan_re_anxious = weighted.mean(pan_re_anxious, na.rm=T))%>%
ggplot(aes(x=race_ethnicity, y=pct_pan_re_anxious, fill=race_ethnicity,label = scales::percent(pct_pan_re_anxious)))+
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent)+
ggtitle("Percent of people who are anxious for a week")+
ylab("Percent") +
xlab("Race/Ethnicity")+
theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))
ggsave(filename = "BARCHART_ANXIOUS.png",height=8, width=10, dpi = "print" )
#library(questionr)
library(lubridate)
data_all%>%
mutate(pan_re_down = case_when(
DOWN %in% c(2,3,4)~1, DOWN %in% c(1) ~ 0, TRUE ~ NA_real_)) %>%
group_by(race_ethnicity)%>%
summarise(pct_pan_re_down = weighted.mean(pan_re_down, na.rm=T))%>%
ggplot(aes(x=race_ethnicity, y=pct_pan_re_down, fill=race_ethnicity,label = scales::percent(pct_pan_re_down)))+
geom_col(position = 'dodge') +
geom_text(position = position_dodge(width = .9), # move to center of bars
vjust = -0.5, # nudge above top of bar
size = 3) +
scale_y_continuous(labels = scales::percent)+
ggtitle("Percent of people who are feeling down for a week")+
ylab("Percent") +
xlab("Race/Ethnicity")+
theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))
ggsave(filename = "BARCHART_DOWN.png",height=8, width=10, dpi = "print" )
We create a new variable to define Mental Health. The more score means more mental stress
library(dplyr)
library(ggplot2)
# Calculate total mental health score by summing WORRY, DOWN, and INTEREST scores
data_all <- data_all %>%
mutate(mental_health = WORRY + DOWN + INTEREST)
# Calculate mean mental health score by month
mental_health_by_month <- data_all %>%
group_by(WEEK.x) %>%
summarize(mean_mental_health = mean(mental_health, na.rm = TRUE))
# Plot mean mental health score over time
ggplot(mental_health_by_month, aes(x = WEEK.x, y = mean_mental_health)) +
geom_line() +
ggtitle("Trend of Mental Health") +
xlab("Month") +
ylab("Mean Mental Stress Score")
ggsave("TREND_MENTAL_Stress_COMB.png")
## Saving 7 x 5 in image
Which Group of people suffer from Mental Stress
#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day
# Output: Trend of the percentage of people who felt worried, anxious, or down for each wave, by age group.
library(ggplot2)
# Recode WORRY, ANXIOUS, and DOWN variables into a binary variable where 1 means worried, anxious, or down
data_all <- data_all %>%
mutate(mental_health = ifelse(WORRY %in% c(2, 3, 4) | ANXIOUS %in% c(2, 3, 4) | DOWN %in% c(2, 3, 4), 1, 0))
# Calculate the percentage of people who felt worried, anxious, or down for each wave, by age group
mental_health_by_age <- data_all %>%
select(WEEK.x, TBIRTH_YEAR, mental_health) %>%
filter(TBIRTH_YEAR != -88) %>% # Exclude missing values
mutate(AGE = as.numeric(format(Sys.Date(), "%Y")) - TBIRTH_YEAR) %>%
group_by(WEEK.x, AGE) %>%
summarize(mean_mental_health = mean(mental_health, na.rm = TRUE)) %>%
mutate(AGE_GROUP = case_when(
AGE <= 17 ~ "Under 18",
AGE >= 18 & AGE <= 24 ~ "18-24",
AGE >= 25 & AGE <= 34 ~ "25-34",
AGE >= 35 & AGE <= 44 ~ "35-44",
AGE >= 45 & AGE <= 54 ~ "45-54",
AGE >= 55 & AGE <= 64 ~ "55-64",
AGE >= 65 & AGE <= 74 ~ "65-74",
AGE >= 75 ~ "75 and over"
))
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
# Plot the percentage of people who felt worried, anxious, or down for each age group
ggplot(mental_health_by_age, aes(x = AGE_GROUP, y = mean_mental_health * 100)) +
geom_bar(stat = "identity", fill = "blue") +
theme_minimal() +
xlab("Age Group") +
ylab("% of People with Mental Health Issues") +
ylim(0, 70) +
labs(title = "Percentage of People with Mental Health Issues by Age Group",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
## Warning: Removed 14 rows containing missing values (`position_stack()`).
## Warning: Removed 2251 rows containing missing values (`geom_bar()`).
ggsave(filename = "MENTAL_HEALTH_BY_AGE_BAR.png", height=8, width=10, dpi = "print")
## Warning: Removed 14 rows containing missing values (`position_stack()`).
## Removed 2251 rows containing missing values (`geom_bar()`).
Which Group of people suffer from Housing Insecurity
library(ggplot2)
data_all <- data_all %>%
mutate(rent_mort = RENTCUR + MORTCUR + MORTCONF)
data_all <- data_all %>%
mutate(rent_mort = ifelse(RENTCUR %in% c(1) | MORTCUR %in% c(1) | MORTCONF %in% c(3, 4), 1, 0))
# Calculate the percentage of people who felt worried, anxious, or down for each wave, by age group
house_insecurit_by_age <- data_all %>%
select(WEEK.x, TBIRTH_YEAR, rent_mort) %>%
filter(TBIRTH_YEAR != -88) %>% # Exclude missing values
mutate(AGE = as.numeric(format(Sys.Date(), "%Y")) - TBIRTH_YEAR) %>%
group_by(WEEK.x, AGE) %>%
summarize(mean_rent_mort = mean(rent_mort, na.rm = TRUE)) %>%
mutate(AGE_GROUP = case_when(
AGE <= 17 ~ "Under 18",
AGE >= 18 & AGE <= 24 ~ "18-24",
AGE >= 25 & AGE <= 34 ~ "25-34",
AGE >= 35 & AGE <= 44 ~ "35-44",
AGE >= 45 & AGE <= 54 ~ "45-54",
AGE >= 55 & AGE <= 64 ~ "55-64",
AGE >= 65 & AGE <= 74 ~ "65-74",
AGE >= 75 ~ "75 and over"
))
## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.
ggplot(house_insecurit_by_age, aes(x = AGE_GROUP, y = mean_rent_mort * 100)) +
geom_bar(stat = "identity", fill = "blue") +
theme_minimal() +
xlab("Age Group") +
ylab("% of People Pay their Rent or Mortgage") +
ylim(0, 70) +
labs(title = "Percentage of People Paid House Rent or Mortgage by Age Group",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")
## Warning: Removed 68 rows containing missing values (`position_stack()`).
## Warning: Removed 2196 rows containing missing values (`geom_bar()`).
ggsave(filename = "HOUSING_Affordability_BY_AGE_BAR.png", height=8, width=10, dpi = "print")
## Warning: Removed 68 rows containing missing values (`position_stack()`).
## Removed 2196 rows containing missing values (`geom_bar()`).
##CORRELATION BETWEEN HOUSERENT HARDSHIP and MENTAL STRESS ** A correlation analysis tells if there is a linear relationship between the two variables, while a regression analysis can help understand the strength and direction of the relationship and whether it is statistically significant. We will notice the co-relation between RENTCUR, MORTCUR and WORRY, ANXIOUS, DOWN, INTEREST.
library(ggplot2)
# Select relevant variables
data_rent_worry <- data_all %>%
select(WORRY, RENTCUR) %>%
filter(!is.na(RENTCUR)) # Remove missing values
# Create a scatter plot
ggplot(data_rent_worry, aes(x = RENTCUR, y = WORRY)) +
geom_point(alpha = 0.2) +
xlab("Current House Rent Hardship") +
ylab("Mental Health Issue (WORRY)") +
labs(title = "Correlation between House Rent Hardship and Mental Health",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey") +
theme_minimal()
# Calculate correlation coefficient
correlation <- cor(data_rent_worry$RENTCUR, data_rent_worry$WORRY, use = "complete.obs")
cat("Correlation Coefficient: ", correlation)
## Correlation Coefficient: 0.2000606
# Load required libraries
library(dplyr)
library(ggplot2)
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.2.3
# Subset the data to include only necessary variables
data_alll <- data_all %>%
select(WEEK.x, WORRY, RENTCUR, RRACE) %>%
filter(RENTCUR != -88 & WORRY != -88 & RRACE!= -88 & RENTCUR != -99 & WORRY != -99 & RRACE!= -99)
# Recode WORRY variable into a binary variable where 1 means worried, anxious, or down
data_alll$WORRY <- ifelse(data_alll$WORRY %in% c(2, 3, 4), 1, 0)
#data_alll$ANXIOUS <- ifelse(data_alll$ANXIOUS %in% c(2, 3, 4), 1, 0)
# Create a new binary variable RENT_STRESS where 1 means having difficulty paying rent and 0 means no difficulty
# Conduct logistic regression analysis
model <- glm(WORRY ~ RENTCUR + RRACE, data = data_alll, family = "binomial")
# Print the summary of the model
summary(model)
##
## Call:
## glm(formula = WORRY ~ RENTCUR + RRACE, family = "binomial", data = data_alll)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.767 -1.352 1.012 1.012 1.016
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.521243 0.012959 -40.222 <2e-16 ***
## RENTCUR 0.925475 0.011020 83.980 <2e-16 ***
## RRACE -0.003075 0.003540 -0.869 0.385
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 600029 on 452398 degrees of freedom
## Residual deviance: 591949 on 452396 degrees of freedom
## AIC: 591955
##
## Number of Fisher Scoring iterations: 4
# Create a forest plot to visualize the odds ratio and confidence intervals
plot_model(model, type = "est", vline.color = "gray", ci.lvl = 0.95, axis.labels = c("Odds Ratio"), title = "Logistic Regression Model: WORRY ~ RENT_STRESS + RRACE")
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
**A logit link function to model the relationship between WORRY (binary outcome variable) and two predictor variables, RENTCUR (continuous predictor variable) and RRACE (categorical predictor variable with two levels, White and Black).
The coefficient estimates for each predictor variable are shown, as well as their standard errors and associated p-values, which can be used to assess the statistical significance of the relationship between the predictor and the outcome variables.
The intercept coefficient represents the estimated log-odds of the outcome variable when all predictor variables are equal to zero. In this case, the intercept is 0.446, which means that when RENTCUR and RRACE are equal to zero, the log-odds of WORRY is 0.446.
The coefficient estimate for RENTCUR is 0.009, indicating that for each one-unit increase in RENTCUR, the log-odds of WORRY increases by 0.009. This suggests that higher levels of current rent are associated with higher levels of worry.
The coefficient estimate for RRACE is 0.026, indicating that being Black is associated with higher levels of worry compared to being White. The p-value associated with this coefficient estimate is < 0.001, indicating that this relationship is statistically significant.
The deviance residuals represent the difference between the predicted probabilities of the outcome variable and the actual values. The residual deviance (572682) is smaller than the null deviance (572922), suggesting that the model provides a better fit to the data than the null model. The AIC (Akaike information criterion) provides a measure of model fit, with lower values indicating better fit. The AIC for this model is 572688, which suggests a good fit.
Overall, this model suggests that both current rent and race are significant predictors of worry, with higher current rent and being Black associated with higher levels of worry.**
# Load required libraries
library(dplyr)
library(ggplot2)
library(sjPlot)
# Subset the data to include only necessary variables
data_allll <- data_all %>%
select(WEEK.x, WORRY, MORTCUR, RRACE) %>%
filter(MORTCUR != -88 & WORRY != -88 & RRACE!= -88 & MORTCUR != -99 & WORRY != -99 & RRACE!= -99)
# Recode WORRY variable into a binary variable where 1 means worried, anxious, or down
data_allll$WORRY <- ifelse(data_allll$WORRY %in% c(2, 3, 4), 1, 0)
#data_alll$ANXIOUS <- ifelse(data_alll$ANXIOUS %in% c(2, 3, 4), 1, 0)
# Create a new binary variable RENT_STRESS where 1 means having difficulty paying rent and 0 means no difficulty
# Conduct logistic regression analysis
model <- glm(WORRY ~ MORTCUR + RRACE, data = data_allll, family = "binomial")
# Print the summary of the model
summary(model)
##
## Call:
## glm(formula = WORRY ~ MORTCUR + RRACE, family = "binomial", data = data_allll)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.666 -1.119 -1.119 1.237 1.237
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.279959 0.010589 -120.9 <2e-16 ***
## MORTCUR 1.093767 0.009530 114.8 <2e-16 ***
## RRACE 0.048259 0.002839 17.0 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1315314 on 949438 degrees of freedom
## Residual deviance: 1299821 on 949436 degrees of freedom
## AIC: 1299827
##
## Number of Fisher Scoring iterations: 4
# Create a forest plot to visualize the odds ratio and confidence intervals
plot_model(model, type = "est", vline.color = "gray", ci.lvl = 0.95, axis.labels = c("Odds Ratio"), title = "Logistic Regression Model: WORRY ~ MORT_STRESS + RRACE")
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
allweeks <- data_all
allweeks$hisp<-ifelse(allweeks$RHISPANIC==1, "Not_hispanic", "hispanic")
allweeks$race <- car::Recode(allweeks$RRACE, recodes = "1= 'White'; 2= 'Black'; 3 = 'Asian'; 4='Other'; else=NA")
allweeks$race_eth<-ifelse(allweeks$hisp==1, "hisp",allweeks$race)
allweeks$race_eth <- interaction(allweeks$hisp, allweeks$race)
allweeks$race_ethnicity<- mutate(allweeks, ifelse(hisp==0 & race==1, 1,
ifelse(hisp==0 & race==2, 2,
ifelse(hisp==1, 3,
ifelse(hisp==0 & race==3, 4,
ifelse(hisp==0 & race==4, 5, "NA"))))))
allweeks$race_ethnicity<- ifelse(substr(allweeks$race_eth, 1, 4)=="hisp", "hisp", as.character(allweeks$race_eth))
allweeks$gad2 <- (allweeks$WORRY-1) + (allweeks$ANXIOUS-1)+ (allweeks$DOWN-1) + (allweeks$INTEREST-1)
allweeks$gad_b <- ifelse(allweeks$gad2 >= c(3,4) , 1, 0)
## Warning in allweeks$gad2 >= c(3, 4): longer object length is not a multiple of
## shorter object length
allweeks$mortconf<-car::Recode(allweeks$MORTCONF, recodes ="c(3,4)='Confident';c(1,2,5)='Not-Confident'")
allweeks <- allweeks %>% filter(MORTCONF!=-88 & MORTCONF!=-99)
allweeks$mortgage<-car::Recode(allweeks$MORTCUR, recodes ="1='Yes'; 2='No'")
allweeks <- allweeks %>% filter(MORTCUR!=-88 & MORTCUR!=-99)
library(tidyverse)
allweeks %>%
mutate(mortgage = ifelse(mortgage == "Yes", "Caught up on Mortgage", "Not Caught up on Mortgage")) %>%
group_by(mortgage,race_ethnicity)%>%
summarise(meangad = questionr::wtd.mean(gad_b, weights = PWEIGHT))%>%
ggplot(aes(x = race_ethnicity, y = meangad, fill=race_ethnicity, label = scales::percent(meangad)), stat=
"identity", position="dodge")+ facet_wrap(~mortgage)+ ylab("Mental Stress Due to House Insecurity") +xlab("This household Currently Caught up on Mortgage") + scale_fill_manual( name ="Race_ethnicity",labels=c("Hispanic","Non_Hisp White", "Non_Hisp Black", "Non_Hisp Asian", "Others"),values = c("#004f9e", "#e36c2c", "#CC0000", "#006600", "#669999" )) + labs(caption = "Source: U.S Census Household Pulse survey(Week 13-45: August 13th, 2020- May ,2022)") + geom_col(position ='dodge') + geom_text( vjust = -0.5, size =5) + scale_y_continuous(labels = scales::percent)
## `summarise()` has grouped output by 'mortgage'. You can override using the
## `.groups` argument.
ggsave("MORT_TO_STRESS.png", dpi = 300, width = 12, height = 8)
allweeks3 <- data_all
allweeks3$rentt<-car::Recode(allweeks3$RENTCUR, recodes ="1='Yes'; 2='No'")
allweeks3 <- allweeks3 %>% filter(RENTCUR!=-88 & RENTCUR!=-99)
allweeks3$gad2 <- (allweeks3$WORRY-1) + (allweeks3$ANXIOUS-1)+ (allweeks3$DOWN-1) + (allweeks3$INTEREST-1)
allweeks3$gad_b <- ifelse(allweeks3$gad2 >= c(3,4) , 1, 0)
library(tidyverse)
allweeks3 %>%
mutate(rentt = ifelse(rentt == "Yes", "Caught up on Rent", "Not Caught up on Rent")) %>%
group_by(rentt,race_ethnicity)%>%
summarise(meangad = questionr::wtd.mean(gad_b, weights = PWEIGHT))%>%
ggplot(aes(x = race_ethnicity, y = meangad, fill=race_ethnicity, label = scales::percent(meangad)), stat=
"identity", position="dodge")+ facet_wrap(~rentt)+ ylab("Mental Stress from Rent Payment") +xlab("This household Currently Caught up on Rent") + scale_fill_manual( name ="Race_ethnicity",labels=c("Hispanic","Non_Hisp White", "Non_Hisp Black", "Non_Hisp Asian", "Others"),values = c("#004f9e", "#e36c2c", "#CC0000", "#006600", "#669999" )) + labs(caption = "Source: U.S Census Household Pulse survey(Week 28-44: August 19th, 2020- December 22,2020)") + geom_col(position ='dodge') + geom_text( vjust = -0.5, size =5) + scale_y_continuous(labels = scales::percent)
## `summarise()` has grouped output by 'rentt'. You can override using the
## `.groups` argument.
ggsave("RENT_TO_STRESS.png", dpi = 300, width = 12, height = 8)