##Phase 1: April 23, 2020 - July 21, 2020

##Phase 2: August 19, 2020 - October 26, 2020

##Phase 3: October 28, 2020 - March 29, 2021

##Phase 3.1: April 14, 2021 - July 5, 2021

##Phase 3.2: July 21, 2021 – October 11, 2021

##Phase 3.3: December 1, 2021 – February 7, 2022

##Phase 3.4: March 2, 2022 – May 9, 2022

##Phase 3.5: June 1, 2022 – August 8, 2022

##Phase 3.6: September 14, 2022 – November 14, 2022

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.3.0      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(testit)
library(tigris)

## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.

library(stringr)
library(httr)
library(here)

## here() starts at C:/Users/Mahmuda Sultana/Desktop/COMPLETE/Fall'21/Dem5213/pop data

library(ggplot2)
library(dplyr)

In this part, we are going to perform 3 tasks. First, we we will read the CSV file. Then we will read the weighted file. Finally, we will merge these two file

url <- "https://www2.census.gov/programs-surveys/demo/datasets/hhp/2021/wk38/HPS_Week38_PUF_CSV.zip"
download.file(url, "data38.zip")

UNZIP the files

unzip("data1.zip")

## Warning in unzip("data1.zip"): error 1 in extracting from zip file

unzip("data2.zip")

## Warning in unzip("data2.zip"): error 1 in extracting from zip file

unzip("data3.zip")

## Warning in unzip("data3.zip"): error 1 in extracting from zip file

unzip("data4.zip")

## Warning in unzip("data4.zip"): error 1 in extracting from zip file

unzip("data5.zip")

## Warning in unzip("data5.zip"): error 1 in extracting from zip file

unzip("data6.zip")

## Warning in unzip("data6.zip"): error 1 in extracting from zip file

unzip("data7.zip")

## Warning in unzip("data7.zip"): error 1 in extracting from zip file

unzip("data8.zip")

## Warning in unzip("data8.zip"): error 1 in extracting from zip file

unzip("data9.zip")

## Warning in unzip("data9.zip"): error 1 in extracting from zip file

unzip("data10.zip")

## Warning in unzip("data10.zip"): error 1 in extracting from zip file

unzip("data11.zip")

## Warning in unzip("data11.zip"): error 1 in extracting from zip file

unzip("data12.zip")

## Warning in unzip("data12.zip"): error 1 in extracting from zip file

unzip("data13.zip")

## Warning in unzip("data13.zip"): error 1 in extracting from zip file

unzip("data14.zip")

## Warning in unzip("data14.zip"): error 1 in extracting from zip file

unzip("data15.zip")

## Warning in unzip("data15.zip"): error 1 in extracting from zip file

unzip("data16.zip")

## Warning in unzip("data16.zip"): error 1 in extracting from zip file

unzip("data17.zip")

## Warning in unzip("data17.zip"): error 1 in extracting from zip file

unzip("data18.zip")

## Warning in unzip("data18.zip"): error 1 in extracting from zip file

unzip("data19.zip")

## Warning in unzip("data19.zip"): error 1 in extracting from zip file

unzip("data20.zip")

## Warning in unzip("data20.zip"): error 1 in extracting from zip file

unzip("data21.zip")

## Warning in unzip("data21.zip"): error 1 in extracting from zip file

unzip("data22.zip")

## Warning in unzip("data22.zip"): error 1 in extracting from zip file

unzip("data23.zip")

## Warning in unzip("data23.zip"): error 1 in extracting from zip file

unzip("data24.zip")

## Warning in unzip("data24.zip"): error 1 in extracting from zip file

unzip("data25.zip")

## Warning in unzip("data25.zip"): error 1 in extracting from zip file

unzip("data26.zip")

## Warning in unzip("data26.zip"): error 1 in extracting from zip file

unzip("data27.zip")

## Warning in unzip("data27.zip"): error 1 in extracting from zip file

unzip("data28.zip")

## Warning in unzip("data28.zip"): error 1 in extracting from zip file

unzip("data29.zip")

## Warning in unzip("data29.zip"): error 1 in extracting from zip file

unzip("data30.zip")

## Warning in unzip("data30.zip"): error 1 in extracting from zip file

unzip("data31.zip")

## Warning in unzip("data31.zip"): error 1 in extracting from zip file

unzip("data32.zip")

## Warning in unzip("data32.zip"): error 1 in extracting from zip file

unzip("data33.zip")

## Warning in unzip("data33.zip"): error 1 in extracting from zip file

unzip("data34.zip")
unzip("data35.zip")

## Warning in unzip("data35.zip"): error 1 in extracting from zip file

unzip("data36.zip")

## Warning in unzip("data36.zip"): error 1 in extracting from zip file

unzip("data37.zip")

## Warning in unzip("data37.zip"): error 1 in extracting from zip file

unzip("data38.zip")
unzip("data39.zip")

## Warning in unzip("data39.zip"): error 1 in extracting from zip file

unzip("data40.zip")

## Warning in unzip("data40.zip"): error 1 in extracting from zip file

unzip("data38.zip")

unzip("data41.zip")

## Warning in unzip("data41.zip"): error 1 in extracting from zip file

unzip("data42.zip")

## Warning in unzip("data42.zip"): error 1 in extracting from zip file

unzip("data43.zip")

## Warning in unzip("data43.zip"): error 1 in extracting from zip file

unzip("data44.zip")

## Warning in unzip("data44.zip"): error 1 in extracting from zip file

unzip("data45.zip")

## Warning in unzip("data45.zip"): error 1 in extracting from zip file

unzip("data46.zip")

## Warning in unzip("data46.zip"): error 1 in extracting from zip file

unzip("data47.zip")

## Warning in unzip("data47.zip"): error 1 in extracting from zip file

unzip("data48.zip")

## Warning in unzip("data48.zip"): error 1 in extracting from zip file

unzip("data49.zip")

## Warning in unzip("data49.zip"): error 1 in extracting from zip file

unzip("data50.zip")

## Warning in unzip("data50.zip"): error 1 in extracting from zip file

unzip("data51.zip")

## Warning in unzip("data51.zip"): error 1 in extracting from zip file

unzip("data52.zip")

## Warning in unzip("data52.zip"): error 1 in extracting from zip file

unzip("data53.zip")

## Warning in unzip("data53.zip"): error 1 in extracting from zip file

We worked only with Week13 to Week 45

data13 <-read.csv("pulse2020_puf_13.csv")
data14 <-read.csv("pulse2020_puf_14.csv")
data15 <-read.csv("pulse2020_puf_15.csv")
data16 <-read.csv("pulse2020_puf_16.csv")

data17 <-read.csv("pulse2020_puf_17.csv")
data18 <-read.csv("pulse2020_puf_18.csv")
data19 <-read.csv("pulse2020_puf_19.csv")



data20 <-read.csv("pulse2020_puf_20.csv")
data21 <-read.csv("pulse2020_puf_21.csv")



data22 <-read.csv("pulse2021_puf_22.csv")
data23 <-read.csv("pulse2021_puf_23.csv")
data24 <-read.csv("pulse2021_puf_24.csv")
data25 <-read.csv("pulse2021_puf_25.csv")
data26 <-read.csv("pulse2021_puf_26.csv")
data27 <-read.csv("pulse2021_puf_27.csv")



data28 <-read.csv("pulse2021_puf_28.csv")
data29 <-read.csv("pulse2021_puf_29.csv")
data30 <-read.csv("pulse2021_puf_30.csv")
data31 <-read.csv("pulse2021_puf_31.csv")
data32 <-read.csv("pulse2021_puf_32.csv")



data33 <-read.csv("pulse2021_puf_33.csv")



data34 <-read.csv("pulse2021_puf_34.csv")
data35 <-read.csv("pulse2021_puf_35.csv")
data36 <-read.csv("pulse2021_puf_36.csv")
data37 <-read.csv("pulse2021_puf_37.csv")
data38 <-read.csv("pulse2021_puf_38.csv")
data39 <-read.csv("pulse2021_puf_39.csv")




data40 <-read.csv("pulse2021_puf_40.csv")
data41 <-read.csv("pulse2022_puf_41.csv")
data42 <-read.csv("pulse2022_puf_42.csv")
data43 <-read.csv("pulse2022_puf_43.csv")
data44 <-read.csv("pulse2022_puf_44.csv")



data45 <-read.csv("pulse2022_puf_45.csv")

READ WEIGHTED FILE

w_data13 <- read.csv("pulse2020_repwgt_puf_13.csv")
w_data14 <- read.csv("pulse2020_repwgt_puf_14.csv")
w_data15 <- read.csv("pulse2020_repwgt_puf_15.csv")
w_data16 <- read.csv("pulse2020_repwgt_puf_16.csv")
w_data17 <- read.csv("pulse2020_repwgt_puf_17.csv")
w_data18 <- read.csv("pulse2020_repwgt_puf_18.csv")
w_data19 <- read.csv("pulse2020_repwgt_puf_19.csv")


w_data20 <- read.csv("pulse2020_repwgt_puf_20.csv")
w_data21 <- read.csv("pulse2020_repwgt_puf_21.csv") 

w_data22 <- read.csv("pulse2021_repwgt_puf_22.csv")
w_data23 <- read.csv("pulse2021_repwgt_puf_23.csv")
w_data24 <- read.csv("pulse2021_repwgt_puf_24.csv")
w_data25 <- read.csv("pulse2021_repwgt_puf_25.csv")
w_data26 <- read.csv("pulse2021_repwgt_puf_26.csv")
w_data27 <- read.csv("pulse2021_repwgt_puf_27.csv")


w_data28 <- read.csv("pulse2021_repwgt_puf_28.csv")
w_data29 <- read.csv("pulse2021_repwgt_puf_29.csv")
w_data30 <- read.csv("pulse2021_repwgt_puf_30.csv")


w_data31 <- read.csv("pulse2021_repwgt_puf_31.csv")
w_data32 <- read.csv("pulse2021_repwgt_puf_32.csv")


w_data33 <- read.csv("pulse2021_repwgt_puf_33.csv")


w_data34 <- read.csv("pulse2021_repwgt_puf_34.csv")
w_data35 <- read.csv("pulse2021_repwgt_puf_35.csv")
w_data36 <- read.csv("pulse2021_repwgt_puf_36.csv")
w_data37 <- read.csv("pulse2021_repwgt_puf_37.csv")
w_data38 <- read.csv("pulse2021_repwgt_puf_38.csv")


w_data39 <- read.csv("pulse2021_repwgt_puf_39.csv")


w_data40 <- read.csv("pulse2021_repwgt_puf_40.csv")
w_data41 <- read.csv("pulse2022_repwgt_puf_41.csv")
w_data42 <- read.csv("pulse2022_repwgt_puf_42.csv")


w_data43 <- read.csv("pulse2022_repwgt_puf_43.csv")
w_data44 <- read.csv("pulse2022_repwgt_puf_44.csv")


w_data45 <- read.csv("pulse2022_repwgt_puf_45.csv")

Merge Data and Weights

–> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –>

data13<-merge(data13, w_data13, by = "SCRAM")
data14<-merge(data14, w_data14, by = "SCRAM")

data15<-merge(data15, w_data15, by = "SCRAM")
data16<-merge(data16, w_data16, by = "SCRAM")

data17<-merge(data17, w_data17, by = "SCRAM")
data18<-merge(data18, w_data18, by = "SCRAM")

data19<-merge(data19, w_data19, by = "SCRAM")

data20<-merge(data20, w_data20, by = "SCRAM")
data21<-merge(data21, w_data21, by = "SCRAM")





data22<-merge(data22, w_data22, by = "SCRAM")
data23<-merge(data23, w_data23, by = "SCRAM")
data24<-merge(data24, w_data24, by = "SCRAM")
data25<-merge(data25, w_data25, by = "SCRAM")
data26<-merge(data26, w_data26, by = "SCRAM")
data27<-merge(data27, w_data27, by = "SCRAM")





data28<-merge(data28, w_data28, by = "SCRAM")
data29<-merge(data29, w_data29, by = "SCRAM")
data30<-merge(data30, w_data30, by = "SCRAM")
data31<-merge(data31, w_data31, by = "SCRAM")
data32<-merge(data32, w_data32, by = "SCRAM")





data33<-merge(data33, w_data33, by = "SCRAM")



data34<-merge(data34, w_data34, by = "SCRAM")

data35<-merge(data35, w_data35, by = "SCRAM")
data36<-merge(data36, w_data36, by = "SCRAM")
data37<-merge(data37, w_data37, by = "SCRAM")
data38<-merge(data38, w_data38, by = "SCRAM")
data39<-merge(data39, w_data39, by = "SCRAM")




data40<-merge(data40, w_data40, by = "SCRAM")
data41<-merge(data41, w_data41, by = "SCRAM")
data42<-merge(data42, w_data42, by = "SCRAM")





data43<-merge(data43, w_data43, by = "SCRAM")
data44<-merge(data44, w_data44, by = "SCRAM")




data45<-merge(data45, w_data45, by = "SCRAM")

Remove unused files

rm(w_data13, w_data14, w_data15, w_data16, w_data17, w_data18, w_data19)

rm(w_data20, w_data21, w_data22, w_data23, w_data24, w_data25, w_data26, w_data27, w_data28, w_data29, w_data30, w_data31, w_data32, w_data33, w_data34, w_data35, w_data36, w_data37, w_data38, w_data39, w_data40, w_data41, w_data42, w_data43, w_data44, w_data45)

gc()

##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   3697037  197.5   11109673   593.4   21698578  1158.9
## Vcells 682445710 5206.7 1594937980 12168.5 1329000216 10139.5

WEEK 13 TO WEEK 45. Combine weeks with the same variables data6, data33, data45, data48, data51, data52 are single.

data13_21 <- rbind(data13, data14, data15, data16, data17, data18, data19, data20, data21)

data22_27<-rbind(data22, data23, data24, data25, data26, data27)
data28_33<-rbind(data28, data29, data30, data31, data32, data33)

data34_39 <-rbind(data34, data35, data36, data37, data38, data39)

data40_42 <-rbind(data40, data41, data42)
data43_45 <- rbind(data43, data44)

rm(data13, data14, data15, data16, data17, data18, data19, data20, data21)


rm(data22, data23, data24, data25, data26, data27, data28, data29, data30, data31, data32)


rm(data33, data34, data35, data36, data37, data39, data40, data41, data42,data43, data44, data45)

gc()

##             used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   3626139  193.7   11109673   593.4   21698578  1158.9
## Vcells 713010626 5439.9 1594937980 12168.5 1594874459 12168.0

data13_21 <- data13_21%>%
  select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK )

data22_27 <- data22_27 %>%
  select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK )
  

data28_33 <- data28_33 %>% 
  select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK)

data34_39 <- data34_39 %>% 
  select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK)

data40_42 <- data40_42%>%
  select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK )

data43_45 <- data43_45%>%
  select(WEEK.x, RENTCUR, MORTCUR, MORTCONF, EVICT, RHISPANIC, RRACE, ANXIOUS, WORRY, INTEREST, DOWN, TBIRTH_YEAR, PWEIGHT, EEDUC, ANYWORK)

combine weeks together

data_all<-rbind(data13_21, data22_27, data28_33,data34_39, data40_42, data43_45)

#clean up
rm(data13_21, data22_27, data28_33,data34_39, data40_42, data43_45)
gc()

##            used  (Mb) gc trigger   (Mb)   max used    (Mb)
## Ncells  1293680  69.1    8887739  474.7   21698578  1158.9
## Vcells 54807998 418.2 1275950384 9734.8 1594874459 12168.0

saveRDS(data_all, file ="FINAL_allweeks.rds")
write.csv(data_all, 'final_pulse.csv')

data_all <- readRDS("C:/Users/Mahmuda Sultana/Desktop/COMPLETE/Fall'21/Dem5213/pop data/FINAL_allweeks.rds")
#data13_45 <- readRDS("C:/Users/Mahmuda Sultana/Desktop/COMPLETE/Fall'21/Dem5213/pop data/13_45.rds")

WEEK13: 19th August 2020- 31st August 2020

WEEK45: 27th April 2022-9th May 2022

PHASE 2 to PHASE 3.4

RHISPANIC

1) No, not of Hispanic, Latino, or Spanish origin

2) Yes, of Hispanic, Latino, or Spanish origin

RRACE

1) White, Alone

2) Black, Alone

3) Asian, Alone

4) Any other race alone, or race in combination

data_all$hisp<-ifelse(data_all$RHISPANIC==1, "Not_hispanic", "hispanic")
data_all$race <- car::Recode(data_all$RRACE, recodes = "1= 'White'; 2= 'Black'; 3 = 'Asian'; 4='Other'; else=NA")
data_all$race_eth<-ifelse(data_all$hisp==1, "hisp",data_all$race)
data_all$race_eth <- interaction(data_all$hisp, data_all$race)

data_all$race_ethnicity<- mutate(data_all, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))

data_all$race_ethnicity<- ifelse(substr(data_all$race_eth, 1, 4)=="hisp", "hisp", as.character(data_all$race_eth))

MORTGAGE

What percentage of households are currently caught up on Mortgage

MORTAGE TREND

#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report


#Output: What percentage of households are currently caught up on mortgage.
library(tidyverse)

data_all %>%
select(WEEK.x, MORTCUR) %>%
mutate( caught_up = case_when(MORTCUR< 0 ~ NA_real_,
MORTCUR %in% 2 ~ 0,
MORTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_Mort_caught_up = mean(caught_up, na.rm=TRUE)) %>%
ggplot(aes(x=WEEK.x, y=mean_Mort_caught_up*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
ylim(92, 96) +
labs(title = "Updated: %Household Currently Caught Up on Mortage From 19th August 2020 to 9th May 2022",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_GENERAL_MORTGAGE.png",height=8, width=10, dpi = "print" )

Trend of Mortgage Before and After Peak Pandemic

# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report

# Output: What percentage of households are currently caught up on mortgage.

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on mortgage for each wave before the peak of the pandemic
mort_caught_up_before <- data_peak_before %>% 
  select(WEEK.x, MORTCUR) %>% 
  mutate(caught_up = case_when(MORTCUR < 0 ~ NA_real_,
                               MORTCUR %in% 2 ~ 0,
                               MORTCUR %in% 1 ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(mean_Mort_caught_up = mean(caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "Before")

# Calculate the percentage of households that are currently caught up on mortgage for each wave after the peak of the pandemic
mort_caught_up_after <- data_peak_after %>% 
  select(WEEK.x, MORTCUR) %>% 
  mutate(caught_up = case_when(MORTCUR < 0 ~ NA_real_,
                               MORTCUR %in% 2 ~ 0,
                               MORTCUR %in% 1 ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(mean_Mort_caught_up = mean(caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

# Combine the two datasets
mort_caught_up_combined <- rbind(mort_caught_up_before, mort_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on mortgage before and after the peak of the pandemic
ggplot(mort_caught_up_combined, aes(x = WEEK.x, y = mean_Mort_caught_up * 100, color = Pandemic)) +
  geom_line() +
  theme_minimal() +
  xlab("HPS wave") +
  ylab("% of Households Caught Up on Mortgage") +
  ylim(92, 96) +
  labs(title = "Trend of Households Caught Up on Mortgage Before and After the Peak of the Pandemic",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "TREND_MORTGAGE_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

MORTGAGE FOR HISPANIC ONLY

# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report

# Output: What percentage of Hispanic households are currently caught up on mortgage.

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of Hispanic households that are currently caught up on mortgage for each wave before the peak of the pandemic
Hisp_mort_caught_up_before <- data_peak_before %>% 
  select(WEEK.x, MORTCUR, RHISPANIC) %>% 
  filter(RHISPANIC == 2) %>% # Select only Hispanic households
  mutate(hisp_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
                               MORTCUR %in% 2 ~ 0,
                               MORTCUR %in% 1 ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(hisp_mean_Mort_caught_up = mean(hisp_caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "Before")

# Calculate the percentage of Hispanic households that are currently caught up on mortgage for each wave after the peak of the pandemic
Hisp_mort_caught_up_after <- data_peak_after %>% 
  select(WEEK.x, MORTCUR, RHISPANIC) %>% 
  filter(RHISPANIC == 2) %>% # Select only Hispanic households
  mutate(hisp_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
                               MORTCUR %in% 2 ~ 0,
                               MORTCUR %in% 1 ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(hisp_mean_Mort_caught_up = mean(hisp_caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

# Combine the two datasets
Hisp_caught_up_combined <- rbind(Hisp_mort_caught_up_before, Hisp_mort_caught_up_after)

# Plot the trend of the percentage of Hispanic households that are currently caught up on mortgage before and after the peak of the pandemic
ggplot(Hisp_caught_up_combined, aes(x = WEEK.x, y = hisp_mean_Mort_caught_up * 100, color = Pandemic)) +
  geom_line() +
  theme_minimal() +
  xlab("Month") +
  ylab("% of Hispanic Households Caught Up on Mortgage") +
  ylim(80, 100) +
  labs(title = "Trend of Hispanic Households Caught Up on Mortgage Before and After the Peak of the Pandemic",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "TREND_MORTGAGE_HISP_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

HISPANIC AND NON_HISPANIC

#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report


#1=HISPANIC
#2=NON-Hispanic

#Output: What percentage of households are currently caught up on Mortgage


# Create a subset of data for before and after the peak of the pandemic
mort_data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
mort_data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
one_mort_caught_up_before <- data_peak_before %>%
  select(WEEK.x, RHISPANIC, MORTCUR) %>%
  mutate(mor_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
                                   MORTCUR %in% 2 ~ 0,
                                   MORTCUR %in% 1 ~ 1)) %>%
  group_by(WEEK.x, RHISPANIC) %>%
  summarize(mean_mort_caught_up = mean(mor_caught_up, na.rm = TRUE)) %>%
  mutate(Pandemic = "Before")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of households that are currently caught up on Mortgage for each wave after the peak of the pandemic
one_mort_caught_up_after <- data_peak_after %>%
  select(WEEK.x, RHISPANIC, MORTCUR) %>%
  mutate(mor_caught_up = case_when(MORTCUR < 0 ~ NA_real_,
                                   MORTCUR %in% 2 ~ 0,
                                   MORTCUR %in% 1 ~ 1)) %>%
  group_by(WEEK.x, RHISPANIC) %>%
  summarize(mean_mort_caught_up = mean(mor_caught_up, na.rm = TRUE)) %>%
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
one_mort_caught_up_combined <- rbind(one_mort_caught_up_before, one_mort_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic for all races
ggplot(one_mort_caught_up_combined, aes(x = WEEK.x, y = mean_mort_caught_up * 100, color = Pandemic)) +
  geom_line() +
  scale_color_manual(values = c("red", "blue"), labels = c("after", "before")) +
  facet_wrap(~RHISPANIC) +
  theme_minimal() +
  xlab("HPS wave") +
  ylab("% of Households Caught Up on Mortgage") +
  ylim(85, 100) +
  labs(title = "Trend of Households Caught Up on Mortgage Before and After the Peak of the Pandemic for Hispanic and Non-Hispanic",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "TREND_MORTGAGE_HISP_NON_HISP.png",height=8, width=10, dpi = "print" )

Caught up on Mortgage for All Races

library(dplyr)
library(ggplot2)

# Create a subset of data for each race/ethnicity
data_by_race <- data_all %>%
  filter(race_ethnicity != "All Races") %>%
  select(WEEK.x, race_ethnicity, MORTCUR)

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity and week
mortgage_caught_up_by_week <- data_by_race %>%
  mutate(mortgage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR < 0, NA, 0))) %>%
  group_by(race_ethnicity, WEEK.x) %>%
  summarize(pct_mortgage_caught_up = weighted.mean(mortgage, na.rm = TRUE))

## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.

# Plot the trend of the percentage of households that are currently caught up on mortgage for different races/ethnicities over time
ggplot(mortgage_caught_up_by_week, aes(x = WEEK.x, y = pct_mortgage_caught_up * 100, color = race_ethnicity)) +
  geom_line(size = 1) +
  scale_color_manual(values = c("black", "red", "green", "blue", "yellow", "orange")) +
  theme_minimal() +
  xlab("Week") +
  ylab("% of Households Caught Up on Mortgage") +
  labs(title = "Trend of Households Caught Up on Mortgage by Race/Ethnicity",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

  ggsave("TREND_MORTGAGE_RACE_ETHNICITY_4_TOGETHER.png")

## Saving 7 x 5 in image

CAUGHT UP ON RENT

****What percentage of households are currently caught up on Rent**** TREND

#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report


#Output: What percentage of households are currently caught up on Rent


data_all %>%
select(WEEK.x, RENTCUR) %>%
mutate( ren_caught_up = case_when(RENTCUR< 0 ~ NA_real_,
RENTCUR %in% 2 ~ 0,
RENTCUR %in% 1 ~ 1)) %>%
group_by(WEEK.x) %>%
summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm=TRUE)) %>%
ggplot(aes(x=WEEK.x, y=mean_RENT_caught_up*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
ylim(85, 90) +
labs(title = "Updated: %Household currently caught up on Rent Payment From 19th August 2020 to 9th May 2022",
subtitle = "Weighted",
caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_GENERAL_RENT.png",height=8, width=10, dpi = "print" )

Rent before and after peak pandemic

# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
rent_caught_up_before <- data_peak_before %>% 
  select(WEEK.x, RENTCUR) %>% 
  mutate(rent_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
                                   RENTCUR %in% 2 ~ 0,
                                   RENTCUR %in% 1 ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(mean_RENT_caught_up = mean(rent_caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "Before")

# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
rent_caught_up_after <- data_peak_after %>% 
  select(WEEK.x, RENTCUR) %>% 
  mutate(rent_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
                                   RENTCUR %in% 2 ~ 0,
                                   RENTCUR %in% 1 ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(mean_RENT_caught_up = mean(rent_caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

# Combine the two datasets
rent_caught_up_combined <- rbind(rent_caught_up_before, rent_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic
ggplot(rent_caught_up_combined, aes(x = WEEK.x, y = mean_RENT_caught_up * 100, color = Pandemic)) +
  geom_line() +
  theme_minimal() +
  xlab("HPS wave") +
  ylab("% of Households Caught Up on Rent") +
  ylim(85, 90) +
  labs(title = "Trend of Households Caught Up on Rent During and After the Peak of the Pandemic",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_RENT_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

RENT for HISPANIC vs NON-HISPANIC

#Input:
#1) Yes
#2) No
#-99) Question seen but category not selected
#-88) Missing / Did not report

#Output: What percentage of households are currently caught up on Rent


# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
o_rent_caught_up_before <- data_peak_before %>%
  select(WEEK.x, RHISPANIC, RENTCUR) %>%
  mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
                                   RENTCUR %in% 2 ~ 0,
                                   RENTCUR %in% 1 ~ 1)) %>%
  group_by(WEEK.x, RHISPANIC) %>%
  summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
  mutate(Pandemic = "Before")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
o_rent_caught_up_after <- data_peak_after %>%
  select(WEEK.x, RHISPANIC, RENTCUR) %>%
  mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
                                   RENTCUR %in% 2 ~ 0,
                                   RENTCUR %in% 1 ~ 1)) %>%
  group_by(WEEK.x, RHISPANIC) %>%
  summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
o_rent_caught_up_combined <- rbind(o_rent_caught_up_before, o_rent_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic for all races
ggplot(o_rent_caught_up_combined, aes(x = WEEK.x, y = mean_RENT_caught_up * 100, color = Pandemic)) +
  geom_line() +
  scale_color_manual(values = c("red", "blue"), labels = c("after", "before")) +
  facet_wrap(~RHISPANIC) +
  theme_minimal() +
  xlab("HPS wave") +
  ylab("% of Households Caught Up on Rent") +
  ylim(75, 95) +
  labs(title = "Trend of Households Caught Up on Rent During and After the Peak of the Pandemic for Hispanic and Non-Hispanic",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "TREND_RENT_HISP_NON_HISP.png",height=8, width=10, dpi = "print" )

Caught up on Rent For all Races

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
two_rent_caught_up_before <- data_peak_before %>%
  select(WEEK.x, RRACE, RENTCUR) %>%
  mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
                                   RENTCUR %in% 2 ~ 0,
                                   RENTCUR %in% 1 ~ 1)) %>%
  group_by(WEEK.x, RRACE) %>%
  summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
  mutate(Pandemic = "Before")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
two_rent_caught_up_after <- data_peak_after %>%
  select(WEEK.x, RRACE, RENTCUR) %>%
  mutate(ren_caught_up = case_when(RENTCUR < 0 ~ NA_real_,
                                   RENTCUR %in% 2 ~ 0,
                                   RENTCUR %in% 1 ~ 1)) %>%
  group_by(WEEK.x, RRACE) %>%
  summarize(mean_RENT_caught_up = mean(ren_caught_up, na.rm = TRUE)) %>%
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
two_rent_caught_up_combined <- rbind(two_rent_caught_up_before, two_rent_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic for different races
ggplot(two_rent_caught_up_combined, aes(x = WEEK.x, y = mean_RENT_caught_up * 100, color = Pandemic)) +
  geom_line() +
  scale_color_manual(values = c("red", "blue"), labels = c("After", "Before")) +
  facet_wrap(~ RRACE) +
  theme_minimal() +
  xlab("HPS wave") +
  ylab("% of Households Caught Up on Rent") +
  ylim(75, 95) +
  labs(title = "Trend of Households Caught Up on Rent Before and After the Peak of the Pandemic for Different Races",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "TREND_RENT_FOR_ALL_RACE.png",height=8, width=10, dpi = "print" )

library(dplyr)
library(ggplot2)

# Create a subset of data for each race/ethnicity
rent_data_by_race <- data_all %>%
  filter(race_ethnicity != "All Races") %>%
  select(WEEK.x, race_ethnicity, RENTCUR)

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity and week
rent_caught_up_by_week <- rent_data_by_race %>%
  mutate(rent = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR < 0, NA, 0))) %>%
  group_by(race_ethnicity, WEEK.x) %>%
  summarize(pct_rent_caught_up = weighted.mean(rent, na.rm = TRUE))

## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.

# Plot the trend of the percentage of households that are currently caught up on mortgage for different races/ethnicities over time
ggplot(rent_caught_up_by_week, aes(x = WEEK.x, y = pct_rent_caught_up * 100, color = race_ethnicity)) +
  geom_line(size = 1) +
  scale_color_manual(values = c("black", "red", "green", "blue", "yellow", "orange")) +
  theme_minimal() +
  xlab("Week") +
  ylab("% of Households Caught Up on Rent Payment") +
  labs(title = "Trend of Households Caught Up on Rent by Race/Ethnicity",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

  ggsave("TREND_RENT_RACE_ETHNICITY_4_TOGETHER.png")

## Saving 7 x 5 in image

CONFIDENCE

****What percent of household are Confident to pay next rent or mortgage****

#1) No confidence
#2) Slight confidence
#3) Moderate confidence
#4) High confidence 
#5) Payment is/will be deferred

#(1,2,5)~1 means not confident enough.

data_all%>%
  mutate(re_mort_conf = ifelse(MORTCONF %in% c(3,4),1, ifelse( c(1,2,5), 0)))%>%
  group_by(WEEK.x)%>%
  summarise(pct_re_mort_conf = mean(re_mort_conf, na.rm=T))%>%
  ggplot(aes(x=WEEK.x, y=pct_re_mort_conf*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
labs(title = "Update: % Confident to pay mortgage or rent From 19th August 2020 to 9th May 2022",
subtitle = "Unweighted",
caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_GENERAL_MORTCONF.png",height=8, width=10, dpi = "print" )

Confidence to pay Rent/Mortgage Before and After Peak Pandemic

# Input:
# 1) Yes
# 2) No
# -99) Question seen but category not selected
# -88) Missing / Did not report

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on rent for each wave before the peak of the pandemic
conf_caught_up_before <- data_peak_before %>% 
  select(WEEK.x, MORTCONF) %>% 
  mutate(CONF_caught_up = case_when(MORTCONF < 0 ~ NA_real_,
                                   MORTCONF %in% c(1,2,5) ~ 0,
                                   MORTCONF %in% c(3,4) ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(mean_CONF_caught_up = mean(CONF_caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "Before")

# Calculate the percentage of households that are currently caught up on rent for each wave after the peak of the pandemic
conf_caught_up_after <- data_peak_after %>% 
  select(WEEK.x, MORTCONF) %>% 
  mutate(CONF_caught_up = case_when(MORTCONF < 0 ~ NA_real_,
                                   MORTCONF %in% c(1,2,5) ~ 0,
                                   MORTCONF %in% c(3,4) ~ 1)) %>% 
  group_by(WEEK.x) %>% 
  summarize(mean_CONF_caught_up = mean(CONF_caught_up, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

# Combine the two datasets
conf_caught_up_combined <- rbind(conf_caught_up_before, conf_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on rent before and after the peak of the pandemic
ggplot(conf_caught_up_combined, aes(x = WEEK.x, y = mean_CONF_caught_up * 100, color = Pandemic)) +
  geom_line() +
  theme_minimal() +
  xlab("HPS wave") +
  ylab("% of Households Confidence to pay rent or mortgage of next month") +
  ylim(85, 90) +
  labs(title = "Trend of Households Confident to pay Rent or Mortgage During and After the Peak of the Pandemic",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

## Warning: Removed 3 rows containing missing values (`geom_line()`).

ggsave(filename = "TREND_CONF_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

## Warning: Removed 3 rows containing missing values (`geom_line()`).

library(dplyr)
library(ggplot2)

# Create a subset of data for each race/ethnicity
conf_data_by_race <- data_all %>%
  filter(race_ethnicity != "All Races") %>%
  select(WEEK.x, race_ethnicity, MORTCONF)

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity and week
mortconf_caught_up_by_week <- conf_data_by_race %>%
  mutate(mortconf = ifelse(MORTCONF %in% c(3,4),1, ifelse( c(1,2,5), 0))) %>%
  group_by(race_ethnicity, WEEK.x) %>%
  summarize(pct_conf_caught_up = weighted.mean(mortconf, na.rm = TRUE))

## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.

# Plot the trend of the percentage of households that are currently caught up on mortgage for different races/ethnicities over time
ggplot(mortconf_caught_up_by_week, aes(x = WEEK.x, y = pct_conf_caught_up * 100, color = race_ethnicity)) +
  geom_line(size = 1) +
  scale_color_manual(values = c("black", "red", "green", "blue", "yellow", "orange")) +
  theme_minimal() +
  xlab("Week") +
  ylab("% of Households confident to pay next rent or mortgage") +
  labs(title = "Trend of Households confident to pay rent or mortgage by Race/Ethnicity",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

  ggsave("TREND_CONF_RACE_ETHNICITY_4_TOGETHER.png")

## Saving 7 x 5 in image

ANalysis of MENTAL HEALTH

What percent of people are Worry

#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day 


#(2,3,4)~1 means worry

data_all%>%
  mutate(re_worry = ifelse(WORRY %in% c(2,3,4),1, ifelse( c(1), 0)))%>%
  group_by(WEEK.x)%>%
  summarise(pct_re_worry = mean(re_worry, na.rm=T))%>%
  ggplot(aes(x=WEEK.x, y=pct_re_worry*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
labs(title = "Update: % people stays worry for a week From 19th August 2020 to 9th May 2022",
subtitle = "Unweighted",
caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_GENERAL_worry.png",height=8, width=10, dpi = "print" )

WORRY HISPANIC ONLY

# Input:
# 1) Not at all
# 2) Several days
# 3) More than half the days
# 4) Nearly every day 

# Output: What percentage of Hispanic households stay worried for a week

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of Hispanic households that stay worried for a week for each wave before the peak of the pandemic
Hisp_worry_before <- data_peak_before %>% 
  select(WEEK.x, WORRY, RHISPANIC) %>% 
  filter(RHISPANIC == 2) %>% # Select only Hispanic households
  mutate(hisp_worry = ifelse(WORRY %in% 2:4, 1, 0)) %>% 
  group_by(WEEK.x) %>% 
  summarize(hisp_mean_worry = mean(hisp_worry, na.rm = TRUE)) %>% 
  mutate(Pandemic = "Before")

# Calculate the percentage of Hispanic households that stay worried for a week for each wave after the peak of the pandemic
Hisp_worry_after <- data_peak_after %>% 
  select(WEEK.x, WORRY, RHISPANIC) %>% 
  filter(RHISPANIC == 2) %>% # Select only Hispanic households
  mutate(hisp_worry = ifelse(WORRY %in% 2:4, 1, 0)) %>% 
  group_by(WEEK.x) %>% 
  summarize(hisp_mean_worry = mean(hisp_worry, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

# Combine the two datasets
Hisp_worry_combined <- rbind(Hisp_worry_before, Hisp_worry_after)

# Plot the trend of the percentage of Hispanic households that stay worried for a week before and after the peak of the pandemic
ggplot(Hisp_worry_combined, aes(x = WEEK.x, y = hisp_mean_worry * 100, color = Pandemic)) +
  geom_line() +
  theme_minimal() +
  xlab("Month") +
  ylab("% of Hispanic Households that Stay Worried for a Week") +
  ylim(0, 100) +
  labs(title = "Trend of Hispanic Households that Stay Worried for a Week Before and After the Peak of the Pandemic",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_WORRY_HISP_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

WORRY ALL RACE

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of people who stay worried for each wave before the peak of the pandemic by race
worry_by_race_before <- data_peak_before %>%
  select(WEEK.x, WORRY, RRACE) %>%
  mutate(re_worry = ifelse(WORRY %in% c(2,3,4),1,0)) %>%
  group_by(WEEK.x, RRACE) %>%
  summarise(pct_re_worry = mean(re_worry, na.rm=T)) %>%
  mutate(Pandemic = "Before")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of people who stay worried for each wave after the peak of the pandemic by race
worry_by_race_after <- data_peak_after %>%
  select(WEEK.x, WORRY, RRACE) %>%
  mutate(re_worry = ifelse(WORRY %in% c(2,3,4),1,0)) %>%
  group_by(WEEK.x, RRACE) %>%
  summarise(pct_re_worry = mean(re_worry, na.rm=T)) %>%
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
worry_by_race_combined <- rbind(worry_by_race_before, worry_by_race_after)

# Convert RRACE variable to factor
worry_by_race_combined$RRACE <- factor(worry_by_race_combined$RRACE)

# Plot the trend of the percentage of people who stay worried by race before and after the peak of the pandemic
ggplot(worry_by_race_combined, aes(x = WEEK.x, y = pct_re_worry * 100, color = Pandemic)) +
  geom_line() +
  facet_wrap(~ RRACE, ncol = 2) +
  theme_minimal() +
  xlab("Month") +
  ylab("% of People Staying Worried") +
  ylim(25, 60) +
  labs(title = "Trend of People Staying Worried by Race Before and After the Peak of the Pandemic",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_worry_ALL_race_before_and_after.png", height = 8, width = 10, dpi = "print")

ANXIOUS

#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day 


#(2,3,4)~1 means anxious

data_all%>%
  mutate(re_anxious = ifelse(ANXIOUS %in% c(2,3,4),1, ifelse( c(1), 0)))%>%
  group_by(WEEK.x)%>%
  summarise(pct_re_anxious = mean(re_anxious, na.rm=T))%>%
  ggplot(aes(x=WEEK.x, y=pct_re_anxious*100)) +
geom_line() +
theme_minimal() +
xlab("HPS wave") +
ylab("") +
labs(title = "Update: % people are anxious for a week From 19th August 2020 to 9th May 2022",
subtitle = "Unweighted",
caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_anxious.png",height=8, width=10, dpi = "print" )

ANXIOUS ALL RACE

#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day 


# Output: Trend of the percentage of people who felt anxious for each wave, by race.




#WHITE 1
#BLACK 2
#ASIAN 3
#OTHER 4


library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Recode ANXIOUS variable into a binary variable where 1 means anxious
data_peak_before <- data_peak_before %>% 
  mutate(anxious = ifelse(ANXIOUS %in% c(2, 3, 4), 1, 0))

data_peak_after <- data_peak_after %>% 
  mutate(anxious = ifelse(ANXIOUS %in% c(2, 3, 4), 1, 0))

# Calculate the percentage of people who felt anxious for each wave, by race, before the peak of the pandemic
anxious_by_race_before <- data_peak_before %>% 
  select(WEEK.x, RRACE, anxious) %>% 
  filter(RRACE != -88) %>% # Exclude missing values
  group_by(WEEK.x, RRACE) %>% 
  summarize(mean_anxious = mean(anxious, na.rm = TRUE)) %>% 
  mutate(Pandemic = "During")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of people who felt anxious for each wave, by race, after the peak of the pandemic
anxious_by_race_after <- data_peak_after %>% 
  select(WEEK.x, RRACE, anxious) %>% 
  filter(RRACE != -88) %>% # Exclude missing values
  group_by(WEEK.x, RRACE) %>% 
  summarize(mean_anxious = mean(anxious, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
anxious_by_race_combined <- rbind(anxious_by_race_before, anxious_by_race_after)

# Plot the trend of the percentage of people who felt anxious for each wave, by race, before and after the peak of the pandemic
ggplot(anxious_by_race_combined, aes(x = WEEK.x, y = mean_anxious * 100, color = Pandemic, linetype = as.factor(RRACE))) +
  geom_line() +
  scale_linetype_manual(values = c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash")) +
  theme_minimal() +
  xlab("Month") +
  ylab("% of People Feeling Anxious") +
  ylim(25, 70) +
  labs(title = "Trend of the Percentage of People Feeling Anxious During and After the Peak of the Pandemic, by Race",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_ANXIOUS_ALL_RACE_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

DOWN

#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day 

# Output: Trend of the percentage of people who felt down for each wave, by race.

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Recode DOWN variable into a binary variable where 1 means feeling down
data_peak_before <- data_peak_before %>% 
  mutate(down = ifelse(DOWN %in% c(2, 3, 4), 1, 0))

data_peak_after <- data_peak_after %>% 
  mutate(down = ifelse(DOWN %in% c(2, 3, 4), 1, 0))

# Calculate the percentage of people who felt down for each wave, by race, before the peak of the pandemic
down_by_race_before <- data_peak_before %>% 
  select(WEEK.x, RRACE, down) %>% 
  filter(RRACE != -88) %>% # Exclude missing values
  group_by(WEEK.x, RRACE) %>% 
  summarize(mean_down = mean(down, na.rm = TRUE)) %>% 
  mutate(Pandemic = "Before")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of people who felt down for each wave, by race, after the peak of the pandemic
down_by_race_after <- data_peak_after %>% 
  select(WEEK.x, RRACE, down) %>% 
  filter(RRACE != -88) %>% # Exclude missing values
  group_by(WEEK.x, RRACE) %>% 
  summarize(mean_down = mean(down, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
down_by_race_combined <- rbind(down_by_race_before, down_by_race_after)

# Plot the trend of the percentage of people who felt down for each wave, by race, before and after the peak of the pandemic
ggplot(down_by_race_combined, aes(x = WEEK.x, y = mean_down * 100, color = Pandemic, linetype = as.factor(RRACE))) +
  geom_line() +
  scale_linetype_manual(values = c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash")) +
  theme_minimal() +
  xlab("Month") +
  ylab("% of People Feeling Down") +
  ylim(25, 55) +
  labs(title = "Trend of the Percentage of People Feeling Down Before and After the Peak of the Pandemic, by Race",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_DOWN_ALL_RACE_BEFORE_AND_AFTER.png",height=8, width=10, dpi = "print" )

MENTAL HEALTH

#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day 

# Output: Trend of the percentage of people who experienced mental health concerns for each wave, by race.

library(ggplot2)

# Create a subset of data for before and after the peak of the pandemic
#data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
#data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Recode mental health variables into a binary variable where 1 means experiencing mental health concerns
data_peak_before <- data_peak_before %>% 
  mutate(mental_health = ifelse(WORRY %in% c(2, 3, 4) | ANXIOUS %in% c(2, 3, 4) | DOWN %in% c(2, 3, 4), 1, 0))

data_peak_after <- data_peak_after %>% 
  mutate(mental_health = ifelse(WORRY %in% c(2, 3, 4) | ANXIOUS %in% c(2, 3, 4) | DOWN %in% c(2, 3, 4), 1, 0))

# Calculate the percentage of people who experienced mental health concerns for each wave, by race, before the peak of the pandemic
mental_health_by_race_before <- data_peak_before %>% 
  select(WEEK.x, RRACE, mental_health) %>% 
  filter(RRACE != -88) %>% # Exclude missing values
  group_by(WEEK.x, RRACE) %>% 
  summarize(mean_mental_health = mean(mental_health, na.rm = TRUE)) %>% 
  mutate(Pandemic = "During")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Calculate the percentage of people who experienced mental health concerns for each wave, by race, after the peak of the pandemic
mental_health_by_race_after <- data_peak_after %>% 
  select(WEEK.x, RRACE, mental_health) %>% 
  filter(RRACE != -88) %>% # Exclude missing values
  group_by(WEEK.x, RRACE) %>% 
  summarize(mean_mental_health = mean(mental_health, na.rm = TRUE)) %>% 
  mutate(Pandemic = "After")

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Combine the two datasets
mental_health_by_race_combined <- rbind(mental_health_by_race_before, mental_health_by_race_after)

# Plot the trend of the percentage of people who experienced mental health concerns for each wave, by race, before and after the peak of the pandemic
ggplot(mental_health_by_race_combined, aes(x = WEEK.x, y = mean_mental_health * 100, color = Pandemic, linetype = as.factor(RRACE))) +
  geom_line() +
  scale_linetype_manual(values = c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash")) +
  theme_minimal() +
  xlab("Month") +
  ylab("% of People Experiencing Mental Health Concerns") +
  ylim(30, 70) +
  labs(title = "Trend of the Percentage of People Experiencing Mental Health Concerns During and After the Peak of the Pandemic, by Race",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

ggsave(filename = "TREND_MENTAL_HEALTH_RACE_ALL_BEFORE_AND_AFTER.png")

## Saving 7 x 5 in image

*********BAR CHART*********** ##############BAR CHART##########

BAR CHART ANALYSIS

MORTGAGE

#library(questionr)
data_all%>%
 mutate(mortage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR <0, NA, 0))) %>%
  group_by(race_ethnicity)%>%
  summarise(pct_remote = weighted.mean(mortage, na.rm=T))%>%
  ggplot(aes(x=race_ethnicity, y=pct_remote, fill=race_ethnicity,label = scales::percent(pct_remote)))+
  
  geom_col(position = 'dodge') + 
    geom_text(position = position_dodge(width = .9),    # move to center of bars
              vjust = -0.5,    # nudge above top of bar
              size = 3) + 
    scale_y_continuous(labels = scales::percent)+
  
   ggtitle("Percent of caught up in Mortage")+
  ylab("Percent") +
  xlab("Race/Ethnicity")+
  theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))

ggsave(filename = "Bar_MORTCUR.png",height=8, width=10, dpi = "print" )

MORTGAGE For ALL RACES

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity before the peak of the pandemic
mortgage_caught_up_before <- data_peak_before %>%
  mutate(mortgage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR <0, NA, 0))) %>%
  group_by(race_ethnicity) %>%
  summarize(pct_mortgage_caught_up = weighted.mean(mortgage, na.rm = TRUE)) %>%
  mutate(Pandemic = "Before")

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity after the peak of the pandemic
mortgage_caught_up_after <- data_peak_after %>%
  mutate(mortgage = ifelse(MORTCUR %in% 1, 1, ifelse(MORTCUR <0, NA, 0))) %>%
  group_by(race_ethnicity) %>%
  summarize(pct_mortgage_caught_up = weighted.mean(mortgage, na.rm = TRUE)) %>%
  mutate(Pandemic = "After")

# Combine the two datasets
mortgage_caught_up_combined <- rbind(mortgage_caught_up_before, mortgage_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on mortgage before and after the peak of the pandemic for different races/ethnicities
ggplot(mortgage_caught_up_combined, aes(x = race_ethnicity, y = pct_mortgage_caught_up * 100, fill = Pandemic, label = scales::percent(pct_mortgage_caught_up))) +
  geom_col(position = "dodge", width = 0.7) +
  geom_text(position = position_dodge(width = 0.7), vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("blue", "red"), labels = c("Peak", "After")) +
  coord_flip() +
  theme_minimal() +
  xlab("Race/Ethnicity") +
  ylab("% of Households Caught Up on Mortgage") +
  labs(title = "Bar Chart Households Caught Up on Mortgage During Peak and After the Peak of the Pandemic for Different Races/Ethnicities",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "BAR_CHART_MORTGAGE_FOR_ALL_BEFORE_AFTER_PANDEMIC.png",height=8, width=10, dpi = "print" )

CAUGHT UP ON RENT

#library(questionr)
data_all%>%
 mutate(rent_caught = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR <0, NA, 0))) %>%
  group_by(race_ethnicity)%>%
  summarise(pct_remote_rent = weighted.mean(rent_caught, na.rm=T))%>%
   ggplot(aes(x=race_ethnicity, y=pct_remote_rent, fill=race_ethnicity,label = scales::percent(pct_remote_rent)))+
  
  geom_col(position = 'dodge') + 
    geom_text(position = position_dodge(width = .9),    # move to center of bars
              vjust = -0.5,    # nudge above top of bar
              size = 3) + 
    scale_y_continuous(labels = scales::percent)+
  
  
  
  ggtitle("Percent of caught up in Rent")+
  ylab("Percent") +
  xlab("Race/Ethnicity")+
  theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))

ggsave(filename = "BARCHART_RENT.png",height=8, width=10, dpi = "print" )

RENT for all RACES

# Create a subset of data for before and after the peak of the pandemic
data_peak_before <- data_all[data_all$WEEK.x <= "2021-01-06", ]
data_peak_after <- data_all[data_all$WEEK.x >= "2021-01-06", ]

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity before the peak of the pandemic
three_rent_caught_up_before <- data_peak_before %>%
  mutate(rent = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR <0, NA, 0))) %>%
  group_by(race_ethnicity) %>%
  summarize(pct_rent_caught_up = weighted.mean(rent, na.rm = TRUE)) %>%
  mutate(Pandemic = "Before")

# Calculate the percentage of households that are currently caught up on mortgage for each race/ethnicity after the peak of the pandemic
three_rent_caught_up_after <- data_peak_after %>%
  mutate(rent = ifelse(RENTCUR %in% 1, 1, ifelse(RENTCUR <0, NA, 0))) %>%
  group_by(race_ethnicity) %>%
  summarize(pct_rent_caught_up = weighted.mean(rent, na.rm = TRUE)) %>%
  mutate(Pandemic = "After")

# Combine the two datasets
three_rent_caught_up_combined <- rbind(three_rent_caught_up_before, three_rent_caught_up_after)

# Plot the trend of the percentage of households that are currently caught up on mortgage before and after the peak of the pandemic for different races/ethnicities
ggplot(three_rent_caught_up_combined, aes(x = race_ethnicity, y = pct_rent_caught_up * 100, fill = Pandemic, label = scales::percent(pct_rent_caught_up))) +
  geom_col(position = "dodge", width = 0.7) +
  geom_text(position = position_dodge(width = 0.7), vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("blue", "red"), labels = c("Peak", "After")) +
  coord_flip() +
  theme_minimal() +
  xlab("Race/Ethnicity") +
  ylab("% of Households Caught Up on Rent") +
  labs(title = "Trend of Households Caught Up on rent Peak and After the Peak of the Pandemic for Different Races/Ethnicities",
  subtitle = "Weighted",
  caption = "Source: Census Household Pulse survey")

  ggsave(filename = "BAR_CHART_RENT_FOR_ALL_BEFORE_AFTER_PANDEMIC.png",height=8, width=10, dpi = "print" )

CONFIDENCE OF PAYING MORTGAGE OR RENT

#library(questionr)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

data_all%>%
mutate(rent_mort_confidence = case_when(
  MORTCONF %in% c(3,4)~1, MORTCONF %in% c(1,2,5) ~ 0, TRUE ~ NA_real_)) %>%
  group_by(race_ethnicity)%>%
  summarise(pct_remote_mort_rent = weighted.mean(rent_mort_confidence, na.rm=T))%>%
  ggplot(aes(x=race_ethnicity, y=pct_remote_mort_rent, fill=race_ethnicity,label = scales::percent(pct_remote_mort_rent)))+
  
  geom_col(position = 'dodge') + 
    geom_text(position = position_dodge(width = .9),    # move to center of bars
              vjust = -0.5,    # nudge above top of bar
              size = 3) + 
    scale_y_continuous(labels = scales::percent)+

  ggtitle("Percent of confidence to pay rent or mortage")+
  ylab("Percent") +
  xlab("Race/Ethnicity")+
  theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))

  ggsave(filename = "BARCHART_CONFIDENCE.png",height=8, width=10, dpi = "print" )

Housing Affordability

We define a new variable Housing Affordability. This variable will show the portion of people who was caught up on rent, mortgage and were confident to pay their next mortgage and rent. As a result, these household has more housing affordability or less house rent hardship

# Filter the data for week 13 to week 45
data_filtered <- data_all

# Create a new variable for housing insecurity
data_housing_insecurity <- data_filtered %>%
  mutate(rent_insecurity = ifelse(RENTCUR %in% c(2, 3), 1, 0),
         mortgage_insecurity = ifelse(MORTCUR %in% c(2, 3), 1, 0),
         mortgage_conf_insecurity = ifelse(MORTCONF %in% c(3,4), 1, 0),
         housing_insecurity = pmax(rent_insecurity, mortgage_insecurity, mortgage_conf_insecurity)) %>%
  group_by(race_ethnicity, WEEK.x) %>%
  summarize(pct_housing_insecurity = mean(housing_insecurity, na.rm = TRUE))

## `summarise()` has grouped output by 'race_ethnicity'. You can override using
## the `.groups` argument.

# Separate the data for HISPANIC and NON-Hispanic
data_housing_insecurity_hisp <- data_housing_insecurity %>%
  filter(race_ethnicity == "HISPANIC")

data_housing_insecurity_nonhisp <- data_housing_insecurity %>%
  filter(race_ethnicity == "NON-Hispanic")

# Bar chart for housing insecurity by race/ethnicity
ggplot(data_housing_insecurity, aes(x = race_ethnicity, y = pct_housing_insecurity * 100, fill = race_ethnicity)) +
  geom_col(position = "dodge", width = 0.7) +
  scale_fill_manual(values = c("red", "blue", "green", "purple", "orange")) +
  theme_minimal() +
  xlab("") +
  ylab("% of Households with Housing Affordibility") +
  labs(title = "Housing affordibility by Race/Ethnicity in the U.S. during the COVID-19 Pandemic",
  caption = "Source: Census Household Pulse Survey")

  ggsave(filename = "BARCHART_MORT_RENT_CONF.png",height=8, width=10, dpi = "print" )

Worry Bar Chart

#library(questionr)
library(lubridate)
data_all%>%
mutate(pan_re_worry = case_when(
  WORRY %in% c(2,3,4)~1, WORRY %in% c(1) ~ 0, TRUE ~ NA_real_)) %>%
  group_by(race_ethnicity)%>%
  summarise(pct_pan_re_worry = weighted.mean(pan_re_worry, na.rm=T))%>%
  ggplot(aes(x=race_ethnicity, y=pct_pan_re_worry, fill=race_ethnicity,label = scales::percent(pct_pan_re_worry)))+
  
  geom_col(position = 'dodge') + 
    geom_text(position = position_dodge(width = .9),    # move to center of bars
              vjust = -0.5,    # nudge above top of bar
              size = 3) + 
    scale_y_continuous(labels = scales::percent)+

  ggtitle("Percent of people who are worried for a week")+
  ylab("Percent") +
  xlab("Race/Ethnicity")+
  theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))

ggsave(filename = "BARCHART_WORRY.png",height=8, width=10, dpi = "print" )

Anxious Bar Chart

#library(questionr)
library(lubridate)
data_all%>%
mutate(pan_re_anxious = case_when(
  ANXIOUS %in% c(2,3,4)~1, ANXIOUS %in% c(1) ~ 0, TRUE ~ NA_real_)) %>%
  group_by(race_ethnicity)%>%
  summarise(pct_pan_re_anxious = weighted.mean(pan_re_anxious, na.rm=T))%>%
  ggplot(aes(x=race_ethnicity, y=pct_pan_re_anxious, fill=race_ethnicity,label = scales::percent(pct_pan_re_anxious)))+
  
  geom_col(position = 'dodge') + 
    geom_text(position = position_dodge(width = .9),    # move to center of bars
              vjust = -0.5,    # nudge above top of bar
              size = 3) + 
    scale_y_continuous(labels = scales::percent)+

  ggtitle("Percent of people who are anxious for a week")+
  ylab("Percent") +
  xlab("Race/Ethnicity")+
  theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))

ggsave(filename = "BARCHART_ANXIOUS.png",height=8, width=10, dpi = "print" )

Feeling DOWN BAR CHART

#library(questionr)
library(lubridate)
data_all%>%
mutate(pan_re_down = case_when(
  DOWN %in% c(2,3,4)~1, DOWN %in% c(1) ~ 0, TRUE ~ NA_real_)) %>%
  group_by(race_ethnicity)%>%
  summarise(pct_pan_re_down = weighted.mean(pan_re_down, na.rm=T))%>%
  ggplot(aes(x=race_ethnicity, y=pct_pan_re_down, fill=race_ethnicity,label = scales::percent(pct_pan_re_down)))+
  
  geom_col(position = 'dodge') + 
    geom_text(position = position_dodge(width = .9),    # move to center of bars
              vjust = -0.5,    # nudge above top of bar
              size = 3) + 
    scale_y_continuous(labels = scales::percent)+

  ggtitle("Percent of people who are feeling down for a week")+
  ylab("Percent") +
  xlab("Race/Ethnicity")+
  theme(axis.text.x=element_text(angle =- 45, vjust = 0.5))

ggsave(filename = "BARCHART_DOWN.png",height=8, width=10, dpi = "print" )

Mental Health

We create a new variable to define Mental Health. The more score means more mental stress

library(dplyr)
library(ggplot2)

# Calculate total mental health score by summing WORRY, DOWN, and INTEREST scores
data_all <- data_all %>% 
  mutate(mental_health = WORRY + DOWN + INTEREST)

# Calculate mean mental health score by month
mental_health_by_month <- data_all %>%
  group_by(WEEK.x) %>%
  summarize(mean_mental_health = mean(mental_health, na.rm = TRUE))

# Plot mean mental health score over time
ggplot(mental_health_by_month, aes(x = WEEK.x, y = mean_mental_health)) +
  geom_line() +
  ggtitle("Trend of Mental Health") +
  xlab("Month") +
  ylab("Mean Mental Stress Score")

ggsave("TREND_MENTAL_Stress_COMB.png")

## Saving 7 x 5 in image

Mental Health By AGE

Which Group of people suffer from Mental Stress

#1) Not at all
#2) Several days
#3) More than half the days
#4) Nearly every day 

# Output: Trend of the percentage of people who felt worried, anxious, or down for each wave, by age group.

library(ggplot2)

# Recode WORRY, ANXIOUS, and DOWN variables into a binary variable where 1 means worried, anxious, or down
data_all <- data_all %>% 
  mutate(mental_health = ifelse(WORRY %in% c(2, 3, 4) | ANXIOUS %in% c(2, 3, 4) | DOWN %in% c(2, 3, 4), 1, 0))

# Calculate the percentage of people who felt worried, anxious, or down for each wave, by age group
mental_health_by_age <- data_all %>% 
  select(WEEK.x, TBIRTH_YEAR, mental_health) %>% 
  filter(TBIRTH_YEAR != -88) %>% # Exclude missing values
  mutate(AGE = as.numeric(format(Sys.Date(), "%Y")) - TBIRTH_YEAR) %>%
  group_by(WEEK.x, AGE) %>% 
  summarize(mean_mental_health = mean(mental_health, na.rm = TRUE)) %>% 
  mutate(AGE_GROUP = case_when(
    AGE <= 17 ~ "Under 18",
    AGE >= 18 & AGE <= 24 ~ "18-24",
    AGE >= 25 & AGE <= 34 ~ "25-34",
    AGE >= 35 & AGE <= 44 ~ "35-44",
    AGE >= 45 & AGE <= 54 ~ "45-54",
    AGE >= 55 & AGE <= 64 ~ "55-64",
    AGE >= 65 & AGE <= 74 ~ "65-74",
    AGE >= 75 ~ "75 and over"
  ))

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

# Plot the percentage of people who felt worried, anxious, or down for each age group
ggplot(mental_health_by_age, aes(x = AGE_GROUP, y = mean_mental_health * 100)) +
  geom_bar(stat = "identity", fill = "blue") +
  theme_minimal() +
  xlab("Age Group") +
  ylab("% of People with Mental Health Issues") +
  ylim(0, 70) +
  labs(title = "Percentage of People with Mental Health Issues by Age Group",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

## Warning: Removed 14 rows containing missing values (`position_stack()`).

## Warning: Removed 2251 rows containing missing values (`geom_bar()`).

ggsave(filename = "MENTAL_HEALTH_BY_AGE_BAR.png", height=8, width=10, dpi = "print")

## Warning: Removed 14 rows containing missing values (`position_stack()`).
## Removed 2251 rows containing missing values (`geom_bar()`).

Housing Insecurity By AGe

Which Group of people suffer from Housing Insecurity

library(ggplot2)

data_all <- data_all %>% 
  mutate(rent_mort = RENTCUR + MORTCUR + MORTCONF)

data_all <- data_all %>% 
  mutate(rent_mort = ifelse(RENTCUR %in% c(1) | MORTCUR %in% c(1) | MORTCONF %in% c(3, 4), 1, 0))

# Calculate the percentage of people who felt worried, anxious, or down for each wave, by age group
house_insecurit_by_age <- data_all %>% 
  select(WEEK.x, TBIRTH_YEAR, rent_mort) %>% 
  filter(TBIRTH_YEAR != -88) %>% # Exclude missing values
  mutate(AGE = as.numeric(format(Sys.Date(), "%Y")) - TBIRTH_YEAR) %>%
  group_by(WEEK.x, AGE) %>% 
  summarize(mean_rent_mort = mean(rent_mort, na.rm = TRUE)) %>% 
  mutate(AGE_GROUP = case_when(
    AGE <= 17 ~ "Under 18",
    AGE >= 18 & AGE <= 24 ~ "18-24",
    AGE >= 25 & AGE <= 34 ~ "25-34",
    AGE >= 35 & AGE <= 44 ~ "35-44",
    AGE >= 45 & AGE <= 54 ~ "45-54",
    AGE >= 55 & AGE <= 64 ~ "55-64",
    AGE >= 65 & AGE <= 74 ~ "65-74",
    AGE >= 75 ~ "75 and over"
  ))

## `summarise()` has grouped output by 'WEEK.x'. You can override using the
## `.groups` argument.

ggplot(house_insecurit_by_age, aes(x = AGE_GROUP, y = mean_rent_mort * 100)) +
  geom_bar(stat = "identity", fill = "blue") +
  theme_minimal() +
  xlab("Age Group") +
  ylab("% of People Pay their Rent or Mortgage") +
  ylim(0, 70) +
  labs(title = "Percentage of People Paid House Rent or Mortgage by Age Group",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey")

## Warning: Removed 68 rows containing missing values (`position_stack()`).

## Warning: Removed 2196 rows containing missing values (`geom_bar()`).

ggsave(filename = "HOUSING_Affordability_BY_AGE_BAR.png", height=8, width=10, dpi = "print")

## Warning: Removed 68 rows containing missing values (`position_stack()`).
## Removed 2196 rows containing missing values (`geom_bar()`).

##CORRELATION BETWEEN HOUSERENT HARDSHIP and MENTAL STRESS ** A correlation analysis tells if there is a linear relationship between the two variables, while a regression analysis can help understand the strength and direction of the relationship and whether it is statistically significant. We will notice the co-relation between RENTCUR, MORTCUR and WORRY, ANXIOUS, DOWN, INTEREST.

library(ggplot2)

# Select relevant variables
data_rent_worry <- data_all %>% 
  select(WORRY, RENTCUR) %>% 
  filter(!is.na(RENTCUR)) # Remove missing values

# Create a scatter plot
ggplot(data_rent_worry, aes(x = RENTCUR, y = WORRY)) +
  geom_point(alpha = 0.2) +
  xlab("Current House Rent Hardship") +
  ylab("Mental Health Issue (WORRY)") +
  labs(title = "Correlation between House Rent Hardship and Mental Health",
       subtitle = "Weighted",
       caption = "Source: Census Household Pulse survey") +
  theme_minimal()

# Calculate correlation coefficient
correlation <- cor(data_rent_worry$RENTCUR, data_rent_worry$WORRY, use = "complete.obs")
cat("Correlation Coefficient: ", correlation)

## Correlation Coefficient:  0.2000606

# Load required libraries
library(dplyr)
library(ggplot2)
library(sjPlot)

## Warning: package 'sjPlot' was built under R version 4.2.3

# Subset the data to include only necessary variables
data_alll <- data_all %>%
  select(WEEK.x, WORRY, RENTCUR, RRACE) %>%
  filter(RENTCUR != -88 & WORRY != -88 & RRACE!= -88 &  RENTCUR != -99 & WORRY != -99 & RRACE!= -99)

# Recode WORRY variable into a binary variable where 1 means worried, anxious, or down
data_alll$WORRY <- ifelse(data_alll$WORRY %in% c(2, 3, 4), 1, 0)
#data_alll$ANXIOUS <- ifelse(data_alll$ANXIOUS %in% c(2, 3, 4), 1, 0)

# Create a new binary variable RENT_STRESS where 1 means having difficulty paying rent and 0 means no difficulty


# Conduct logistic regression analysis
model <- glm(WORRY ~ RENTCUR + RRACE, data = data_alll, family = "binomial")

# Print the summary of the model
summary(model)

## 
## Call:
## glm(formula = WORRY ~ RENTCUR + RRACE, family = "binomial", data = data_alll)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.767  -1.352   1.012   1.012   1.016  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.521243   0.012959 -40.222   <2e-16 ***
## RENTCUR      0.925475   0.011020  83.980   <2e-16 ***
## RRACE       -0.003075   0.003540  -0.869    0.385    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 600029  on 452398  degrees of freedom
## Residual deviance: 591949  on 452396  degrees of freedom
## AIC: 591955
## 
## Number of Fisher Scoring iterations: 4

# Create a forest plot to visualize the odds ratio and confidence intervals
plot_model(model, type = "est", vline.color = "gray", ci.lvl = 0.95, axis.labels = c("Odds Ratio"), title = "Logistic Regression Model: WORRY ~ RENT_STRESS + RRACE")

## Profiled confidence intervals may take longer time to compute.
##   Use `ci_method="wald"` for faster computation of CIs.

**A logit link function to model the relationship between WORRY (binary outcome variable) and two predictor variables, RENTCUR (continuous predictor variable) and RRACE (categorical predictor variable with two levels, White and Black).

The coefficient estimates for each predictor variable are shown, as well as their standard errors and associated p-values, which can be used to assess the statistical significance of the relationship between the predictor and the outcome variables.

The intercept coefficient represents the estimated log-odds of the outcome variable when all predictor variables are equal to zero. In this case, the intercept is 0.446, which means that when RENTCUR and RRACE are equal to zero, the log-odds of WORRY is 0.446.

The coefficient estimate for RENTCUR is 0.009, indicating that for each one-unit increase in RENTCUR, the log-odds of WORRY increases by 0.009. This suggests that higher levels of current rent are associated with higher levels of worry.

The coefficient estimate for RRACE is 0.026, indicating that being Black is associated with higher levels of worry compared to being White. The p-value associated with this coefficient estimate is < 0.001, indicating that this relationship is statistically significant.

The deviance residuals represent the difference between the predicted probabilities of the outcome variable and the actual values. The residual deviance (572682) is smaller than the null deviance (572922), suggesting that the model provides a better fit to the data than the null model. The AIC (Akaike information criterion) provides a measure of model fit, with lower values indicating better fit. The AIC for this model is 572688, which suggests a good fit.

Overall, this model suggests that both current rent and race are significant predictors of worry, with higher current rent and being Black associated with higher levels of worry.**

# Load required libraries
library(dplyr)
library(ggplot2)
library(sjPlot)

# Subset the data to include only necessary variables
data_allll <- data_all %>%
  select(WEEK.x, WORRY, MORTCUR, RRACE) %>%
  filter(MORTCUR != -88 & WORRY != -88 & RRACE!= -88 &  MORTCUR != -99 & WORRY != -99 & RRACE!= -99)

# Recode WORRY variable into a binary variable where 1 means worried, anxious, or down
data_allll$WORRY <- ifelse(data_allll$WORRY %in% c(2, 3, 4), 1, 0)
#data_alll$ANXIOUS <- ifelse(data_alll$ANXIOUS %in% c(2, 3, 4), 1, 0)

# Create a new binary variable RENT_STRESS where 1 means having difficulty paying rent and 0 means no difficulty


# Conduct logistic regression analysis
model <- glm(WORRY ~ MORTCUR + RRACE, data = data_allll, family = "binomial")

# Print the summary of the model
summary(model)

## 
## Call:
## glm(formula = WORRY ~ MORTCUR + RRACE, family = "binomial", data = data_allll)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.666  -1.119  -1.119   1.237   1.237  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.279959   0.010589  -120.9   <2e-16 ***
## MORTCUR      1.093767   0.009530   114.8   <2e-16 ***
## RRACE        0.048259   0.002839    17.0   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1315314  on 949438  degrees of freedom
## Residual deviance: 1299821  on 949436  degrees of freedom
## AIC: 1299827
## 
## Number of Fisher Scoring iterations: 4

# Create a forest plot to visualize the odds ratio and confidence intervals
plot_model(model, type = "est", vline.color = "gray", ci.lvl = 0.95, axis.labels = c("Odds Ratio"), title = "Logistic Regression Model: WORRY ~ MORT_STRESS + RRACE")

## Profiled confidence intervals may take longer time to compute.
##   Use `ci_method="wald"` for faster computation of CIs.

Mental Stress Caused by Housing Insecurity

allweeks <- data_all

allweeks$hisp<-ifelse(allweeks$RHISPANIC==1, "Not_hispanic", "hispanic")
allweeks$race <- car::Recode(allweeks$RRACE, recodes = "1= 'White'; 2= 'Black'; 3 = 'Asian'; 4='Other'; else=NA")
allweeks$race_eth<-ifelse(allweeks$hisp==1, "hisp",allweeks$race)
allweeks$race_eth <- interaction(allweeks$hisp, allweeks$race)

allweeks$race_ethnicity<- mutate(allweeks, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))

allweeks$race_ethnicity<- ifelse(substr(allweeks$race_eth, 1, 4)=="hisp", "hisp", as.character(allweeks$race_eth))

allweeks$gad2 <- (allweeks$WORRY-1) + (allweeks$ANXIOUS-1)+ (allweeks$DOWN-1) + (allweeks$INTEREST-1)
allweeks$gad_b <- ifelse(allweeks$gad2 >= c(3,4) , 1, 0)

## Warning in allweeks$gad2 >= c(3, 4): longer object length is not a multiple of
## shorter object length

allweeks$mortconf<-car::Recode(allweeks$MORTCONF, recodes ="c(3,4)='Confident';c(1,2,5)='Not-Confident'")
allweeks <- allweeks %>% filter(MORTCONF!=-88 & MORTCONF!=-99)

MORTGAGE TO MENTAL STRESS

allweeks$mortgage<-car::Recode(allweeks$MORTCUR, recodes ="1='Yes'; 2='No'")
allweeks <- allweeks %>% filter(MORTCUR!=-88 & MORTCUR!=-99)

library(tidyverse)
allweeks %>%
mutate(mortgage = ifelse(mortgage == "Yes", "Caught up on Mortgage", "Not Caught up on Mortgage")) %>%
group_by(mortgage,race_ethnicity)%>%
summarise(meangad = questionr::wtd.mean(gad_b, weights = PWEIGHT))%>%
ggplot(aes(x = race_ethnicity, y = meangad, fill=race_ethnicity, label = scales::percent(meangad)), stat=
"identity", position="dodge")+ facet_wrap(~mortgage)+ ylab("Mental Stress Due to House Insecurity") +xlab("This household Currently Caught up on Mortgage") + scale_fill_manual( name ="Race_ethnicity",labels=c("Hispanic","Non_Hisp White", "Non_Hisp Black", "Non_Hisp Asian", "Others"),values = c("#004f9e", "#e36c2c", "#CC0000", "#006600", "#669999" )) + labs(caption = "Source: U.S Census Household Pulse survey(Week 13-45: August 13th, 2020- May ,2022)") + geom_col(position ='dodge') + geom_text( vjust = -0.5, size =5) + scale_y_continuous(labels = scales::percent)

## `summarise()` has grouped output by 'mortgage'. You can override using the
## `.groups` argument.

ggsave("MORT_TO_STRESS.png", dpi = 300, width = 12, height = 8)

RENT TO MENTAL STRESS

allweeks3 <- data_all

allweeks3$rentt<-car::Recode(allweeks3$RENTCUR, recodes ="1='Yes'; 2='No'")
allweeks3 <- allweeks3 %>% filter(RENTCUR!=-88 & RENTCUR!=-99)

allweeks3$gad2 <- (allweeks3$WORRY-1) + (allweeks3$ANXIOUS-1)+ (allweeks3$DOWN-1) + (allweeks3$INTEREST-1)
allweeks3$gad_b <- ifelse(allweeks3$gad2 >= c(3,4) , 1, 0)

library(tidyverse)
allweeks3 %>%
  mutate(rentt = ifelse(rentt == "Yes", "Caught up on Rent", "Not Caught up on Rent")) %>%
group_by(rentt,race_ethnicity)%>%
summarise(meangad = questionr::wtd.mean(gad_b, weights = PWEIGHT))%>%
ggplot(aes(x = race_ethnicity, y = meangad, fill=race_ethnicity, label = scales::percent(meangad)), stat=
"identity", position="dodge")+ facet_wrap(~rentt)+ ylab("Mental Stress from Rent Payment") +xlab("This household Currently Caught up on Rent") + scale_fill_manual( name ="Race_ethnicity",labels=c("Hispanic","Non_Hisp White", "Non_Hisp Black", "Non_Hisp Asian", "Others"),values = c("#004f9e", "#e36c2c", "#CC0000", "#006600", "#669999" )) + labs(caption = "Source: U.S Census Household Pulse survey(Week 28-44: August 19th, 2020- December 22,2020)") + geom_col(position ='dodge') + geom_text( vjust = -0.5, size =5) + scale_y_continuous(labels = scales::percent)

## `summarise()` has grouped output by 'rentt'. You can override using the
## `.groups` argument.

ggsave("RENT_TO_STRESS.png", dpi = 300, width = 12, height = 8)

Capstone Project

Mahmuda Sultana

4/1/2023

UNZIP the files

READ WEIGHTED FILE

Merge Data and Weights

Remove unused files

combine weeks together

WEEK13: 19th August 2020- 31st August 2020

WEEK45: 27th April 2022-9th May 2022

PHASE 2 to PHASE 3.4

RHISPANIC

1) No, not of Hispanic, Latino, or Spanish origin

2) Yes, of Hispanic, Latino, or Spanish origin

RRACE

1) White, Alone

2) Black, Alone

3) Asian, Alone

4) Any other race alone, or race in combination

MORTGAGE

What percentage of households are currently caught up on Mortgage

Trend of Mortgage Before and After Peak Pandemic

MORTGAGE FOR HISPANIC ONLY

HISPANIC AND NON_HISPANIC

Caught up on Mortgage for All Races

CAUGHT UP ON RENT

Rent before and after peak pandemic

RENT for HISPANIC vs NON-HISPANIC

Caught up on Rent For all Races

CONFIDENCE

Confidence to pay Rent/Mortgage Before and After Peak Pandemic

ANalysis of MENTAL HEALTH

WORRY HISPANIC ONLY

WORRY ALL RACE

ANXIOUS

ANXIOUS ALL RACE

DOWN

MENTAL HEALTH

BAR CHART ANALYSIS

MORTGAGE

MORTGAGE For ALL RACES

CAUGHT UP ON RENT

RENT for all RACES

CONFIDENCE OF PAYING MORTGAGE OR RENT

Housing Affordability

Worry Bar Chart

Anxious Bar Chart

Feeling DOWN BAR CHART

Mental Health

Mental Health By AGE

Housing Insecurity By AGe

Mental Stress Caused by Housing Insecurity

MORTGAGE TO MENTAL STRESS

RENT TO MENTAL STRESS