library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Download, unzip and load the Household Pulse Survey public-use files
# for weeks 1-39, creating one data frame per week named pulse01 ... pulse39.
#
# Layout of the Census download site, as used below:
#   - weeks 1-21 are under the 2020 directory, weeks 22-39 under 2021;
#   - the directory name uses the unpadded week number (wk1, wk10), while
#     the zip and CSV names use a two-digit week number (Week01, puf_01).

# Fetch one survey week and return its CSV contents as a data frame.
#
# week: integer week number (1-39).
# Side effects: writes pulseNN.zip and its extracted contents into the
# current working directory.
read_pulse_week <- function(week) {
  year <- if (week <= 21L) 2020L else 2021L
  zip_url <- sprintf(
    paste0(
      "https://www2.census.gov/programs-surveys/demo/datasets/hhp/",
      "%d/wk%d/HPS_Week%02d_PUF_CSV.zip"
    ),
    year, week, week
  )
  zip_file <- sprintf("pulse%02d.zip", week)
  csv_file <- sprintf("pulse%d_puf_%02d.csv", year, week)

  # Zip archives are binary; request a binary transfer explicitly so the
  # file is not altered by text-mode line-ending translation.
  status <- download.file(zip_url, zip_file, mode = "wb")
  if (status != 0) {
    stop("Download failed for week ", week, ": ", zip_url, call. = FALSE)
  }

  unzip(zip_file)
  read.csv(csv_file)
}

# assign() is used so that the per-week objects keep the names the rest of
# the script refers to (pulse01, pulse02, ...).
for (pulse_week in 1:39) {
  assign(sprintf("pulse%02d", pulse_week), read_pulse_week(pulse_week))
}
# Combine weeks that share the same variables. Note that Pulse 30-33 have 239
# variables and Pulse 34-39 have 202, so we have to append them separately and
# then select the variables we need (WEEK and work loss). If they all had the
# same number of variables, we would not need to append them separately.
# Row-bind the per-week data frames for the given week numbers.
# The objects are looked up by name (pulse01, pulse02, ...); the list is
# unnamed before binding so row names are generated the same way as in a
# direct rbind(pulse01, pulse02, ...) call.
stack_pulse_weeks <- function(weeks) {
  week_frames <- mget(sprintf("pulse%02d", weeks), inherits = TRUE)
  do.call(rbind, unname(week_frames))
}

# One combined data frame per run of consecutive weeks.
pulse01_05 <- stack_pulse_weeks(1:5)
pulse06_06 <- stack_pulse_weeks(6)
pulse07_12 <- stack_pulse_weeks(7:12)
pulse13_16 <- stack_pulse_weeks(13:16)
pulse17_21 <- stack_pulse_weeks(17:21)
pulse22_27 <- stack_pulse_weeks(22:27)
pulse28_29 <- stack_pulse_weeks(28:29)
pulse30_33 <- stack_pulse_weeks(30:33)
pulse34_39 <- stack_pulse_weeks(34:39)
# Weeks 30-39 now live in pulse30_33 and pulse34_39, so drop the individual
# per-week data frames and ask R to release the memory.
rm(list = sprintf("pulse%02d", 30:39))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3945652 210.8 6881441 367.6 4494166 240.1
## Vcells 488583534 3727.6 711173747 5425.9 654789490 4995.7
# Keep only the variables needed from each of the data sets
library(tidyverse)
# Reduce both groups to the same two columns (WEEK and WRKLOSSRV).
pulse30_33 <- select(pulse30_33, WEEK, WRKLOSSRV)
pulse34_39 <- select(pulse34_39, WEEK, WRKLOSSRV)

# With matching columns the two groups can be stacked into one data frame.
pulse_all <- rbind(pulse30_33, pulse34_39)
# Line chart of the per-week mean of a 0/1 work-loss indicator, shown as a
# percentage (mean * 100). The mean is unweighted and ignores missing values.
pulse_all %>%
  select(WEEK, WRKLOSSRV) %>%
  # Recode WRKLOSSRV: 1 -> 1, 2 -> 0; every other value (including the
  # negative codes) becomes NA.
  mutate(
    work_loss = case_when(
      WRKLOSSRV %in% 1 ~ 1,
      WRKLOSSRV %in% 2 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  group_by(WEEK) %>%
  summarize(mean_work_loss = mean(work_loss, na.rm = TRUE)) %>%
  ggplot(aes(x = WEEK, y = mean_work_loss * 100)) +
  geom_line() +
  theme_minimal() +
  labs(
    x = "HPS wave",
    y = "",
    title = "% Work loss from May 12 til October 11",
    subtitle = "Unweighted",
    caption = "Source: Census Household Pulse survey"
  ) +
  # Fix the y axis to the 0-40 percent range.
  ylim(0, 40)