# Library and Import ----
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(janitor)# Cleaning data
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
library(broom) # Allows us to turn model objects into tibbles.
# Load the Stata data file into a tibble.
wave1 <- read_dta(file = "anchor1_50percent_Eng.dta")
# Sample size: 6201 observations.
# Recoding ----
# Build the analysis sample: keep age, recode life satisfaction, gender and
# relationship status, then keep complete cases only.
wave1b <- wave1 %>%
  transmute(
    age,
    # Negative sat6 codes are treated as missing; all other values are kept
    # as plain numbers.
    sat6 = if_else(sat6 < 0, NA_real_, as.numeric(sat6)),
    # Gender as a categorical variable, without unused levels.
    sex_gen = fct_drop(as_factor(sex_gen)),
    # Relationship status as a categorical variable.
    relstat = as_factor(relstat),
    # Copy of relstat in which "-7 Incomplete data" is set to missing; the
    # result is turned back into a factor and unused levels are dropped.
    relstat_new1 = as.character(relstat) %>%
      na_if("-7 Incomplete data") %>%
      as_factor() %>%
      fct_drop()
  ) %>%
  drop_na() # remove every row that has a missing value in any kept column
# Sample size changes from 6201 to 6162.
# Further selection ----
# Collapse the detailed relationship status into broad groups and restrict the
# sample to single and partnered observations.
wave1c <- wave1b %>%
  mutate(
    # case_when() has no default branch here, so any relstat_new1 value that
    # is not listed below becomes NA and is excluded by the filter further
    # down.
    relstat_new2 = case_when(
      # Treat "never married single" as "single".
      relstat_new1 %in% c("1 Never married single") ~ "single",
      # Treat these four situations as "partnered".
      relstat_new1 %in% c(
        "2 Never married LAT",
        "3 Never married COHAB",
        "4 Married COHAB",
        "5 Married noncohabiting"
      ) ~ "partnered",
      # Treat these three situations as "separated".
      relstat_new1 %in% c(
        "6 Divorced/separated single",
        "7 Divorced/separated LAT",
        "8 Divorced/separated COHAB"
      ) ~ "separated",
      # Treat these two situations as "widowed".
      relstat_new1 %in% c(
        "9 Widowed single",
        "10 Widowed LAT"
      ) ~ "widowed"
    ) %>%
      as_factor() # store relstat_new2 as a factor
  ) %>%
  # Only 4 widowed and 284 separated cases, so both groups are dropped.
  # Keeping an explicit list of groups also makes it visible that rows with a
  # missing relstat_new2 are removed (the original `!=` filter did the same).
  filter(relstat_new2 %in% c("single", "partnered")) %>%
  # Drop the now-empty "widowed" and "separated" levels so they do not show up
  # as zero-count categories in later tables; this matches the fct_drop()
  # calls used when the other factors were created.
  mutate(relstat_new2 = fct_drop(relstat_new2))
# Sample size changes to 5874 after dropping widowed and separated.