# Load packages
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
# Import data
# Read the Stata (.dta) file from the working directory into `wave1`.
wave1 <- haven::read_dta(file = "anchor1_50percent_Eng.dta")
# Author's note: sample size at this point is 6201.
# Clean data
# Build the analysis sample: keep only the variables used below, recode
# negative / incomplete codes to missing, and drop incomplete observations.
wave1b <- wave1 %>%
  transmute(
    # Independent variable: age with its value labels removed.
    age = zap_labels(age),
    # Negative sat6 values are treated as missing; everything else becomes a
    # plain numeric. Any variable label is removed afterwards.
    sat6 = if_else(sat6 < 0, NA_real_, as.numeric(sat6)) %>%
      zap_label(),
    # Weight variable, kept without its variable label.
    cdweight = zap_label(cdweight),
    # Sex as a categorical variable, without unused levels.
    sex_gen = fct_drop(as_factor(sex_gen)),
    # Relationship status as a categorical variable: turn the labelled values
    # into text, set "-7 Incomplete data" to missing, then convert the result
    # back into a factor and discard levels that no longer occur.
    relstat = as_factor(relstat) %>%
      as.character() %>%
      na_if("-7 Incomplete data") %>%
      as_factor() %>%
      fct_drop()
  ) %>%
  # Remove every observation with a missing value in any of the kept variables.
  drop_na()
# Author's note: sample size changes from 6201 to 6162.
# Generate a new variable for marital status
# Collapse the detailed relationship status (`relstat`) into a coarser marital
# status variable (`marital1`) and remove widowed respondents.
wave1c <- wave1b %>%
  mutate(
    marital1 = case_when(
      # Any of the three "Never married" statuses -> "Nevermarried".
      relstat %in% c(
        "1 Never married single",
        "2 Never married LAT",
        "3 Never married COHAB"
      ) ~ "Nevermarried",
      # Either of the two "Married" statuses -> "Married".
      relstat %in% c(
        "4 Married COHAB",
        "5 Married noncohabiting"
      ) ~ "Married",
      # Any of the three "Divorced/separated" statuses -> "Divorced".
      relstat %in% c(
        "6 Divorced/separated single",
        "7 Divorced/separated LAT",
        "8 Divorced/separated COHAB"
      ) ~ "Divorced",
      # Either of the two "Widowed" statuses -> "Widow".
      relstat %in% c(
        "9 Widowed single",
        "10 Widowed LAT"
      ) ~ "Widow"
      # No default branch: a relstat value outside the lists above yields NA.
    ) %>%
      # Treat marital1 as a categorical variable.
      # NOTE(review): as_factor() on a character vector orders the levels by
      # first appearance in the data, so the first (reference) level depends
      # on row order -- confirm this is intended.
      as_factor()
  ) %>%
  # Only 4 cases are widowed, so they are dropped. filter() keeps rows where
  # the condition is TRUE, so rows with a missing marital1 are removed as well.
  filter(marital1 != "Widow") %>%
  # Remove the now-empty "Widow" level so it does not linger in tables or
  # model output (same treatment as sex_gen and relstat during cleaning).
  mutate(marital1 = fct_drop(marital1))
# Author's note: sample size changes to 6158 after dropping the widowed cases.