Prepare data for OLS regression

Load packages

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.

Import data

# Read the Stata file with haven; the path is relative to the working directory.
# Sample size reported by the author: 6201 observations.
wave1 <- haven::read_dta(file = "anchor1_50percent_Eng.dta")

Clean data

# Build the analysis sample: keep only the five variables used later, strip
# haven labels, recode missing-value codes to NA, and drop incomplete rows.
wave1b <- wave1 %>%
  transmute(
    # Independent variable: age with its value labels removed.
    age = zap_labels(age),

    # Negative sat6 values are treated as missing; everything else is kept as
    # a plain number, and the variable label is removed.
    sat6 = if_else(sat6 < 0, NA_real_, as.numeric(sat6)) %>%
      zap_label(),

    # cdweight is the weighting variable; its variable label is removed.
    cdweight = zap_label(cdweight),

    # sex_gen as a categorical variable without unused levels.
    sex_gen = fct_drop(as_factor(sex_gen)),

    # Relationship status as a categorical variable: the label
    # "-7 Incomplete data" becomes NA, then the text is turned back into a
    # factor and unused levels are dropped.
    relstat = as_factor(relstat) %>%
      as.character() %>%
      na_if("-7 Incomplete data") %>%
      as_factor() %>%
      fct_drop()
  ) %>%
  drop_na() # remove every row with a missing value in any of the kept columns
# Sample size reported by the author: from 6201 to 6162

Generate a new variable for marital status

# Collapse the detailed relationship status (relstat) into a coarser
# marital-status factor (marital1), then remove widowed respondents.
wave1c <- wave1b %>%
  mutate(
    marital1 = case_when(
      # The three "Never married" statuses are grouped as "Nevermarried".
      relstat %in% c(
        "1 Never married single",
        "2 Never married LAT",
        "3 Never married COHAB"
      ) ~ "Nevermarried",

      # The two "Married" statuses are grouped as "Married".
      relstat %in% c(
        "4 Married COHAB",
        "5 Married noncohabiting"
      ) ~ "Married",

      # The three "Divorced/separated" statuses are grouped as "Divorced".
      relstat %in% c(
        "6 Divorced/separated single",
        "7 Divorced/separated LAT",
        "8 Divorced/separated COHAB"
      ) ~ "Divorced",

      # The two "Widowed" statuses are grouped as "Widow".
      relstat %in% c(
        "9 Widowed single",
        "10 Widowed LAT"
      ) ~ "Widow"
      # Any relstat value not listed above matches no branch and becomes NA.
    ) %>%
      as_factor() # treat marital1 as a categorical variable
  ) %>%
  # Only 4 cases are widowed (author's count), so they are dropped. filter()
  # also removes rows where the comparison is NA, i.e. where marital1 is NA.
  filter(marital1 != "Widow") %>%
  # Remove the now-empty "Widow" level so it does not show up in tables,
  # plots, or model output; this matches the fct_drop() use during cleaning.
  mutate(marital1 = fct_drop(marital1))
# Sample size reported by the author: 6158 after dropping those widowed

Prepare data for OLS regression

Mengni Chen

2023-10-06

Load packages

Import data

Clean data

Generate a new variable for marital status