No. 1

Question

Rewrite the following code chunks using the %>% operator.

  • log(sd(c(5, 13, 89)))
  • as.numeric(scale(c(100, 32, 45)))

Answer

# Question: rewrite the following nested calls using the %>% operator.
#   log(sd(c(5, 13, 89)))
#   as.numeric(scale(c(100, 32, 45)))
library(tidyverse)
# Standard deviation first, then its natural logarithm.
c(5, 13, 89) %>%
  sd() %>%
  log()
## [1] 3.836456
# Standardise the values, then flatten the matrix returned by scale() into a plain numeric vector.
c(100, 32, 45) %>%
  scale() %>%
  as.numeric()
## [1]  1.1358256 -0.7479827 -0.3878429

No. 2

Question

Use Wave 1 data to see which cohort has the highest proportion of respondents who agree or agree completely with the statement “A child under age 6 will suffer from having a working mother” (i.e. val1i5).

Answer

# Question: use Wave 1 data to see which cohort has the highest proportion of
# respondents who agree or agree completely with the statement "A child under
# age 6 will suffer from having a working mother" (i.e. val1i5).
library(tidyverse)
library(haven) # read_dta(), zap_label(), as_factor()
library(Hmisc) # wtd.mean()
library(wCorr) # weightedCorr()
# Read the Wave 1 Stata file into a tibble.
wave1 <- read_dta("anchor1_50percent_Eng.dta")
table(wave1[["cohort"]]) # frequency of each cohort code
## 
##    1    2    3 
## 2173 2013 2015
table(wave1[["val1i5"]]) # frequency of each val1i5 answer code
## 
##   -2   -1    1    2    3    4    5 
##    5   92 1201 1173 1660 1117  953
# val1i5 answers range from 1 (disagree completely) to 5 (agree completely);
# the negative codes are -1 (don't know) and -2 (no answer).

# Build the analysis data set for val1i5. transmute() creates the columns
# listed below and keeps only those.
wave1a <- wave1 %>%
  transmute(
    # survey weight with its variable label removed
    cdweight = zap_label(cdweight),
    # cohort as a categorical variable, with unused levels dropped
    cohort = fct_drop(as_factor(cohort)),
    # negative codes (-1 don't know, -2 no answer) become missing;
    # every other answer keeps its value
    val1i5 = if_else(val1i5 < 0, NA_real_, as.numeric(val1i5)),
    # indicator built from the recoded val1i5 defined just above:
    #   0  for answers 1, 2 or 3
    #   1  for answers above 3 (agree / agree completely)
    #   NA for everything else, i.e. missing answers
    agree_val1i5 = case_when(
      val1i5 %in% 1:3 ~ 0,
      val1i5 > 3 ~ 1,
      TRUE ~ NA_real_
    )
  )

# Weighted share of agreement per cohort, computed step by step from wave1a.
wave1a %>%
  drop_na() %>% # keep only rows without a missing value in any column
  group_by(cohort) %>% # the summaries below are computed once per cohort
  summarise(
    wn = sum(cdweight), # weighted sample size of the cohort
    w_val1i5 = sum(cdweight * agree_val1i5) # weighted count who agree or agree completely
  ) %>%
  mutate(wp_val1i5 = 100 * w_val1i5 / wn) # weighted percentage who agree or agree completely
## # A tibble: 3 × 4
##   cohort         wn w_val1i5 wp_val1i5
##   <fct>       <dbl>    <dbl>     <dbl>
## 1 1 1991-1993 1817.     717.      39.4
## 2 2 1981-1983 2055.     650.      31.6
## 3 3 1971-1973 2218.     712.      32.1
# Alternatively, let Hmisc::wtd.mean() compute the weighted proportion directly.
wave1a %>%
  group_by(cohort) %>% # one result row per cohort
  drop_na() %>% # remove rows with a missing value in any column
  summarise(
    # The weighted mean of the 0/1 indicator is the weighted proportion of
    # respondents who agree or agree completely. The argument name is written
    # out in full as `weights` rather than relying on partial matching.
    wtd.mean(x = agree_val1i5, weights = cdweight)
  )
## # A tibble: 3 × 2
##   cohort      `wtd.mean(x = agree_val1i5, weights = cdweight)`
##   <fct>                                                  <dbl>
## 1 1 1991-1993                                            0.394
## 2 2 1981-1983                                            0.316
## 3 3 1971-1973                                            0.321

No. 3

Question

Calculate the correlation between years of education (yeduc) and egalitarian attitudes toward the gender division of housework (val1i4) for each of the three cohorts, and compare the unweighted estimates with the weighted ones.

Answer

# Prepare the variables needed for the correlation between education years
# (yeduc) and attitudes toward the gender division of housework (val1i4).
# The recoding follows the same pattern as in the previous exercise.
wave1d <- wave1 %>%
  transmute(
    # survey weight with its variable label removed
    cdweight = zap_label(cdweight),
    # cohort as a categorical variable
    cohort = as_factor(cohort),
    # yeduc: negative values become NA, all other values are kept;
    # the result is then passed through zap_label() to take off labels
    yeduc = zap_label(if_else(yeduc < 0, NA_real_, as.numeric(yeduc))),
    # val1i4: negative values (below 0) become NA, all other values are kept
    val1i4 = if_else(val1i4 < 0, NA_real_, as.numeric(val1i4))
  )
# Correlation between yeduc and val1i4 within each cohort, ignoring the weights.
wave1d %>%
  drop_na() %>% # complete rows only; rows with a missing cdweight are dropped as well
  group_by(cohort) %>% # one correlation per cohort
  summarise(cor(yeduc, val1i4)) # cor() computes the Pearson coefficient by default
## # A tibble: 3 × 2
##   cohort      `cor(yeduc, val1i4)`
##   <fct>                      <dbl>
## 1 1 1991-1993              -0.0662
## 2 2 1981-1983               0.105 
## 3 3 1971-1973               0.0418
# Correlation between yeduc and val1i4 within each cohort, using the survey weights.
wave1d %>%
  drop_na() %>% # complete rows only
  group_by(cohort) %>% # one correlation per cohort
  summarise(
    # Pearson correlation with cdweight passed as the weights (wCorr package)
    weightedCorr(x = yeduc, y = val1i4, method = "Pearson", weights = cdweight)
  )
## # A tibble: 3 × 2
##   cohort      weightedCorr(x = yeduc, y = val1i4, method = "Pearson", weights …¹
##   <fct>                                                                    <dbl>
## 1 1 1991-1993                                                            -0.0755
## 2 2 1981-1983                                                             0.113 
## 3 3 1971-1973                                                             0.0469
## # … with abbreviated variable name
## #   ¹​`weightedCorr(x = yeduc, y = val1i4, method = "Pearson", weights = cdweight)`