No. 1

Question

Rewrite the following code chunks using the %>% operator.

  • log(sd(c(5, 13, 89)))
  • as.numeric(scale(c(100, 32, 45)))

Answer

#Question: Rewrite the following code chunks using the %>% operator.
#log(sd(c(5, 13, 89)))
#as.numeric(scale(c(100, 32, 45)))
library(tidyverse)
# Take the standard deviation of the three values, then the log of that result.
c(5, 13, 89) %>%
  sd() %>%
  log()
## [1] 3.836456
# Standardise the values with scale(), then convert the result to a plain numeric vector.
c(100, 32, 45) %>%
  scale() %>%
  as.numeric()
## [1]  1.1358256 -0.7479827 -0.3878429

No. 2

Question

Use Wave 1 data to see which cohort has the highest proportion of respondents who agree or agree completely with the statement “A child under age 6 will suffer from having a working mother” (i.e. val1i5).

Answer

#Question: Use Wave 1 data to see which cohort has the highest proportion of respondents who agree or agree completely with the statement "A child under age 6 will suffer from having a working mother" (i.e. val1i5).
library(tidyverse) 
library(haven)
library(Hmisc)
# Read the Wave 1 Stata file into a data frame.
wave1 <- read_dta(file = "anchor1_50percent_Eng.dta")
# Frequency table of val1i5, to see which codes occur before recoding.
table(wave1[["val1i5"]])
## 
##   -2   -1    1    2    3    4    5 
##    5   92 1201 1173 1660 1117  953
#val1i5 is answered on a scale from 1 (disagree completely) to 5 (agree completely);
#there are also some observations coded -1 (don't know) or -2 (no answer)

# Build the analysis data for val1i5. transmute() creates the variables listed
# below and keeps only those.
wave1a <- wave1 %>%
  transmute(
    # weight variable with its variable label removed
    cdweight = zap_label(cdweight),
    # cohort as a categorical variable, with unused levels dropped
    cohort = fct_drop(as_factor(cohort)),
    # negative codes become missing; every other value is kept as a plain number
    val1i5 = case_when(
      val1i5 < 0 ~ NA_real_,
      TRUE ~ as.numeric(val1i5)
    ),
    # indicator built from the recoded val1i5 above:
    # 1 when val1i5 is greater than 3, 0 when val1i5 is 1, 2 or 3 (%in% tests
    # membership in the vector 1:3), and NA for everything else
    agree_val1i5 = case_when(
      val1i5 > 3 ~ 1,
      val1i5 %in% 1:3 ~ 0,
      TRUE ~ NA_real_
    )
  )

#Now I use the wave1a produced from the above code
# Weighted percentage of agreement per cohort, computed from wave1a.
drop_na(wave1a) %>%    # drop every row that has a missing value in any column
  group_by(cohort) %>% # the summary below is computed separately for each cohort
  summarise(
    wn = sum(cdweight),                      # weighted sample size of the cohort
    w_val1i5 = sum(cdweight * agree_val1i5), # weighted sum of agree_val1i5 (missing rows already dropped)
    wp_val1i5 = 100 * w_val1i5 / wn          # weighted percentage who agree or agree completely
  )
## # A tibble: 3 × 4
##   cohort         wn w_val1i5 wp_val1i5
##   <fct>       <dbl>    <dbl>     <dbl>
## 1 1 1991-1993 1817.     717.      39.4
## 2 2 1981-1983 2055.     650.      31.6
## 3 3 1971-1973 2218.     712.      32.1
#Or you can simply use 
# Same result via Hmisc's wtd.mean(): the weighted mean of the 0/1 indicator
# per cohort, i.e. a proportion between 0 and 1 rather than a percentage.
wave1a %>%
  group_by(cohort) %>% # the summary below is computed separately for each cohort
  summarise(
    # The argument is spelled out in full as `weights`; the shorter `weight`
    # only worked through R's partial argument matching.
    # na.rm = TRUE removes missing values before the mean is taken.
    wtd.mean(x = agree_val1i5, weights = cdweight, na.rm = TRUE)
  )
## # A tibble: 3 × 2
##   cohort      `wtd.mean(x = agree_val1i5, weights = cdweight, na.rm = TRUE)`
##   <fct>                                                                <dbl>
## 1 1 1991-1993                                                          0.394
## 2 2 1981-1983                                                          0.316
## 3 3 1971-1973                                                          0.321

No. 3

Question

Calculate the correlation between education years (yeduc) and egalitarian attitudes in gender division of housework (val1i4) for three cohorts (you don’t need to consider weight here)

Answer

#Calculate the correlation between education years (yeduc) and egalitarian attitudes in gender division of housework (val1i4) for three cohorts
# Build the data for the correlation exercise: cohort, yeduc and val1i4.
# The recoding follows the same pattern as in the previous exercise.
wave1d <- wave1 %>%
  transmute(
    # cohort as a categorical variable
    cohort = as_factor(cohort),
    # negative codes of yeduc become NA and the rest keep their original value;
    # zap_label() then takes the variable label off
    # NOTE(review): confirm in the codebook that every non-negative yeduc code
    # is an actual number of years (e.g. whether 0 is a special category)
    yeduc = zap_label(
      case_when(
        yeduc < 0 ~ NA_real_,
        TRUE ~ as.numeric(yeduc)
      )
    ),
    # negative codes of val1i4 (val1i4 < 0) become NA; the rest remain unchanged
    val1i4 = case_when(
      val1i4 < 0 ~ NA_real_,
      TRUE ~ as.numeric(val1i4)
    )
  )
# Correlation between yeduc and val1i4, computed separately for each cohort.
wave1d %>%
  group_by(cohort) %>%
  summarise(
    # use = "complete.obs" excludes observations with missing values
    cor(yeduc, val1i4, use = "complete.obs")
  )
## # A tibble: 3 × 2
##   cohort      `cor(yeduc, val1i4, use = "complete.obs")`
##   <fct>                                            <dbl>
## 1 1 1991-1993                                    -0.0662
## 2 2 1981-1983                                     0.105 
## 3 3 1971-1973                                     0.0418
#or you can run the following

# Alternative: remove the rows with missing values first, then correlate.
drop_na(wave1d) %>%    # drop every row that has a missing value in any column
  group_by(cohort) %>% # the summary below is computed separately for each cohort
  summarise(
    cor(yeduc, val1i4) # no `use = "complete.obs"` needed, the missing rows are already gone
  )
## # A tibble: 3 × 2
##   cohort      `cor(yeduc, val1i4)`
##   <fct>                      <dbl>
## 1 1 1991-1993              -0.0662
## 2 2 1981-1983               0.105 
## 3 3 1971-1973               0.0418