Rewrite the following code chunks using the %>% operator.
#Question: Rewrite the following code chunks using the %>% operator.
#log(sd(c(5, 13, 89)))
#as.numeric(scale(c(100, 32, 45)))
library(tidyverse)
# log(sd(x)) as a pipeline: the vector is passed to sd(), and that result to log()
c(5, 13, 89) %>%
  sd() %>%
  log()
## [1] 3.836456
# as.numeric(scale(x)) as a pipeline: standardise first, then convert to numeric
c(100, 32, 45) %>%
  scale() %>%
  as.numeric()
## [1] 1.1358256 -0.7479827 -0.3878429
Use Wave 1 data to see which cohort has the highest proportion of respondents who agree or agree completely with the statement “A child under age 6 will suffer from having a working mother” (i.e. val1i5).
#Question: Use Wave 1 data to see which cohort has the highest proportion of respondents who agree or agree completely with the statement "A child under age 6 will suffer from having a working mother" (i.e. val1i5).
library(tidyverse)
library(haven)
library(Hmisc)
# Read the Wave 1 Stata file (path is relative to the working directory)
wave1 <- read_dta("anchor1_50percent_Eng.dta")
# Frequency of each raw val1i5 code, before any recoding
table(wave1[["val1i5"]])
##
## -2 -1 1 2 3 4 5
## 5 92 1201 1173 1660 1117 953
#val1i5 is answered on a scale from 1 (disagree completely) to 5 (agree completely);
#there are also some observations coded -1 (don't know) or -2 (no answer)
# Build the analysis data for val1i5 from wave1.
# transmute() creates the variables below and keeps only those.
wave1a <- wave1 %>%
  transmute(
    # survey weight with its variable label removed
    cdweight = zap_label(cdweight),
    # cohort as a categorical variable, with unused factor levels dropped
    cohort = fct_drop(as_factor(cohort)),
    # negative codes (-1 don't know, -2 no answer) become missing;
    # every other value is kept as a plain number
    val1i5 = if_else(val1i5 < 0, NA_real_, as.numeric(val1i5)),
    # indicator built from the recoded val1i5 above:
    #   1 when val1i5 is greater than 3 (agree / agree completely),
    #   0 when val1i5 is one of 1, 2, 3 (%in% tests membership, 1:3 is 1, 2, 3),
    #   NA for everything else
    agree_val1i5 = case_when(
      val1i5 > 3 ~ 1,
      val1i5 %in% 1:3 ~ 0,
      TRUE ~ NA_real_
    )
  )
#Now I use the wave1a produced from the above code
# Weighted percentage agreeing, by cohort, computed step by step from wave1a
wave1a %>%
  drop_na() %>% # drop every row that has a missing value in any column
  group_by(cohort) %>% # the summaries below are computed within each cohort
  summarise(
    # weighted sample size of the cohort
    wn = sum(cdweight),
    # weighted number who agree or agree completely (missing rows already dropped)
    w_val1i5 = sum(agree_val1i5 * cdweight),
    # weighted percentage who agree or agree completely
    wp_val1i5 = 100 * w_val1i5 / wn
  )
## # A tibble: 3 × 4
## cohort wn w_val1i5 wp_val1i5
## <fct> <dbl> <dbl> <dbl>
## 1 1 1991-1993 1817. 717. 39.4
## 2 2 1981-1983 2055. 650. 31.6
## 3 3 1971-1973 2218. 712. 32.1
#Or you can simply use
# Shortcut: let Hmisc's wtd.mean() compute the weighted mean of the 0/1
# indicator within each cohort. The result is a proportion (0-1), not a
# percentage as in the step-by-step version.
wave1a %>%
  group_by(cohort) %>% # the summary below is computed within each cohort
  summarise(
    # The argument is spelled `weights` in full rather than relying on
    # partial matching of `weight`; na.rm = TRUE skips missing agree_val1i5.
    wtd.mean(x = agree_val1i5, weights = cdweight, na.rm = TRUE)
  )
## # A tibble: 3 × 2
## cohort `wtd.mean(x = agree_val1i5, weights = cdweight, na.rm = TRUE)`
## <fct> <dbl>
## 1 1 1991-1993 0.394
## 2 2 1981-1983 0.316
## 3 3 1971-1973 0.321
Calculate the correlation between years of education (yeduc) and egalitarian attitudes towards the gender division of housework (val1i4) for each of the three cohorts (you don’t need to consider weights here).
#Calculate the correlation between education years (yeduc) and egalitarian attitudes in gender division of housework (val1i4) for three cohorts
# Build the data for the correlation exercise, following the same recoding
# pattern as the previous exercise. transmute() keeps only these three columns.
wave1d <- wave1 %>%
  transmute(
    # cohort as a categorical variable
    cohort = as_factor(cohort),
    # negative yeduc codes become NA, other values are kept;
    # zap_label() then removes any variable label
    yeduc = zap_label(if_else(yeduc < 0, NA_real_, as.numeric(yeduc))),
    # when val1i4 is < 0, make it missing; the rest remain unchanged
    val1i4 = if_else(val1i4 < 0, NA_real_, as.numeric(val1i4))
  )
# Correlation between yeduc and val1i4 within each cohort
wave1d %>%
  group_by(cohort) %>% # the summary below is computed per cohort
  summarise(
    # use = "complete.obs" excludes observations with a missing value
    cor(yeduc, val1i4, use = "complete.obs")
  )
## # A tibble: 3 × 2
## cohort `cor(yeduc, val1i4, use = "complete.obs")`
## <fct> <dbl>
## 1 1 1991-1993 -0.0662
## 2 2 1981-1983 0.105
## 3 3 1971-1973 0.0418
#or you can run the following
# Same correlation, but with incomplete rows removed up front
wave1d %>%
  drop_na() %>% # drop rows with a missing value in any column first
  group_by(cohort) %>% # the summary below is computed per cohort
  summarise(
    # no missing values remain, so use = "complete.obs" is not needed
    cor(yeduc, val1i4)
  )
## # A tibble: 3 × 2
## cohort `cor(yeduc, val1i4)`
## <fct> <dbl>
## 1 1 1991-1993 -0.0662
## 2 2 1981-1983 0.105
## 3 3 1971-1973 0.0418