Data analysis of uptake experiment for parenting attitudes questionnaire. Do scores on the PAQ subscales predict uptake of experimental content compared to control content?

1 Data preprocessing

Preliminaries.

## [1] "dplyr"   "langcog" "tidyr"   "ggplot2" "lme4"
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## %+%():    ggplot2, psych
## alpha():  ggplot2, psych
## filter(): dplyr, stats
## lag():    dplyr, stats
## 
## Attaching package: 'ggthemes'
## The following objects are masked from 'package:langcog':
## 
##     scale_color_solarized, scale_colour_solarized,
##     scale_fill_solarized
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand

Read in participant data.

files <- dir("../production-results/uptake_e4/")
answers <- data.frame()
attitudes <- data.frame()
subinfo <- data.frame()

for (f in files) {
  jf <- paste("../production-results/uptake_e4/",f,sep="")
  jd <- fromJSON(paste(readLines(jf), collapse=""))

  #uptake responses
  answers_id <- data.frame(
    answer= jd$answers$data$answer,
    item = jd$answers$data$item[jd$answers$data$trial_type =="uptake"],
    workerid = jd$WorkerId)
  
  answers <- bind_rows(answers, answers_id)
  
  attitudes_id <- data.frame(workerid = jd$WorkerId, 
                   sent = jd$answers$data$sentence[jd$answers$data$trial_type=="attitudes"],
                   rating = as.numeric(jd$answers$data$rating[jd$answers$data$trial_type=="attitudes"])) 
                   
  attitudes <- bind_rows(attitudes, attitudes_id)
  
  #questionnaire and demo
  subinfo_id <- data.frame(workerid = jd$WorkerId, 
                   children = jd$answers$data$children,
                   language = jd$answers$data$homelang,
                   ses = jd$answers$data$ladder,
                   gender = jd$answers$data$gender,
                   age = jd$answers$data$age,
                   education = jd$answers$data$education,
                   ethnicity = jd$answers$data$ethnicity,
                   race = as.character(jd$answers$data$race[1]),
                   rt_exp1 = jd$answers$data$target1_rt,
                   rt_exp2 = jd$answers$data$target2_rt,
                   rt_con1 = jd$answers$data$control1_rt,
                   rt_con2 = jd$answers$data$control2_rt)
  subinfo <- bind_rows(subinfo, subinfo_id)
}

Read in trial info and questionnaire labels.

labels <- read.csv("sent_forms_uptake_e4.csv")
labels$sent <- as.character(labels$sent)

answer_key <- read.csv("uptake_key_e4.csv")

Clean up labels.

attitudes$sent <- stringr::str_replace_all(as.character(attitudes$sent), "[â‘”“’']", "")

Plot demographic info.

subinfo$education <- factor(subinfo$education, 
                            levels = c("highSchool","someCollege",
                                       "4year","someGrad","Grad"))

qplot(ses, data=subinfo)

qplot(children, data=subinfo)

qplot(gender, data=subinfo)

qplot(education, data=subinfo)

qplot(age, data=subinfo)

qplot(language, data=subinfo)

qplot(ethnicity, data=subinfo)

2 Data Frames and Exclusions

2.1 Make data frames.

Questionnaire attitude means.

dq <- attitudes %>%
  left_join(labels) 

dq$rating[dq$reverse_code == 1] <- 6 - dq$rating[dq$reverse_code == 1]

atts <- dq %>%
  group_by(workerid, category) %>% 
  summarise(rating = mean(rating))

Uptake questions.

2.2 Exclusions

Setting up exclusion based on reading time.

exclusions <- as_tibble(subinfo) %>%
  select(workerid, starts_with("rt")) %>%
  gather(article, rt, rt_exp1, rt_exp2, rt_con1, rt_con2) %>%
  mutate(article = stringr::str_replace(stringr::str_replace(stringr::str_replace(article,"rt_", ""), 
                                           "exp", "e"), "con","c"))

ggplot(exclusions, aes(x = rt)) + 
  geom_histogram() + 
  facet_wrap(~article) +
  geom_vline(xintercept = 15, lty = 2, col="red")

Exclude for less than 15s.

exclusions$exclude <- exclusions$rt < 15

This constitutes 0.077 of the data.

3 Questionnaire

Look at mean ratings across sentences.

ms <- dq %>%
  group_by(category, short_sent, reverse_code) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean)) 

ms$short_sent_ord <- factor(ms$short_sent, 
                             levels = ms$short_sent)

Plot responses to individual questionnaire items.

qplot(short_sent_ord, mean, col = category,
      ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
      geom = "pointrange",
      data = ms) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("") + 
  ylab("Mean Rating") + 
  ylim(c(0,6)) + 
  scale_colour_solarized()

Plot mean subscale scores.

atts_m <- dq %>%
  group_by(category) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean)) 

ggplot(atts_m, aes(x = category, y = mean)) + 
  geom_bar(stat="identity") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

4 Information Uptake Analyses

Get accuracy data.

answers$answer <- as.character(answers$answer)
answer_key$answer_cor <- as.character(answer_key$answer_cor)

uptake <- answers %>%
  left_join(answer_key) %>%
  mutate(acc = (answer == answer_cor)) %>%
  select(workerid, item, acc, q_type, article) %>%
  left_join(exclusions) %>%
  filter(!exclude)

mss <- uptake %>%
  group_by(workerid, q_type) %>% 
  summarise(acc = mean(acc))

ms <- mss %>%
  group_by(q_type) %>%
  multi_boot_standard(col = "acc")

Plot mean uptake accuracy for control and experimental articles. Is one question type harder than the other?

ggplot(ms, aes(x = q_type, y = mean)) + 
  geom_bar(stat="identity") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  ylim(c(0,1)) +
  geom_hline(yintercept = .25, lty = 2)

Test for differences

mss_wide <- spread(mss, q_type, acc) %>%
  filter(!is.na(con) & !is.na(exp))

t.test(mss_wide$con, mss_wide$exp, paired=TRUE)
## 
##  Paired t-test
## 
## data:  mss_wide$con and mss_wide$exp
## t = -4.8269, df = 230, p-value = 2.528e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07467722 -0.03138339
## sample estimates:
## mean of the differences 
##              -0.0530303

Items.

ms <- uptake %>%
  group_by(item, article, q_type) %>% 
  multi_boot_standard(col = "acc")

ggplot(ms, aes(x = fct_reorder(item, mean), y = mean, col = article)) + 
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  facet_wrap(~article, scales = "free_x") + 
  ylim(c(0,1)) + 
  geom_hline(yintercept = .25, lty = 2) + 
  ylab("Accuracy") + xlab("Item") + 
  theme_few()

4.1 Demographic predictors of uptake

Add demographics.

mss <- mss %>%
  left_join(subinfo)

By gender.

ms <- mss %>%
  filter(gender %in% c("Man","Woman")) %>%
  group_by(q_type, gender) %>%
  multi_boot_standard(col = "acc")

ggplot(ms, aes(x = q_type, y = mean, fill = gender)) + 
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  ylim(c(0,1)) +
  geom_hline(yintercept = .25, lty = 2) + 
  scale_fill_solarized()

By education.

ms <- mss %>%
  filter(!education == "") %>%
  group_by(q_type, education) %>%
  multi_boot_standard(col = "acc")

ggplot(ms, aes(x = q_type, y = mean, fill = education)) + 
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  ylim(c(0,1)) +
  geom_hline(yintercept = .25, lty = 2) + 
  scale_fill_solarized()

By race.

ms <- mss %>%
  # filter(!education == "") %>%
  group_by(q_type, race) %>%
  multi_boot_standard(col = "acc")

ggplot(ms, aes(x = q_type, y = mean, fill = race)) + 
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  ylim(c(0,1)) +
  geom_hline(yintercept = .25, lty = 2) + 
  scale_fill_solarized()

5 Plots

6 Main plot

d <- mss %>%
  left_join(atts) %>%
  left_join(subinfo) %>%
  mutate(q_type = fct_recode(q_type, "Control" = "con", "Experimental" = "exp"))

ggplot(d, aes(x = rating, y = acc, colour = q_type)) +
 geom_jitter(height = .02, width = 0, alpha= .3) +
  # geom_point(alpha = .3) + 
  geom_smooth(method="lm") + 
  facet_grid(.~category) + 
  ylim(0,1) + 
  ylab("Accuracy") +
  xlab("Subscale Rating") + 
  scale_color_solarized(name="Condition") +
  theme(legend.position = "bottom")+ 
  ggthemes::theme_few()+ 
  theme(legend.title = element_text(size=16), 
        legend.text = element_text(size=14), 
        axis.text.x  = element_text(vjust=0.5, size=14),
        axis.title.x = element_text(size=16), 
        axis.text.y  = element_text(vjust=0.5, size=14),
        axis.title.y = element_text(size=16),
        strip.text = element_text(size=14))

6.1 Subgroup plots

Gender.

ggplot(filter(d, gender %in% c("Man","Woman")),  
       aes(x = rating, y = acc, colour = category)) +
 geom_jitter(height = .02, width = 0, alpha= .3) + 
  geom_smooth(method="lm", se=FALSE) + 
  facet_grid(gender~ q_type) + 
  ylim(0,1) + 
  ylab("Accuracy") +
  xlab("Subscale Rating") 

7 Stats

Set up a dataframe for analysis with exclusions.

d_reg <- atts %>%
  left_join(uptake)%>%
  group_by(workerid, category, acc, q_type, item, article)%>%
  summarise(rating = mean(rating))%>%
  spread(category, rating)

d_reg_excl <- d_reg %>%
  left_join(exclusions) %>%
  filter(!exclude)

Model with affection and attachment.

mod <- glmer(acc ~ q_type * scale(AA) +  
                q_type * scale(RR) + 
                q_type * scale(EL) +
                (q_type | workerid) +
                (1 | item), 
              data = d_reg_excl,  
              family = "binomial")

knitr::kable(summary(mod)$coefficients, digits = 3)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.643 0.250 6.561 0.000
q_typeexp 0.486 0.344 1.412 0.158
scale(AA) 0.036 0.118 0.304 0.761
scale(RR) -0.180 0.094 -1.918 0.055
scale(EL) 0.617 0.119 5.200 0.000
q_typeexp:scale(AA) 0.335 0.118 2.842 0.004
q_typeexp:scale(RR) 0.019 0.096 0.195 0.845
q_typeexp:scale(EL) -0.201 0.116 -1.731 0.084
# run optimizer for longer
ss <- getME(mod,c("theta","fixef"))
mod2 <- update(mod,start=ss,control=glmerControl(optCtrl=list(maxfun=2e4)))

knitr::kable(summary(mod2)$coefficients, digits = 3)

#no interactions
mod3 <- glmer(acc ~ q_type + scale(AA) + scale(RR) + scale(EL) +
                (q_type | workerid) +
                (1 | item), 
              data = d_reg_excl,  
              family = "binomial")

summary(mod3)

ss <- getME(mod3,c("theta","fixef"))
mod4 <- update(mod3,start=ss,control=glmerControl(optCtrl=list(maxfun=2e4)))

knitr::kable(summary(mod4)$coefficients, digits = 3)

Model with no AA scores.

mod <- glmer(acc ~ q_type * scale(RR) + 
                q_type * scale(EL) +
                (q_type | workerid) +
                (1 | item), 
              data = d_reg_excl,  
              family = "binomial")

knitr::kable(summary(mod)$coefficients, digits = 3)