Data analysis of the uptake experiment for the parenting attitudes questionnaire. This is a pilot sample of 50 participants, run to check whether accuracy differs between the control and experimental uptake questions.

1 Data preprocessing

Preliminaries.
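
The library() calls themselves are not echoed in this report, so here is a minimal sketch of a preamble covering the packages used in the analysis. The rjson and stringr lines are assumptions, inferred from the fromJSON and str_replace_all calls below; dplyr is attached last so that MASS (pulled in by nFactors) does not mask select().

library(rjson)      # fromJSON, for the raw MTurk result files (assumed; jsonlite would also work)
library(stringr)    # str_replace_all, for cleaning sentence labels (assumed)
library(psych)
library(nFactors)   # attaches MASS, boot, and lattice as dependencies
library(lme4)       # glmer; attaches Matrix
library(langcog)    # multi_boot_standard for bootstrapped CIs
library(tidyr)
library(ggplot2)
library(dplyr)      # attached last so dplyr::select is not masked by MASS::select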

## [1] "dplyr"   "langcog" "tidyr"   "ggplot2" "lme4"
## 
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
## 
##     scale
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
## 
##     logit
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
## 
##     parallel
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand

Read in participant data.

files <- dir("../production-results/uptake_e4/")
answers <- data.frame()
attitudes <- data.frame()
subinfo <- data.frame()

for (f in files) {
  jf <- paste("../production-results/uptake_e4/",f,sep="")
  jd <- fromJSON(paste(readLines(jf), collapse=""))

  # uptake responses
  answers_id <- data.frame(
    trial= jd$answers$data$trial_number_block,
    answer= jd$answers$data$answer,
    item = jd$answers$data$item,
    workerid = jd$WorkerId,
    type = jd$answers$data$trial_type)
  
  answers_id <- answers_id %>%
    filter(type == "uptake") %>%
    select(workerid, item, answer)
  
  answers <- bind_rows(answers, answers_id)
  
  # parenting attitude ratings
  attitudes_id <- data.frame(
    workerid = jd$WorkerId,
    sent = jd$answers$data$sentence[jd$answers$data$trial_type == "attitudes"],
    rating = as.numeric(jd$answers$data$rating[jd$answers$data$trial_type == "attitudes"]))

  attitudes <- bind_rows(attitudes, attitudes_id)
  
  # questionnaire and demographics
  subinfo_id <- data.frame(workerid = jd$WorkerId, 
                   children = jd$answers$data$children,
                   language = jd$answers$data$homelang,
                   ses = jd$answers$data$ladder,
                   gender = jd$answers$data$gender,
                   age = jd$answers$data$age,
                   education = jd$answers$data$education,
                   ethnicity = jd$answers$data$ethnicity,
                   race = as.character(jd$answers$data$race[1]),
                   rt_exp1 = jd$answers$data$target1_rt,
                   rt_exp2 = jd$answers$data$target2_rt,
                   rt_con1 = jd$answers$data$control1_rt,
                   rt_con2 = jd$answers$data$control2_rt)
  subinfo <- bind_rows(subinfo, subinfo_id)
}

Read in trial info and questionnaire labels.

labels <- read.csv("sent_forms_e7.csv")
labels$sent <- as.character(labels$sent)

answer_key <- read.csv("uptake_key_e4.csv")

Clean up labels.

# strip quote characters and stray mojibake (â) from the sentence text
attitudes$sent <- as.character(attitudes$sent)
attitudes$sent <- str_replace_all(attitudes$sent, "['’“”‘â]", "")

Plot demographic info.

subinfo$education <- factor(subinfo$education, 
                            levels = c("highSchool","someCollege","4year","someGrad","Grad"))

qplot(ses, data=subinfo)

qplot(children, data=subinfo)

qplot(gender, data=subinfo)

qplot(education, data=subinfo)

qplot(age, data=subinfo)

qplot(language, data=subinfo)

qplot(ethnicity, data=subinfo)

2 Questionnaire

Look at mean ratings across sentences; reverse-coded items are flipped so that all items are scored in the same direction.

dq <- attitudes %>%
  left_join(labels) 
# reverse-code flagged items (rating -> 6 - rating)
dq$rating[dq$reverse_code == 1] <- 6 - dq$rating[dq$reverse_code == 1]

ms <- dq %>%
  group_by(category, short_sent, reverse_code) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean)) 

# order items by subscale and descending mean rating for plotting
ms$short_sent_ord <- factor(ms$short_sent, 
                            levels = ms$short_sent)

Plot responses to individual questionnaire items.

qplot(short_sent_ord, mean, col = category,
      ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
      geom = "pointrange",
      data = ms) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("") + 
  ylab("Mean Rating") + 
  ylim(c(0,6)) + 
  scale_colour_solarized()

Plot mean subscale scores.

atts_m <- dq %>%
  group_by(category) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean)) 

atts <- dq %>%
  group_by(workerid, category) %>% 
  summarise(rating = mean(rating))

ggplot(atts_m, aes(x = category, y = mean)) + 
  geom_bar(stat="identity") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

3 Information Uptake Analyses

Get accuracy data.

uptake <- answers %>%
  left_join(answer_key) %>%
  mutate(acc = (answer == answer_cor)) %>%
  select(workerid, item, acc, q_type)

mss <- uptake %>%
  group_by(workerid, q_type) %>%
  summarise(acc = mean(acc))

ms <- uptake %>%
  group_by(q_type) %>%
  multi_boot_standard(col = "acc")

Plot mean uptake accuracy for control and experimental articles. Is one question type harder than the other?

ggplot(ms, aes(x = q_type, y = mean)) + 
  geom_bar(stat="identity") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

t.test(mss$acc[mss$q_type == 'con'],mss$acc[mss$q_type == 'exp'],paired=TRUE)
## 
##  Paired t-test
## 
## data:  mss$acc[mss$q_type == "con"] and mss$acc[mss$q_type == "exp"]
## t = -3.2271, df = 50, p-value = 0.002209
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.10603989 -0.02467906
## sample estimates:
## mean of the differences 
##             -0.06535948

Yes: participants were significantly less accurate on control questions than on experimental questions.

Item analysis. Are there any problematic items?

items <- uptake %>%
  group_by(item) %>%
  multi_boot_standard(col = "acc")
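
To see which items stand out, sort the bootstrapped item means by accuracy; a quick sketch using the items data frame computed above:

items %>%
  arrange(mean)   # lowest-accuracy items first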

One item (c1_4) has a notably low accuracy rate (.45). What is the distribution of responses for this item?

c1_4 <- answers %>%
  filter(item == "c1_4")

qplot(answer, data=c1_4)

# means excluding the bad item
ms <- uptake %>%
  filter(item != "c1_4") %>%
  group_by(q_type) %>%
  multi_boot_standard(col = "acc")
ms
## # A tibble: 2 × 4
##   q_type      mean  ci_lower  ci_upper
##   <fctr>     <dbl>     <dbl>     <dbl>
## 1    con 0.7789661 0.7433155 0.8146168
## 2    exp 0.8169935 0.7859477 0.8480392

Participants frequently selected D, but the correct answer was C. On review, the item does seem confusing; re-word it for the full experiment.

3.1 Plots

Create a data frame that has individuals’ subscale scores and accuracy by question type.

d <- atts %>%
  left_join(mss) 
ggplot(d, aes(x = rating, y = acc, col = category)) +
  geom_jitter(height = .02, width = 0, alpha= .3) + 
  geom_smooth(method="lm", se=FALSE) + 
  facet_wrap(~q_type)

Set up exclusions based on reading time: flag participants whose reading time was below 30 on any of the four articles.

exclude <- subinfo$workerid[subinfo$rt_exp1 < 30 | subinfo$rt_exp2 < 30 | subinfo$rt_con1 < 30 | subinfo$rt_con2 < 30]
length(exclude)
## [1] 9
qplot(subinfo$rt_exp1, binwidth = 15) + 
  geom_vline(xintercept =15, lty =2, col="red")

qplot(subinfo$rt_exp2, binwidth = 15) + 
  geom_vline(xintercept =15, lty =2, col="red")

qplot(subinfo$rt_con1, binwidth = 15) + 
  geom_vline(xintercept =15, lty =2, col="red")

qplot(subinfo$rt_con2, binwidth = 15) + 
  geom_vline(xintercept =15, lty =2, col="red")

Re-plot with the exclusions applied.

ggplot(filter(d, !workerid %in% exclude), 
       aes(x = rating, y = acc, colour = category)) +
  geom_jitter(height = .02, width = 0, alpha = .3) + 
  geom_smooth(method="lm", se=FALSE) + 
  facet_wrap(~q_type) + 
  ylim(0,1)

# spread subscale ratings into wide format so each subscale becomes its own predictor column
d.reg <- atts %>%
  left_join(uptake) %>%
  group_by(workerid, category, acc, q_type, item) %>%
  summarise(rating = mean(rating)) %>%
  spread(category, rating)

d_an <- d.reg %>%
  filter(!workerid %in% exclude)

3.2 Stats

# mixed-effects logistic regression: accuracy predicted by question type crossed with the
# rules_respect and active_learning subscale scores, with maximal random effects
summary(glmer(acc ~ q_type * rules_respect + q_type * active_learning +
                (q_type * rules_respect + q_type * active_learning|workerid) +
                (q_type * rules_respect + q_type * active_learning|item), 
              data = d_an, 
              family = "binomial"))