Data analysis of uptake experiment for parenting attitudes questionnaire. Do scores on the PAQ subscales predict uptake of experimental content compared to control content?
Data preprocessing
Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## %+%(): ggplot2, psych
## alpha(): ggplot2, psych
## filter(): dplyr, stats
## lag(): dplyr, stats
##
## Attaching package: 'ggthemes'
## The following objects are masked from 'package:langcog':
##
## scale_color_solarized, scale_colour_solarized,
## scale_fill_solarized
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
Read in participant data.
files <- dir("../production-results/uptake_e4/")
answers <- data.frame()
attitudes <- data.frame()
subinfo <- data.frame()
for (f in files) {
jf <- paste("../production-results/uptake_e4/",f,sep="")
jd <- fromJSON(paste(readLines(jf), collapse=""))
#uptake responses
answers_id <- data.frame(
answer= jd$answers$data$answer,
item = jd$answers$data$item[jd$answers$data$trial_type =="uptake"],
workerid = jd$WorkerId)
answers <- bind_rows(answers, answers_id)
attitudes_id <- data.frame(workerid = jd$WorkerId,
sent = jd$answers$data$sentence[jd$answers$data$trial_type=="attitudes"],
rating = as.numeric(jd$answers$data$rating[jd$answers$data$trial_type=="attitudes"]))
attitudes <- bind_rows(attitudes, attitudes_id)
#questionnaire and demo
subinfo_id <- data.frame(workerid = jd$WorkerId,
children = jd$answers$data$children,
language = jd$answers$data$homelang,
ses = jd$answers$data$ladder,
gender = jd$answers$data$gender,
age = jd$answers$data$age,
education = jd$answers$data$education,
ethnicity = jd$answers$data$ethnicity,
race = as.character(jd$answers$data$race[1]),
rt_exp1 = jd$answers$data$target1_rt,
rt_exp2 = jd$answers$data$target2_rt,
rt_con1 = jd$answers$data$control1_rt,
rt_con2 = jd$answers$data$control2_rt)
subinfo <- bind_rows(subinfo, subinfo_id)
}
Read in trial info and questionnaire labels.
labels <- read.csv("sent_forms_uptake_e4.csv")
labels$sent <- as.character(labels$sent)
answer_key <- read.csv("uptake_key_e4.csv")
Clean up labels.
attitudes$sent <- stringr::str_replace_all(as.character(attitudes$sent), "[â‘”“’']", "")
Plot demographic info.
subinfo$education <- factor(subinfo$education,
levels = c("highSchool","someCollege",
"4year","someGrad","Grad"))
qplot(ses, data=subinfo)

qplot(children, data=subinfo)

qplot(gender, data=subinfo)

qplot(education, data=subinfo)

qplot(age, data=subinfo)

qplot(language, data=subinfo)

qplot(ethnicity, data=subinfo)

Data Frames and Exclusions
Make data frames.
Questionnaire attitude means.
dq <- attitudes %>%
left_join(labels)
dq$rating[dq$reverse_code == 1] <- 6 - dq$rating[dq$reverse_code == 1]
atts <- dq %>%
group_by(workerid, category) %>%
summarise(rating = mean(rating))
Uptake questions.
Exclusions
Setting up exclusion based on reading time.
exclusions <- as_tibble(subinfo) %>%
select(workerid, starts_with("rt")) %>%
gather(article, rt, rt_exp1, rt_exp2, rt_con1, rt_con2) %>%
mutate(article = stringr::str_replace(stringr::str_replace(stringr::str_replace(article,"rt_", ""),
"exp", "e"), "con","c"))
ggplot(exclusions, aes(x = rt)) +
geom_histogram() +
facet_wrap(~article) +
geom_vline(xintercept = 15, lty = 2, col="red")
Exclude for less than 15s.
exclusions$exclude <- exclusions$rt < 15
This constitutes 0.077 of the data.
Questionnaire
Look at mean ratings across sentences.
ms <- dq %>%
group_by(category, short_sent, reverse_code) %>%
multi_boot_standard(col = "rating") %>%
arrange(category, desc(mean))
ms$short_sent_ord <- factor(ms$short_sent,
levels = ms$short_sent)
Plot responses to individual questionnaire items.
qplot(short_sent_ord, mean, col = category,
ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
geom = "pointrange",
data = ms) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("") +
ylab("Mean Rating") +
ylim(c(0,6)) +
scale_colour_solarized()

Plot mean subscale scores.
atts_m <- dq %>%
group_by(category) %>%
multi_boot_standard(col = "rating") %>%
arrange(category, desc(mean))
ggplot(atts_m, aes(x = category, y = mean)) +
geom_bar(stat="identity") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))

Information Uptake Analyses
Get accuracy data.
answers$answer <- as.character(answers$answer)
answer_key$answer_cor <- as.character(answer_key$answer_cor)
uptake <- answers %>%
left_join(answer_key) %>%
mutate(acc = (answer == answer_cor)) %>%
select(workerid, item, acc, q_type, article) %>%
left_join(exclusions) %>%
filter(!exclude)
mss <- uptake %>%
group_by(workerid, q_type) %>%
summarise(acc = mean(acc))
ms <- mss %>%
group_by(q_type) %>%
multi_boot_standard(col = "acc")
Plot mean uptake accuracy for control and experimental articles. Is one question type harder than the other?
ggplot(ms, aes(x = q_type, y = mean)) +
geom_bar(stat="identity") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
ylim(c(0,1)) +
geom_hline(yintercept = .25, lty = 2)

Test for differences
mss_wide <- spread(mss, q_type, acc) %>%
filter(!is.na(con) & !is.na(exp))
t.test(mss_wide$con, mss_wide$exp, paired=TRUE)
##
## Paired t-test
##
## data: mss_wide$con and mss_wide$exp
## t = -4.8269, df = 230, p-value = 2.528e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07467722 -0.03138339
## sample estimates:
## mean of the differences
## -0.0530303
Items.
ms <- uptake %>%
group_by(item, article, q_type) %>%
multi_boot_standard(col = "acc")
ggplot(ms, aes(x = fct_reorder(item, mean), y = mean, col = article)) +
geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
facet_wrap(~article, scales = "free_x") +
ylim(c(0,1)) +
geom_hline(yintercept = .25, lty = 2) +
ylab("Accuracy") + xlab("Item") +
theme_few()

Demographic predictors of uptake
Add demographics.
mss <- mss %>%
left_join(subinfo)
By gender.
ms <- mss %>%
filter(gender %in% c("Man","Woman")) %>%
group_by(q_type, gender) %>%
multi_boot_standard(col = "acc")
ggplot(ms, aes(x = q_type, y = mean, fill = gender)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
ylim(c(0,1)) +
geom_hline(yintercept = .25, lty = 2) +
scale_fill_solarized()

By education.
ms <- mss %>%
filter(!education == "") %>%
group_by(q_type, education) %>%
multi_boot_standard(col = "acc")
ggplot(ms, aes(x = q_type, y = mean, fill = education)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
ylim(c(0,1)) +
geom_hline(yintercept = .25, lty = 2) +
scale_fill_solarized()

By race.
ms <- mss %>%
# filter(!education == "") %>%
group_by(q_type, race) %>%
multi_boot_standard(col = "acc")
ggplot(ms, aes(x = q_type, y = mean, fill = race)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
ylim(c(0,1)) +
geom_hline(yintercept = .25, lty = 2) +
scale_fill_solarized()

Plots
Main plot
d <- mss %>%
left_join(atts) %>%
left_join(subinfo) %>%
mutate(q_type = fct_recode(q_type, "Control" = "con", "Experimental" = "exp"))
ggplot(d, aes(x = rating, y = acc, colour = q_type)) +
geom_jitter(height = .02, width = 0, alpha= .3) +
# geom_point(alpha = .3) +
geom_smooth(method="lm") +
facet_grid(.~category) +
ylim(0,1) +
ylab("Accuracy") +
xlab("Subscale Rating") +
scale_color_solarized(name="Condition") +
theme(legend.position = "bottom")+
ggthemes::theme_few()+
theme(legend.title = element_text(size=16),
legend.text = element_text(size=14),
axis.text.x = element_text(vjust=0.5, size=14),
axis.title.x = element_text(size=16),
axis.text.y = element_text(vjust=0.5, size=14),
axis.title.y = element_text(size=16),
strip.text = element_text(size=14))

Subgroup plots
Gender.
ggplot(filter(d, gender %in% c("Man","Woman")),
aes(x = rating, y = acc, colour = category)) +
geom_jitter(height = .02, width = 0, alpha= .3) +
geom_smooth(method="lm", se=FALSE) +
facet_grid(gender~ q_type) +
ylim(0,1) +
ylab("Accuracy") +
xlab("Subscale Rating")

Stats
Set up a dataframe for analysis with exclusions.
d_reg <- atts %>%
left_join(uptake)%>%
group_by(workerid, category, acc, q_type, item, article)%>%
summarise(rating = mean(rating))%>%
spread(category, rating)
d_reg_excl <- d_reg %>%
left_join(exclusions) %>%
filter(!exclude)
Model with affection and attachment.
mod <- glmer(acc ~ q_type * scale(AA) +
q_type * scale(RR) +
q_type * scale(EL) +
(q_type | workerid) +
(1 | item),
data = d_reg_excl,
family = "binomial")
knitr::kable(summary(mod)$coefficients, digits = 3)
| (Intercept) |
1.643 |
0.250 |
6.561 |
0.000 |
| q_typeexp |
0.486 |
0.344 |
1.412 |
0.158 |
| scale(AA) |
0.036 |
0.118 |
0.304 |
0.761 |
| scale(RR) |
-0.180 |
0.094 |
-1.918 |
0.055 |
| scale(EL) |
0.617 |
0.119 |
5.200 |
0.000 |
| q_typeexp:scale(AA) |
0.335 |
0.118 |
2.842 |
0.004 |
| q_typeexp:scale(RR) |
0.019 |
0.096 |
0.195 |
0.845 |
| q_typeexp:scale(EL) |
-0.201 |
0.116 |
-1.731 |
0.084 |
# run optimizer for longer
ss <- getME(mod,c("theta","fixef"))
mod2 <- update(mod,start=ss,control=glmerControl(optCtrl=list(maxfun=2e4)))
knitr::kable(summary(mod2)$coefficients, digits = 3)
#no interactions
mod3 <- glmer(acc ~ q_type + scale(AA) + scale(RR) + scale(EL) +
(q_type | workerid) +
(1 | item),
data = d_reg_excl,
family = "binomial")
summary(mod3)
ss <- getME(mod3,c("theta","fixef"))
mod4 <- update(mod3,start=ss,control=glmerControl(optCtrl=list(maxfun=2e4)))
knitr::kable(summary(mod4)$coefficients, digits = 3)
Model with no AA scores.
mod <- glmer(acc ~ q_type * scale(RR) +
q_type * scale(EL) +
(q_type | workerid) +
(1 | item),
data = d_reg_excl,
family = "binomial")
knitr::kable(summary(mod)$coefficients, digits = 3)