Data analysis of basic parenting values/attitudes survey.

1 Data preprocessing

Preliminaries.

## [1] "dplyr"   "langcog" "tidyr"   "ggplot2" "lme4"
## 
## Attaching package: 'langcog'
## 
## The following object is masked from 'package:base':
## 
##     scale
## 
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## 
## Attaching package: 'ggplot2'
## 
## The following object is masked from 'package:psych':
## 
##     %+%
## 
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: boot
## 
## Attaching package: 'boot'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## 
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## 
## Attaching package: 'nFactors'
## 
## The following object is masked from 'package:lattice':
## 
##     parallel
## 
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following object is masked from 'package:tidyr':
## 
##     expand

Read in files and consolidate to the same directory.

files <- dir("../production-results/uptake_e1/")
d.raw <- data.frame()

for (f in files) {
  jf <- paste("../production-results/uptake_e1/",f,sep="")
  jd <- fromJSON(paste(readLines(jf), collapse=""))
  
  # clean up different tasks
  trial_type <- jd$answer$data$trial_type
  
  #parenting questionnaire 
  sent <- jd$answers$data$sentence[trial_type != "uptake"]
  rating <- as.numeric(jd$answers$data$rating[trial_type != "uptake"]) 
  
  #uptake experiment
  control_recall_1 = jd$answers$data$answer[1]
  control_recall_2 = jd$answers$data$answer[2]
  control_recall_3 = jd$answers$data$answer[3]
  control_recall_4 = jd$answers$data$answer[4]
  control_recall_5 = jd$answers$data$answer[5]
  target_generalize_1 = jd$answers$data$answer[6]
  target_generalize_2 = jd$answers$data$answer[7]
  target_generalize_3 = jd$answers$data$answer[8]
  target_generalize_4 = jd$answers$data$answer[9]
  target_generalize_5 = jd$answers$data$answer[10]
  target_recall_1 = jd$answers$data$answer[11]
  target_recall_2 = jd$answers$data$answer[12]
  target_recall_3 = jd$answers$data$answer[13]
  target_recall_4 = jd$answers$data$answer[14]
  target_recall_5 = jd$answers$data$answer[15]
  reading_time_target = jd$answers$data$target_rt[1]
  reading_time_control = jd$answers$data$control_rt[1]
  time_questionnaire = jd$answers$data$questionnaire_rt[1]
  
  
  #demographics
  race <- as.character(jd$answers$data$race[1])
  
  id <- data.frame(workerid = jd$WorkerId, 
                   sent = sent,
                   rating = rating,
                   enjoy_target = jd$answers$data$enjoy_target,
                   enjoy_control = jd$answers$data$enjoy_control,
                   reading_ease_target = jd$answers$data$reading_ease_target,
                   reading_ease_control = jd$answers$data$reading_ease_control,
                   target_recall_1 = target_recall_1,
                   target_recall_2 = target_recall_2,
                   target_recall_3 = target_recall_3,
                   target_recall_4 = target_recall_4,
                   target_recall_5 = target_recall_5,
                   target_generalize_1 = target_generalize_1,
                   target_generalize_2 = target_generalize_2,
                   target_generalize_3 = target_generalize_3,
                   target_generalize_4 = target_generalize_4,
                   target_generalize_5 = target_generalize_5,
                   control_recall_1 = control_recall_1,
                   control_recall_2 = control_recall_2,
                   control_recall_3 = control_recall_3,
                   control_recall_4 = control_recall_4,
                   control_recall_5 = control_recall_5,
                   children = jd$answers$data$children,
                   language = jd$answers$data$homelang,
                   ses = jd$answers$data$ladder,
                   gender = jd$answers$data$gender,
                   age = jd$answers$data$age,
                   education = jd$answers$data$education,
                   ethnicity = jd$answers$data$ethnicity,
                   childAgeYoung = jd$answers$data$childAgeYoung,
                   childAgeOld = jd$answers$data$childAgeOld,
                   race = race,
                   reading_time_target = reading_time_target,
                   reading_time_control = reading_time_control,
                   time_questionnaire = time_questionnaire)
  d.raw <- bind_rows(d.raw, id)
}

Map on question short forms so that we can use these instead.

labels <- read.csv("sent_forms_e7.csv")
labels$sent <- as.character(labels$sent)

Clean up labels.

d.raw$sent <- as.character(d.raw$sent)
d.raw$sent <- str_replace_all(d.raw$sent, "'", "")
d.raw$sent <- str_replace_all(d.raw$sent, "’", "")
d.raw$sent <- str_replace_all(d.raw$sent, "“", "")
d.raw$sent <- str_replace_all(d.raw$sent, "”", "")
d.raw$sent <- str_replace_all(d.raw$sent, "‘", "")
d.raw$sent <- str_replace_all(d.raw$sent, "â", "")

Merge. Recode uptake answers by accuracy.

d <- left_join(d.raw, labels)
d$rating[d$reverse_code == 1] <- 6 - d$rating[d$reverse_code == 1]

d$target_generalize_1 <- str_replace(d$target_generalize_1, "b","1")
d$target_generalize_1 <- str_replace(d$target_generalize_1, "a|c","0")
d$target_generalize_2 <- str_replace(d$target_generalize_2, "a","1")
d$target_generalize_2 <- str_replace(d$target_generalize_2, "b|c","0")
d$target_generalize_3 <- str_replace(d$target_generalize_3, "b","1")
d$target_generalize_3 <- str_replace(d$target_generalize_3, "a|c","0")
d$target_generalize_4 <- str_replace(d$target_generalize_4, "c","1")
d$target_generalize_4 <- str_replace(d$target_generalize_4, "a|b","0")
d$target_generalize_5 <- str_replace(d$target_generalize_5, "c","1")
d$target_generalize_5 <- str_replace(d$target_generalize_5, "a|b","0")

d$target_recall_1 <- str_replace(d$target_recall_1, "b","1")
d$target_recall_1 <- str_replace(d$target_recall_1, "a|c","0")
d$target_recall_2 <- str_replace(d$target_recall_2, "a","1")
d$target_recall_2 <- str_replace(d$target_recall_2, "b|c","0")
d$target_recall_3 <- str_replace(d$target_recall_3, "a","1")
d$target_recall_3 <- str_replace(d$target_recall_3, "b|c","0")
d$target_recall_4 <- str_replace(d$target_recall_4, "a","1")
d$target_recall_4 <- str_replace(d$target_recall_4, "b|c","0")
d$target_recall_5 <- str_replace(d$target_recall_5, "c","1")
d$target_recall_5 <- str_replace(d$target_recall_5, "a|b","0")

d$control_recall_1 <- str_replace(d$control_recall_1, "a","1")
d$control_recall_1 <- str_replace(d$control_recall_1, "b|c","0")
d$control_recall_2 <- str_replace(d$control_recall_2, "c","1")
d$control_recall_2 <- str_replace(d$control_recall_2, "a|b","0")
d$control_recall_3 <- str_replace(d$control_recall_3, "a","1")
d$control_recall_3 <- str_replace(d$control_recall_3, "b|c","0")
d$control_recall_4 <- str_replace(d$control_recall_4, "c","1")
d$control_recall_4 <- str_replace(d$control_recall_4, "a|b","0")
d$control_recall_5 <- str_replace(d$control_recall_5, "b","1")
d$control_recall_5 <- str_replace(d$control_recall_5, "a|c","0")

Plot demographic info.

subinfo <- d %>%
  group_by(workerid) %>%
  select(-short_sent, -category, -instrument, -reverse_code) %>%
  distinct 

questions <- subinfo %>%
  select(workerid, starts_with("target"), starts_with("control")) %>%
  gather(question, correct, starts_with("target"), starts_with("control")) %>%
  separate(question, c("passage","trial_type","q_num"), sep = "_") %>%
  group_by(workerid, passage, trial_type) %>%
  summarise(correct = mean(as.numeric(correct))) %>%
  unite(trialtype, passage, trial_type) %>%
  spread(trialtype, correct)

subinfo <- subinfo %>%
  select(-starts_with("target"), -starts_with("control")) %>%
  left_join(questions) %>%
  select(-sent, -rating)

Awful recoding.

subinfo$education <- factor(subinfo$education, 
                            levels = c("highSchool","someCollege","4year","someGrad","Grad"))

subinfo$gender <- str_replace_all(subinfo$gender, 
                              "female|FEMALE|F$|f$|Femal$|Females|Females","Female")
subinfo$gender <- str_replace_all(subinfo$gender, 
                                  "^male|^Male|^MALE|^M$|^m$|^Maleq|Make", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, 
                                  "29|24|25|28|32|33|45", "")
subinfo$gender <- str_replace_all(subinfo$gender, 
                                  "males", "male")
subinfo$gender <- str_replace_all(subinfo$gender, " ", "")

subinfo$language <- str_replace_all(subinfo$language, "english|eNGLISH|Engliah|ENGLISH|^eng$|Enlgish", "English")
subinfo$language <- str_replace_all(subinfo$language," ", "")
subinfo$language <- str_replace_all(subinfo$language,"arabic", "Arabic")
subinfo$language <- str_replace_all(subinfo$language,"chinese", "Chinese")
subinfo$language <- str_replace_all(subinfo$language,"german", "German")
subinfo$language <- str_replace_all(subinfo$language,"tagalog", "Tagalog")

subinfo$youngestChildAge <- factor(subinfo$childAgeYoung, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))

subinfo$oldestChildAge <- factor(subinfo$childAgeOld, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))

subinfo$reading_ease_target <- factor(subinfo$reading_ease_target, levels = c("Very Difficult", "Somewhat Difficult", "Somewhat Easy", "Very Easy"))

subinfo$reading_ease_control <- factor(subinfo$reading_ease_control, levels = c("Very Difficult", "Somewhat Difficult", "Somewhat Easy", "Very Easy"))

subinfo$enjoy_target <- factor(subinfo$enjoy_target, levels = c("Very Unenjoyable", "Somewhat Unenjoyable", "Somewhat Enjoyable", "Very Enjoyable"))

subinfo$enjoy_control <- factor(subinfo$enjoy_control, levels = c("Very Unenjoyable", "Somewhat Unenjoyable", "Somewhat Enjoyable", "Very Enjoyable"))
qplot(ses, data=subinfo)

qplot(children, data=subinfo)

qplot(gender, data=subinfo)

qplot(education, data=subinfo)

qplot(age, data=subinfo)

qplot(language, data=subinfo)

qplot(ethnicity, data=subinfo)

qplot(race, data=subinfo)

qplot(youngestChildAge, data=subinfo)

qplot(oldestChildAge, data=subinfo)

qplot(reading_ease_target, data=subinfo)

qplot(reading_ease_control, data=subinfo)

qplot(enjoy_target, data=subinfo)

qplot(enjoy_control, data=subinfo)

describe(subinfo$target_recall, skew=FALSE, check=FALSE)
##   vars   n mean   sd median trimmed mad min max range   se
## 1    1 100 0.76 0.25    0.8     0.8 0.3   0   1     1 0.02
describe(subinfo$control_recall, skew=FALSE, check=FALSE)
##   vars   n mean   sd median trimmed mad min max range   se
## 1    1 100 0.63 0.24    0.6    0.64 0.3   0   1     1 0.02
describe(subinfo$target_generalize, skew=FALSE, check=FALSE)
##   vars   n mean   sd median trimmed mad min max range   se
## 1    1 100  0.8 0.29      1    0.85   0   0   1     1 0.03
describe(subinfo$reading_time_target, skew=FALSE, check=FALSE)
##   vars   n   mean     sd median trimmed    mad  min max  range    se
## 1    1 100 197.74 154.72 167.34  183.51 160.36 2.41 900 897.59 15.47
describe(subinfo$reading_time_control, skew=FALSE, check=FALSE)
##   vars   n   mean     sd median trimmed    mad  min    max  range    se
## 1    1 100 162.47 146.19 118.12  145.89 155.89 1.39 736.78 735.39 14.62
qplot(reading_time_target, target_recall, data=subinfo)

qplot(reading_time_target, target_generalize, data=subinfo)

qplot(reading_time_control, control_recall, data=subinfo)

2 Basic analyses

Now look at mean ratings across sentences.

rating_count <- table(d$rating) 
rating_count
## 
##   0   1   2   3   4   5   6 
##  36  66 109 206 240 411 732
prop.table(rating_count) 
## 
##          0          1          2          3          4          5 
## 0.02000000 0.03666667 0.06055556 0.11444444 0.13333333 0.22833333 
##          6 
## 0.40666667
ms <- d %>%
  group_by(category, instrument, short_sent, reverse_code) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(instrument, category, desc(mean)) 

ms$short_sent_ord <- factor(ms$short_sent, 
                             levels = ms$short_sent)

Plot attitude.

qplot(short_sent_ord, mean, col = category,
      ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
      geom = "pointrange",
      data = filter(ms, instrument == "attitudes")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("") + 
  ylab("Mean Rating") + 
  ylim(c(0,6)) + 
  scale_colour_solarized()

Plot mean subscale scores.

mc <- d %>%
  group_by(category) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean)) 

ggplot(mc, aes(x = category, y = mean)) + 
  geom_bar(stat="identity") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

mcl <- d %>%
  group_by(category, workerid) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean))%>%
  spread(category, mean) 

3 Scale reliability

3.1 Whole Scale

wide.attitudes <- d %>% 
  filter(instrument == "attitudes") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.mat <- as.matrix(select(wide.attitudes, -workerid))
summary(alpha(x = alpha.mat))
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.84      0.86    0.91      0.25 6.2 0.031  4.6 0.76

3.2 Rules and Respect Subscale

wide.rules_respect <- d %>% 
  filter(category == "rules_respect") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.rr <- as.matrix(select(wide.rules_respect, -workerid))
summary(alpha(x = alpha.rr))
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.72      0.73    0.72      0.31 2.7 0.067  4.2 0.91

3.3 Affection and Attachment Subscale

wide.affection <- d %>% 
  filter(category == "affection") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.af <- as.matrix(select(wide.affection, -workerid))
summary(alpha(x = alpha.af))
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.78      0.81    0.84      0.41 4.2 0.058  4.8 1.1

3.4 Active Learning Subscale

wide.active_learning <- d %>% 
  filter(category == "active_learning") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.al <- as.matrix(select(wide.active_learning, -workerid))
summary(alpha(x = alpha.al))
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.74      0.75    0.75      0.33   3 0.064  4.9 0.94

4 Information Uptake Analyses

Create a data frame that has subscale scores.

Standardize ratings within subscale.

ds <- d
ds$srating <- ave(ds$rating, ds$category, FUN=scale)

ss <- ds %>%
  group_by(workerid, category) %>%
  summarize(srating = mean(srating)) %>%
  spread(category, srating) %>% 
  left_join(subinfo) %>%
  gather(trial_type, correct, control_recall, target_recall, target_generalize)

ss.long <- ss %>%
  gather(subscale, srating, active_learning, affection, rules_respect)

4.1 Plots

ggplot(ss.long, aes(x = srating, y = correct, col = subscale)) +
  geom_jitter() + 
  geom_smooth(method="lm", se=FALSE) + 
  facet_wrap(~trial_type)

Now with some exclusions, exploratory.

exclude <- subinfo$workerid[subinfo$reading_time_target < 30 | subinfo$reading_time_control < 30]
length(exclude)
## [1] 25
qplot(subinfo$reading_time_target, binwidth = 15) + 
  geom_vline(xintercept =15, lty =2, col="red")

qplot(subinfo$reading_time_control, binwidth = 15) + 
  geom_vline(xintercept =15, lty =2, col="red")

Replot with exclusions.

ggplot(filter(ss.long, !workerid %in% exclude), 
       aes(x = srating, y = correct, col = subscale)) +
  geom_jitter() + 
  geom_smooth(method="lm", se=FALSE) + 
  facet_wrap(~trial_type)

4.2 Stats

subscales <- ds %>%
  group_by(workerid, category) %>%
  summarize(srating = mean(srating)) %>%
  spread(category, srating) 

ratings <- d %>% 
  select(workerid, starts_with("target"), starts_with("control")) %>%
  gather(question, correct, starts_with("target"), starts_with("control")) %>%
  separate(question, c("passage","trial_type","q_num"), sep = "_") %>%
  mutate(correct = as.numeric(correct), 
         q_num = as.numeric(q_num) + 
           as.numeric(factor(passage)) * 10 + 
           as.numeric(factor(trial_type)) * 100)

d.reg <- left_join(ratings, subscales) %>%
  unite(question_type, passage, trial_type)

Now regression.

summary(glmer(correct ~ question_type + 
                (1|workerid) + 
                (1|q_num), 
              data = d.reg, 
              family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ question_type + (1 | workerid) + (1 | q_num)
##    Data: d.reg
## 
##      AIC      BIC   logLik deviance df.resid 
##  23950.6  23991.6 -11970.3  23940.6    26995 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2819 -0.5399  0.2608  0.5043  3.5126 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 2.9404   1.7148  
##  q_num    (Intercept) 0.2213   0.4704  
## Number of obs: 27000, groups:  workerid, 100; q_num, 15
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      0.8772     0.2689   3.262 0.001107 ** 
## question_typetarget_generalize   1.1482     0.2956   3.885 0.000102 ***
## question_typetarget_recall       0.9165     0.2944   3.113 0.001853 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                (Intr) qstn_typtrgt_g
## qstn_typtrgt_g -0.534               
## qstn_typtrgt_r -0.535  0.486

With exclusions.

summary(glmer(correct ~ question_type + 
                (1|workerid) + 
                (1|q_num), 
              data = filter(d.reg, !workerid %in% exclude), 
              family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ question_type + (1 | workerid) + (1 | q_num)
##    Data: filter(d.reg, !workerid %in% exclude)
## 
##      AIC      BIC   logLik deviance df.resid 
##  14441.2  14480.8  -7215.6  14431.2    20245 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7009  0.0476  0.2127  0.4045  5.8898 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 3.2321   1.7978  
##  q_num    (Intercept) 0.9239   0.9612  
## Number of obs: 20250, groups:  workerid, 75; q_num, 15
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                      1.2664     0.4808   2.634  0.00844 **
## question_typetarget_generalize   1.5479     0.6145   2.519  0.01178 * 
## question_typetarget_recall       1.5787     0.6136   2.573  0.01009 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                (Intr) qstn_typtrgt_g
## qstn_typtrgt_g -0.638               
## qstn_typtrgt_r -0.635  0.505
# check against means
filter(d.reg, !workerid %in% exclude) %>%
  group_by(question_type) %>%
  summarise(correct = mean(correct))
## Source: local data frame [3 x 2]
## 
##       question_type   correct
##               (chr)     (dbl)
## 1    control_recall 0.6853333
## 2 target_generalize 0.8746667
## 3     target_recall 0.8453333

Naive interaction.

summary(glmer(correct ~ question_type * active_learning + 
                (1|workerid) + 
                (1|q_num), 
              data = filter(d.reg, !workerid %in% exclude), 
              family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ question_type * active_learning + (1 | workerid) +  
##     (1 | q_num)
##    Data: filter(d.reg, !workerid %in% exclude)
## 
##      AIC      BIC   logLik deviance df.resid 
##  14229.7  14293.1  -7106.9  14213.7    20242 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7729  0.0430  0.2010  0.4091  4.5207 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 3.0750   1.7536  
##  q_num    (Intercept) 0.9452   0.9722  
## Number of obs: 20250, groups:  workerid, 75; q_num, 15
## 
## Fixed effects:
##                                                Estimate Std. Error z value
## (Intercept)                                    -0.06727    1.52364  -0.044
## question_typetarget_generalize                 -3.20695    0.69402  -4.621
## question_typetarget_recall                     -1.01826    0.68519  -1.486
## active_learning                                 0.25896    0.28544   0.907
## question_typetarget_generalize:active_learning  0.97257    0.06808  14.286
## question_typetarget_recall:active_learning      0.52315    0.06577   7.955
##                                                Pr(>|z|)    
## (Intercept)                                       0.965    
## question_typetarget_generalize                 3.82e-06 ***
## question_typetarget_recall                        0.137    
## active_learning                                   0.364    
## question_typetarget_generalize:active_learning  < 2e-16 ***
## question_typetarget_recall:active_learning     1.80e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                  (Intr) qstn_typtrgt_g qstn_typtrgt_r actv_l
## qstn_typtrgt_g   -0.214                                     
## qstn_typtrgt_r   -0.211  0.476                              
## activ_lrnng      -0.949  0.041          0.038               
## qstn_typtrgt_g:_  0.079 -0.468         -0.194         -0.085
## qstn_typtrgt_r:_  0.079 -0.198         -0.458         -0.087
##                  qstn_typtrgt_g:_
## qstn_typtrgt_g                   
## qstn_typtrgt_r                   
## activ_lrnng                      
## qstn_typtrgt_g:_                 
## qstn_typtrgt_r:_  0.426
summary(glmer(correct ~ question_type * affection + 
                (1|workerid) + 
                (1|q_num), 
              data = filter(d.reg, !workerid %in% exclude), 
              family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct ~ question_type * affection + (1 | workerid) + (1 | q_num)
##    Data: filter(d.reg, !workerid %in% exclude)
## 
##      AIC      BIC   logLik deviance df.resid 
##  14328.1  14391.5  -7156.1  14312.1    20242 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0376  0.0434  0.2048  0.3986  5.5596 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 3.0748   1.7535  
##  q_num    (Intercept) 0.9359   0.9674  
## Number of obs: 20250, groups:  workerid, 75; q_num, 15
## 
## Fixed effects:
##                                          Estimate Std. Error z value
## (Intercept)                              -0.22147    1.26208  -0.175
## question_typetarget_generalize           -1.12950    0.66313  -1.703
## question_typetarget_recall               -0.31474    0.66471  -0.473
## affection                                 0.29692    0.23480   1.265
## question_typetarget_generalize:affection  0.56242    0.05587  10.067
## question_typetarget_recall:affection      0.39346    0.05434   7.241
##                                          Pr(>|z|)    
## (Intercept)                                0.8607    
## question_typetarget_generalize             0.0885 .  
## question_typetarget_recall                 0.6359    
## affection                                  0.2060    
## question_typetarget_generalize:affection  < 2e-16 ***
## question_typetarget_recall:affection     4.46e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                 (Intr) qstn_typtrgt_g qstn_typtrgt_r affctn
## qstn_typtrgt_g  -0.264                                     
## qstn_typtrgt_r  -0.265  0.483                              
## affection       -0.925  0.046          0.047               
## qstn_typtrgt_g:  0.077 -0.391         -0.156         -0.085
## qstn_typtrgt_r:  0.080 -0.162         -0.387         -0.089
##                 qstn_typtrgt_g:
## qstn_typtrgt_g                 
## qstn_typtrgt_r                 
## affection                      
## qstn_typtrgt_g:                
## qstn_typtrgt_r:  0.405
summary(glmer(correct ~ question_type * rules_respect + 
                (1|workerid) + 
                (1|q_num), 
              data = filter(d.reg, !workerid %in% exclude), 
              family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: correct ~ question_type * rules_respect + (1 | workerid) + (1 |  
##     q_num)
##    Data: filter(d.reg, !workerid %in% exclude)
## 
##      AIC      BIC   logLik deviance df.resid 
##  14443.0  14506.3  -7213.5  14427.0    20242 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6080  0.0478  0.2130  0.4046  5.8646 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 3.0809   1.7552  
##  q_num    (Intercept) 0.9212   0.9598  
## Number of obs: 20250, groups:  workerid, 75; q_num, 15
## 
## Fixed effects:
##                                               Estimate Std. Error z value
## (Intercept)                                  -0.699116   1.151954  -0.607
## question_typetarget_generalize                1.526304   0.663809   2.299
## question_typetarget_recall                    1.791782   0.663295   2.701
## rules_respect                                 0.460607   0.245032   1.880
## question_typetarget_generalize:rules_respect  0.005629   0.060959   0.092
## question_typetarget_recall:rules_respect     -0.051344   0.059332  -0.865
##                                              Pr(>|z|)   
## (Intercept)                                   0.54392   
## question_typetarget_generalize                0.02149 * 
## question_typetarget_recall                    0.00691 **
## rules_respect                                 0.06014 . 
## question_typetarget_generalize:rules_respect  0.92643   
## question_typetarget_recall:rules_respect      0.38683   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                  (Intr) qstn_typtrgt_g qstn_typtrgt_r rls_rs
## qstn_typtrgt_g   -0.288                                     
## qstn_typtrgt_r   -0.287  0.489                              
## rules_rspct      -0.909  0.045          0.045               
## qstn_typtrgt_g:_  0.081 -0.384         -0.152         -0.088
## qstn_typtrgt_r:_  0.084 -0.155         -0.377         -0.092
##                  qstn_typtrgt_g:_
## qstn_typtrgt_g                   
## qstn_typtrgt_r                   
## rules_rspct                      
## qstn_typtrgt_g:_                 
## qstn_typtrgt_r:_  0.393
summary(glmer(correct ~ question_type * rules_respect + 
                question_type * active_learning + 
                question_type * affection + 
                (1|workerid) + 
                (1|q_num), 
              data = filter(d.reg, !workerid %in% exclude), 
              family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## correct ~ question_type * rules_respect + question_type * active_learning +  
##     question_type * affection + (1 | workerid) + (1 | q_num)
##    Data: filter(d.reg, !workerid %in% exclude)
## 
##      AIC      BIC   logLik deviance df.resid 
##  14193.0  14303.8  -7082.5  14165.0    20236 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7236  0.0403  0.1998  0.4053  4.9276 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  workerid (Intercept) 2.9396   1.7145  
##  q_num    (Intercept) 0.9485   0.9739  
## Number of obs: 20250, groups:  workerid, 75; q_num, 15
## 
## Fixed effects:
##                                                Estimate Std. Error z value
## (Intercept)                                    -2.11317    1.68638  -1.253
## question_typetarget_generalize                 -3.14545    0.73205  -4.297
## question_typetarget_recall                     -1.07483    0.72775  -1.477
## rules_respect                                   0.50869    0.24812   2.050
## active_learning                                -0.07878    0.32313  -0.244
## affection                                       0.31998    0.25589   1.250
## question_typetarget_generalize:rules_respect   -0.24694    0.06584  -3.750
## question_typetarget_recall:rules_respect       -0.20213    0.06278  -3.220
## question_typetarget_generalize:active_learning  0.92039    0.07863  11.705
## question_typetarget_recall:active_learning      0.46282    0.07638   6.060
## question_typetarget_generalize:affection        0.25705    0.06298   4.081
## question_typetarget_recall:affection            0.25139    0.06055   4.152
##                                                Pr(>|z|)    
## (Intercept)                                    0.210177    
## question_typetarget_generalize                 1.73e-05 ***
## question_typetarget_recall                     0.139699    
## rules_respect                                  0.040347 *  
## active_learning                                0.807385    
## affection                                      0.211120    
## question_typetarget_generalize:rules_respect   0.000176 ***
## question_typetarget_recall:rules_respect       0.001282 ** 
## question_typetarget_generalize:active_learning  < 2e-16 ***
## question_typetarget_recall:active_learning     1.36e-09 ***
## question_typetarget_generalize:affection       4.48e-05 ***
## question_typetarget_recall:affection           3.30e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                       (Intr) qstn_typtrgt_g qstn_typtrgt_r rls_rs actv_l
## qstn_typtrgt_g        -0.170                                            
## qstn_typtrgt_r        -0.179  0.466                                     
## rules_rspct           -0.422  0.005          0.010                      
## activ_lrnng           -0.402  0.014          0.017         -0.320       
## affection             -0.365  0.004          0.007          0.147 -0.488
## qstn_typtrgt_gnrlz:r_  0.025 -0.239         -0.080         -0.083  0.032
## qstn_typtrgt_rcll:r_   0.030 -0.083         -0.232         -0.089  0.032
## qstn_typtrgt_gnrlz:c_  0.034 -0.251         -0.102          0.036 -0.092
## qstn_typtrgt_rcll:c_   0.037 -0.106         -0.245          0.035 -0.095
## qstn_typtrgt_g:        0.026 -0.192         -0.082         -0.007  0.039
## qstn_typtrgt_r:        0.029 -0.084         -0.193         -0.008  0.041
##                       affctn qstn_typtrgt_gnrlz:r_ qstn_typtrgt_rcll:r_
## qstn_typtrgt_g                                                         
## qstn_typtrgt_r                                                         
## rules_rspct                                                            
## activ_lrnng                                                            
## affection                                                              
## qstn_typtrgt_gnrlz:r_ -0.008                                           
## qstn_typtrgt_rcll:r_  -0.008  0.400                                    
## qstn_typtrgt_gnrlz:c_  0.042 -0.264                -0.152              
## qstn_typtrgt_rcll:c_   0.043 -0.150                -0.278              
## qstn_typtrgt_g:       -0.080  0.012                 0.032              
## qstn_typtrgt_r:       -0.086  0.032                 0.044              
##                       qstn_typtrgt_gnrlz:c_ qstn_typtrgt_rcll:c_
## qstn_typtrgt_g                                                  
## qstn_typtrgt_r                                                  
## rules_rspct                                                     
## activ_lrnng                                                     
## affection                                                       
## qstn_typtrgt_gnrlz:r_                                           
## qstn_typtrgt_rcll:r_                                            
## qstn_typtrgt_gnrlz:c_                                           
## qstn_typtrgt_rcll:c_   0.441                                    
## qstn_typtrgt_g:       -0.412                -0.166              
## qstn_typtrgt_r:       -0.170                -0.422              
##                       qstn_typtrgt_g:
## qstn_typtrgt_g                       
## qstn_typtrgt_r                       
## rules_rspct                          
## activ_lrnng                          
## affection                            
## qstn_typtrgt_gnrlz:r_                
## qstn_typtrgt_rcll:r_                 
## qstn_typtrgt_gnrlz:c_                
## qstn_typtrgt_rcll:c_                 
## qstn_typtrgt_g:                      
## qstn_typtrgt_r:        0.397         
## convergence code: 0
## Model failed to converge with max|grad| = 0.00207284 (tol = 0.001, component 1)

The analysis suggested by stats consultant:

fit <- lm(correct ~ question_type + active_learning + rules_respect + question_type*active_learning + question_type*rules_respect, 
              data = filter(d.reg, !workerid %in% exclude))
summary(fit) 
## 
## Call:
## lm(formula = correct ~ question_type + active_learning + rules_respect + 
##     question_type * active_learning + question_type * rules_respect, 
##     data = filter(d.reg, !workerid %in% exclude))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98763  0.04222  0.13408  0.24572  0.51821 
## 
## Coefficients:
##                                                 Estimate Std. Error
## (Intercept)                                     0.678037   0.004808
## question_typetarget_generalize                  0.176750   0.006800
## question_typetarget_recall                      0.154876   0.006800
## active_learning                                 0.016927   0.009754
## rules_respect                                   0.089073   0.008739
## question_typetarget_generalize:active_learning  0.146261   0.013794
## question_typetarget_recall:active_learning      0.075587   0.013794
## question_typetarget_generalize:rules_respect   -0.068474   0.012359
## question_typetarget_recall:rules_respect       -0.058283   0.012359
##                                                t value Pr(>|t|)    
## (Intercept)                                    141.016  < 2e-16 ***
## question_typetarget_generalize                  25.993  < 2e-16 ***
## question_typetarget_recall                      22.776  < 2e-16 ***
## active_learning                                  1.735   0.0827 .  
## rules_respect                                   10.192  < 2e-16 ***
## question_typetarget_generalize:active_learning  10.603  < 2e-16 ***
## question_typetarget_recall:active_learning       5.480 4.31e-08 ***
## question_typetarget_generalize:rules_respect    -5.540 3.06e-08 ***
## question_typetarget_recall:rules_respect        -4.716 2.42e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3845 on 20241 degrees of freedom
## Multiple R-squared:  0.07018,    Adjusted R-squared:  0.06982 
## F-statistic:   191 on 8 and 20241 DF,  p-value: < 2.2e-16

4.3 Median Splits on Subscales.

Perform median splits on subscales.

# rr_med <-describe(wide_ss$rules_respect)
# al_med <-describe(wide_ss$active_learning)
# a_med <-describe(wide_ss$affection)
# 
# wide_ss$rr_split[wide_ss$rules_respect <= rr_med$median] <- "low"
# wide_ss$rr_split[wide_ss$rules_respect > rr_med$median] <- "high"
# 
# wide_ss$al_split[wide_ss$active_learning <= al_med$median] <- "low"
# wide_ss$al_split[wide_ss$active_learning > al_med$median] <- "high"
# 
# wide_ss$a_split[wide_ss$affection <= a_med$median] <- "low"
# wide_ss$a_split[wide_ss$affection > a_med$median] <- "high"
# 
# describeBy(wide_ss$mean_control_recall, group=wide_ss$rr_split)
# describeBy(wide_ss$mean_target_recall, group=wide_ss$rr_split)
# describeBy(wide_ss$mean_target_generalize, group=wide_ss$rr_split)
# 
# describeBy(wide_ss$mean_control_recall, group=wide_ss$al_split)
# describeBy(wide_ss$mean_target_recall, group=wide_ss$al_split)
# describeBy(wide_ss$mean_target_generalize, group=wide_ss$al_split)
# 
# describeBy(wide_ss$mean_control_recall, group=wide_ss$a_split)
# describeBy(wide_ss$mean_target_recall, group=wide_ss$a_split)
# describeBy(wide_ss$mean_target_generalize, group=wide_ss$a_split)