Data analysis of basic parenting values/attitudes survey.
Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
##
## The following object is masked from 'package:base':
##
## scale
##
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
##
## Attaching package: 'ggplot2'
##
## The following object is masked from 'package:psych':
##
## %+%
##
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: boot
##
## Attaching package: 'boot'
##
## The following object is masked from 'package:psych':
##
## logit
##
## Loading required package: lattice
##
## Attaching package: 'lattice'
##
## The following object is masked from 'package:boot':
##
## melanoma
##
##
## Attaching package: 'nFactors'
##
## The following object is masked from 'package:lattice':
##
## parallel
##
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:tidyr':
##
## expand
Read in the per-participant JSON result files and consolidate them into a single data frame.
# Directory holding the raw result files (one JSON file per participant).
data_dir <- "../production-results/uptake_e1/"
files <- dir(data_dir)

# Parse one result file into a data frame with one row per questionnaire
# sentence. All participant-level values (uptake answers, demographics,
# timings) are scalars that data.frame() recycles across those rows.
#
# f: file name relative to data_dir.
# Returns: a data frame for that participant.
read_participant <- function(f) {
  jf <- paste0(data_dir, f)
  jd <- fromJSON(paste(readLines(jf), collapse = ""))

  # Always spell out `answers`: the shorter `jd$answer` only works through
  # `$` partial matching and silently breaks if another element with the
  # same prefix is ever added to the JSON.
  trials <- jd$answers$data

  # Parenting questionnaire = every trial that is not an uptake trial.
  is_questionnaire <- trials$trial_type != "uptake"

  # Uptake answers are addressed by position: 1-5 are stored as
  # control_recall, 6-10 as target_generalize, 11-15 as target_recall.
  # NOTE(review): assumes the experiment always records the uptake answers
  # first and in this order -- confirm against the experiment script.
  answers <- trials$answer

  data.frame(workerid = jd$WorkerId,
             sent = trials$sentence[is_questionnaire],
             rating = as.numeric(trials$rating[is_questionnaire]),
             enjoy_target = trials$enjoy_target,
             enjoy_control = trials$enjoy_control,
             reading_ease_target = trials$reading_ease_target,
             reading_ease_control = trials$reading_ease_control,
             target_recall_1 = answers[11],
             target_recall_2 = answers[12],
             target_recall_3 = answers[13],
             target_recall_4 = answers[14],
             target_recall_5 = answers[15],
             target_generalize_1 = answers[6],
             target_generalize_2 = answers[7],
             target_generalize_3 = answers[8],
             target_generalize_4 = answers[9],
             target_generalize_5 = answers[10],
             control_recall_1 = answers[1],
             control_recall_2 = answers[2],
             control_recall_3 = answers[3],
             control_recall_4 = answers[4],
             control_recall_5 = answers[5],
             children = trials$children,
             language = trials$homelang,
             ses = trials$ladder,
             gender = trials$gender,
             age = trials$age,
             education = trials$education,
             ethnicity = trials$ethnicity,
             childAgeYoung = trials$childAgeYoung,
             childAgeOld = trials$childAgeOld,
             race = as.character(trials$race[1]),
             reading_time_target = trials$target_rt[1],
             reading_time_control = trials$control_rt[1],
             time_questionnaire = trials$questionnaire_rt[1])
}

# Read every file first and bind once, instead of growing d.raw inside a loop.
d.raw <- bind_rows(lapply(files, read_participant))
Map on question short forms so that we can use these instead.
# Lookup table of sentence labels; the `sent` column is converted to
# character so it can be compared with the sentence text in the data.
labels <- read.csv("sent_forms_e7.csv") %>%
  mutate(sent = as.character(sent))
Clean up the sentence text (strip quote characters) before merging with the labels.
# Characters removed from every sentence, one pattern at a time: straight and
# curly quotes plus "â" (presumably residue of mis-encoded curly quotes --
# confirm against the raw files).
stray_chars <- c("'", "’", "“", "”", "‘", "â")
d.raw$sent <- Reduce(
  function(txt, ch) str_replace_all(txt, ch, ""),
  stray_chars,
  as.character(d.raw$sent)
)
Merge. Recode uptake answers by accuracy.
# Attach the label columns to each questionnaire row (joined on whatever
# columns d.raw and labels have in common).
d <- left_join(d.raw, labels)

# Reverse-coded items: a rating r becomes 6 - r.
is_reversed <- d$reverse_code == 1
d$rating[is_reversed] <- 6 - d$rating[is_reversed]

# Answer key: the option letter that is scored as correct for each uptake
# question. Every other option (of a, b, c) is scored as incorrect.
answer_key <- c(
  target_generalize_1 = "b",
  target_generalize_2 = "a",
  target_generalize_3 = "b",
  target_generalize_4 = "c",
  target_generalize_5 = "c",
  target_recall_1 = "b",
  target_recall_2 = "a",
  target_recall_3 = "a",
  target_recall_4 = "a",
  target_recall_5 = "c",
  control_recall_1 = "a",
  control_recall_2 = "c",
  control_recall_3 = "a",
  control_recall_4 = "c",
  control_recall_5 = "b"
)

# Recode each answer column in place: correct letter -> "1", the two
# remaining letters -> "0" (first match only, as with str_replace).
for (question in names(answer_key)) {
  right <- answer_key[[question]]
  wrong <- paste(base::setdiff(c("a", "b", "c"), right), collapse = "|")
  d[[question]] <- str_replace(d[[question]], right, "1")
  d[[question]] <- str_replace(d[[question]], wrong, "0")
}
Plot demographic info.
# Participant-level table: exactly one row per worker.
#
# The item-level columns (sent, rating and the label columns) are dropped
# BEFORE de-duplicating. distinct() with no arguments compares all columns in
# current dplyr, so leaving sent/rating in place would keep one row per
# worker x sentence rather than one row per worker.
# dplyr::select is spelled out because the package-attach messages at the top
# of this document show MASS masking dplyr's select().
subinfo <- d %>%
  dplyr::select(-sent, -rating,
                -short_sent, -category, -instrument, -reverse_code) %>%
  distinct()

# Proportion of correct answers per worker for each question type
# (control_recall, target_generalize, target_recall), in wide form.
questions <- subinfo %>%
  dplyr::select(workerid, starts_with("target"), starts_with("control")) %>%
  gather(question, correct, starts_with("target"), starts_with("control")) %>%
  separate(question, c("passage", "trial_type", "q_num"), sep = "_") %>%
  group_by(workerid, passage, trial_type) %>%
  summarise(correct = mean(as.numeric(correct))) %>%
  # Drop the leftover grouping so unite() is free to consume `passage`.
  ungroup() %>%
  unite(trialtype, passage, trial_type) %>%
  spread(trialtype, correct)

# Replace the 15 single-question columns with the three per-type scores.
subinfo <- subinfo %>%
  dplyr::select(-starts_with("target"), -starts_with("control")) %>%
  left_join(questions, by = "workerid")
Awful recoding.
# Education as an ordered set of levels (lowest to highest).
subinfo$education <- factor(
  subinfo$education,
  levels = c("highSchool", "someCollege", "4year", "someGrad", "Grad")
)

# --- Gender: collapse free-text variants onto "Female" / "Male" -------------
subinfo$gender <- str_replace_all(
  subinfo$gender,
  "female|FEMALE|F$|f$|Femal$|Females",
  "Female"
)
# Regex alternation is ordered, so "^Maleq" has to come before "^Male";
# otherwise "^Male" matches first and the trailing "q" survives.
subinfo$gender <- str_replace_all(
  subinfo$gender,
  "^Maleq|^male|^Male|^MALE|^M$|^m$|Make",
  "Male"
)
# Remove specific numbers that appear in the gender field.
subinfo$gender <- str_replace_all(subinfo$gender,
                                  "29|24|25|28|32|33|45", "")
# "females" became "Females" above; this turns the embedded "males" into
# "male", leaving "Female".
subinfo$gender <- str_replace_all(subinfo$gender, "males", "male")
subinfo$gender <- str_replace_all(subinfo$gender, " ", "")

# --- Home language: fix spelling/case variants ------------------------------
subinfo$language <- str_replace_all(
  subinfo$language,
  "english|eNGLISH|Engliah|ENGLISH|^eng$|Enlgish",
  "English"
)
subinfo$language <- str_replace_all(subinfo$language, " ", "")
subinfo$language <- str_replace_all(subinfo$language, "arabic", "Arabic")
subinfo$language <- str_replace_all(subinfo$language, "chinese", "Chinese")
subinfo$language <- str_replace_all(subinfo$language, "german", "German")
subinfo$language <- str_replace_all(subinfo$language, "tagalog", "Tagalog")

# --- Ordered response scales (each defined once, used for both passages) ----
child_age_levels <- c("", "0to6mo", "7to12mo", "1y", "2y", "3y", "4y", "5y",
                      "6y", "7y", "8y", "9y", "10y", "olderthan10")
ease_levels <- c("Very Difficult", "Somewhat Difficult",
                 "Somewhat Easy", "Very Easy")
enjoy_levels <- c("Very Unenjoyable", "Somewhat Unenjoyable",
                  "Somewhat Enjoyable", "Very Enjoyable")

subinfo$youngestChildAge <- factor(subinfo$childAgeYoung,
                                   levels = child_age_levels)
subinfo$oldestChildAge <- factor(subinfo$childAgeOld,
                                 levels = child_age_levels)
subinfo$reading_ease_target <- factor(subinfo$reading_ease_target,
                                      levels = ease_levels)
subinfo$reading_ease_control <- factor(subinfo$reading_ease_control,
                                       levels = ease_levels)
subinfo$enjoy_target <- factor(subinfo$enjoy_target, levels = enjoy_levels)
subinfo$enjoy_control <- factor(subinfo$enjoy_control, levels = enjoy_levels)
# One quick plot per participant-level variable in subinfo. Each call is a
# separate top-level expression, so each produces its own figure.

# Demographics.
qplot(ses, data=subinfo)
qplot(children, data=subinfo)
qplot(gender, data=subinfo)
qplot(education, data=subinfo)
qplot(age, data=subinfo)
qplot(language, data=subinfo)
qplot(ethnicity, data=subinfo)
qplot(race, data=subinfo)
# Ages of youngest / oldest child (factor columns created during recoding).
qplot(youngestChildAge, data=subinfo)
qplot(oldestChildAge, data=subinfo)
# Self-reported reading ease and enjoyment for the target and control passages.
qplot(reading_ease_target, data=subinfo)
qplot(reading_ease_control, data=subinfo)
qplot(enjoy_target, data=subinfo)
qplot(enjoy_control, data=subinfo)
# Descriptive statistics for the per-worker proportion-correct scores
# (describe() is presumably psych::describe -- confirm). The `##` lines are
# the rendered console output of the call directly above them.
describe(subinfo$target_recall, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 0.76 0.25 0.8 0.8 0.3 0 1 1 0.02
describe(subinfo$control_recall, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 0.63 0.24 0.6 0.64 0.3 0 1 1 0.02
describe(subinfo$target_generalize, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 0.8 0.29 1 0.85 0 0 1 1 0.03
# Same summary for the reading times of the target and control passages.
describe(subinfo$reading_time_target, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 197.74 154.72 167.34 183.51 160.36 2.41 900 897.59 15.47
describe(subinfo$reading_time_control, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 162.47 146.19 118.12 145.89 155.89 1.39 736.78 735.39 14.62
# Scatterplots of reading time against accuracy on the matching passage.
qplot(reading_time_target, target_recall, data=subinfo)
qplot(reading_time_target, target_generalize, data=subinfo)
qplot(reading_time_control, control_recall, data=subinfo)
Now look at mean ratings across sentences.
# Frequency of each rating value over all rows of d (ratings of reverse-coded
# items were already flipped when d was built).
rating_count <- table(d$rating)
rating_count
##
## 0 1 2 3 4 5 6
## 36 66 109 206 240 411 732
# The same counts expressed as proportions of all ratings.
prop.table(rating_count)
##
## 0 1 2 3 4 5
## 0.02000000 0.03666667 0.06055556 0.11444444 0.13333333 0.22833333
## 6
## 0.40666667
# Bootstrapped summary of `rating` for every item (one group per
# category / instrument / short_sent / reverse_code combination), sorted by
# instrument, then category, then descending mean.
item_groups <- group_by(d, category, instrument, short_sent, reverse_code)
ms <- arrange(
  multi_boot_standard(item_groups, col = "rating"),
  instrument, category, desc(mean)
)

# Factor whose level order follows the sorted rows, so plots keep that order.
ms[["short_sent_ord"]] <- factor(ms[["short_sent"]],
                                 levels = ms[["short_sent"]])
Plot attitude.
# Point-range plot of the item means with their bootstrap intervals for the
# "attitudes" instrument; colour = category, point shape = reverse_code.
attitude_items <- filter(ms, instrument == "attitudes")
vertical_x_labels <- theme(
  axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)
)

qplot(short_sent_ord, mean,
      col = category,
      ymin = ci_lower,
      ymax = ci_upper,
      pch = factor(reverse_code),
      geom = "pointrange",
      data = attitude_items) +
  vertical_x_labels +
  labs(x = "", y = "Mean Rating") +
  ylim(c(0, 6)) +
  scale_colour_solarized()
Plot mean subscale scores.
# Bootstrapped mean rating (with interval) for each subscale category.
mc <- d %>%
  group_by(category) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean))

# Bar per category with the bootstrap interval drawn as a vertical line.
ggplot(mc, aes(x = category, y = mean)) +
  geom_bar(stat = "identity") +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = .9))

# Per-worker mean rating on each subscale, one row per worker and one column
# per category. The interval columns must be dropped before spread():
# otherwise they stay as identifying columns and every worker x category
# combination keeps its own NA-padded row instead of collapsing to one row
# per worker. dplyr::select is explicit because MASS masks select() here.
mcl <- d %>%
  group_by(category, workerid) %>%
  multi_boot_standard(col = "rating") %>%
  ungroup() %>%
  dplyr::select(workerid, category, mean) %>%
  spread(category, mean)
# Reliability of the full attitudes instrument: ratings in wide form (one row
# per worker, one column per item), then the alpha reliability summary.
# Both calls are namespace-qualified: the attach messages at the top of this
# document show MASS masking dplyr's select(), and current ggplot2 releases
# also export an alpha() that would shadow psych's.
wide.attitudes <- d %>%
  filter(instrument == "attitudes") %>%
  dplyr::select(workerid, short_sent, rating) %>%
  spread(short_sent, rating)
alpha.mat <- as.matrix(dplyr::select(wide.attitudes, -workerid))
summary(psych::alpha(x = alpha.mat))
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.84 0.86 0.91 0.25 6.2 0.031 4.6 0.76
# Reliability of the rules_respect subscale (wide item matrix, then alpha).
# select() and alpha() are namespace-qualified because MASS masks dplyr's
# select() in this session and current ggplot2 also exports an alpha().
wide.rules_respect <- d %>%
  filter(category == "rules_respect") %>%
  dplyr::select(workerid, short_sent, rating) %>%
  spread(short_sent, rating)
alpha.rr <- as.matrix(dplyr::select(wide.rules_respect, -workerid))
summary(psych::alpha(x = alpha.rr))
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.72 0.73 0.72 0.31 2.7 0.067 4.2 0.91
# Reliability of the affection subscale (wide item matrix, then alpha).
# select() and alpha() are namespace-qualified because MASS masks dplyr's
# select() in this session and current ggplot2 also exports an alpha().
wide.affection <- d %>%
  filter(category == "affection") %>%
  dplyr::select(workerid, short_sent, rating) %>%
  spread(short_sent, rating)
alpha.af <- as.matrix(dplyr::select(wide.affection, -workerid))
summary(psych::alpha(x = alpha.af))
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.78 0.81 0.84 0.41 4.2 0.058 4.8 1.1
# Reliability of the active_learning subscale (wide item matrix, then alpha).
# select() and alpha() are namespace-qualified because MASS masks dplyr's
# select() in this session and current ggplot2 also exports an alpha().
wide.active_learning <- d %>%
  filter(category == "active_learning") %>%
  dplyr::select(workerid, short_sent, rating) %>%
  spread(short_sent, rating)
alpha.al <- as.matrix(dplyr::select(wide.active_learning, -workerid))
summary(psych::alpha(x = alpha.al))
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.74 0.75 0.75 0.33 3 0.064 4.9 0.94
Create a data frame that has subscale scores.
Standardize ratings within subscale.
# Copy of d with `srating`: the rating passed through scale() separately
# within each category.
ds <- d
ds$srating <- ave(d$rating, d$category, FUN = scale)

# Mean standardized rating per worker and category.
subscale_means <- summarize(group_by(ds, workerid, category),
                            srating = mean(srating))

# One row per worker x question type: subscale scores in columns, joined with
# the participant info, accuracy gathered into trial_type / correct.
ss <- subscale_means %>%
  spread(category, srating) %>%
  left_join(subinfo) %>%
  gather(trial_type, correct,
         control_recall, target_recall, target_generalize)

# Fully long form: one row per worker x question type x subscale.
ss.long <- gather(ss, subscale, srating,
                  active_learning, affection, rules_respect)

# Accuracy against subscale score, one linear fit per subscale, one panel per
# question type.
ggplot(ss.long, aes(x = srating, y = correct, col = subscale)) +
  geom_jitter() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~trial_type)
Now with some exclusions, exploratory.
# Workers are excluded when either passage was read faster than this cutoff
# (same units as the reading_time_* columns).
min_reading_time <- 30
too_fast <- subinfo$reading_time_target < min_reading_time |
  subinfo$reading_time_control < min_reading_time
# which() drops NA comparisons, so a missing reading time cannot put an NA
# worker id into the exclusion list.
exclude <- subinfo$workerid[which(too_fast)]
length(exclude)
## [1] 25
# Reading-time histograms; the dashed red line marks the exclusion cutoff
# actually applied above (it was previously drawn at 15, not at the cutoff).
qplot(subinfo$reading_time_target, binwidth = 15) +
  geom_vline(xintercept = min_reading_time, lty = 2, col = "red")
qplot(subinfo$reading_time_control, binwidth = 15) +
  geom_vline(xintercept = min_reading_time, lty = 2, col = "red")
Replot with exclusions.
# Same accuracy-by-subscale plot, restricted to workers not in `exclude`.
ss.kept <- filter(ss.long, !workerid %in% exclude)
ggplot(ss.kept, aes(x = srating, y = correct, col = subscale)) +
  geom_jitter() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~trial_type)
# Mean standardized rating per worker on each subscale (wide form).
subscales <- ds %>%
  group_by(workerid, category) %>%
  summarize(srating = mean(srating)) %>%
  ungroup() %>%
  spread(category, srating)

# Trial-level accuracy: one row per worker x uptake question.
#
# d holds one row per worker x questionnaire sentence, and each of those rows
# repeats the worker's 15 uptake answers. Without distinct() every answer is
# copied once per sentence, which multiplies the number of observations and
# shrinks the standard errors of any model fitted to d.reg. The model
# summaries rendered further down this document report 27000 observations
# (100 workers x 15 questions = 1500 expected), i.e. they were fitted to the
# duplicated rows and must be re-rendered after this change.
# dplyr::select is explicit because MASS masks select() in this session.
ratings <- d %>%
  dplyr::select(workerid, starts_with("target"), starts_with("control")) %>%
  distinct() %>%
  gather(question, correct, starts_with("target"), starts_with("control")) %>%
  separate(question, c("passage", "trial_type", "q_num"), sep = "_") %>%
  # q_num becomes a unique numeric id per question: the within-type number
  # plus an offset for the passage (x10) and for the trial type (x100).
  mutate(correct = as.numeric(correct),
         q_num = as.numeric(q_num) +
           as.numeric(factor(passage)) * 10 +
           as.numeric(factor(trial_type)) * 100)

# Regression data: accuracy rows plus each worker's subscale scores, with
# passage and trial type combined into a single question_type column.
d.reg <- left_join(ratings, subscales, by = "workerid") %>%
  unite(question_type, passage, trial_type)
Now regression.
# Logistic mixed model of accuracy by question type, with random intercepts
# for worker and for question; fitted to all workers.
m.type <- glmer(
  correct ~ question_type + (1|workerid) + (1|q_num),
  data = d.reg,
  family = "binomial"
)
summary(m.type)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ question_type + (1 | workerid) + (1 | q_num)
## Data: d.reg
##
## AIC BIC logLik deviance df.resid
## 23950.6 23991.6 -11970.3 23940.6 26995
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2819 -0.5399 0.2608 0.5043 3.5126
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 2.9404 1.7148
## q_num (Intercept) 0.2213 0.4704
## Number of obs: 27000, groups: workerid, 100; q_num, 15
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.8772 0.2689 3.262 0.001107 **
## question_typetarget_generalize 1.1482 0.2956 3.885 0.000102 ***
## question_typetarget_recall 0.9165 0.2944 3.113 0.001853 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) qstn_typtrgt_g
## qstn_typtrgt_g -0.534
## qstn_typtrgt_r -0.535 0.486
With exclusions.
# Same question-type model, fitted only to workers not in `exclude`.
m.type.excl <- glmer(
  correct ~ question_type + (1|workerid) + (1|q_num),
  data = filter(d.reg, !workerid %in% exclude),
  family = "binomial"
)
summary(m.type.excl)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ question_type + (1 | workerid) + (1 | q_num)
## Data: filter(d.reg, !workerid %in% exclude)
##
## AIC BIC logLik deviance df.resid
## 14441.2 14480.8 -7215.6 14431.2 20245
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7009 0.0476 0.2127 0.4045 5.8898
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 3.2321 1.7978
## q_num (Intercept) 0.9239 0.9612
## Number of obs: 20250, groups: workerid, 75; q_num, 15
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2664 0.4808 2.634 0.00844 **
## question_typetarget_generalize 1.5479 0.6145 2.519 0.01178 *
## question_typetarget_recall 1.5787 0.6136 2.573 0.01009 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) qstn_typtrgt_g
## qstn_typtrgt_g -0.638
## qstn_typtrgt_r -0.635 0.505
# Sanity check: raw proportion correct per question type among the retained
# workers, for comparison with the model estimates.
d.reg %>%
  filter(!workerid %in% exclude) %>%
  group_by(question_type) %>%
  summarise(correct = mean(correct))
## Source: local data frame [3 x 2]
##
## question_type correct
## (chr) (dbl)
## 1 control_recall 0.6853333
## 2 target_generalize 0.8746667
## 3 target_recall 0.8453333
Naive interaction.
# Question type x active_learning score, retained workers only; random
# intercepts for worker and question.
m.al <- glmer(
  correct ~ question_type * active_learning + (1|workerid) + (1|q_num),
  data = filter(d.reg, !workerid %in% exclude),
  family = "binomial"
)
summary(m.al)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ question_type * active_learning + (1 | workerid) +
## (1 | q_num)
## Data: filter(d.reg, !workerid %in% exclude)
##
## AIC BIC logLik deviance df.resid
## 14229.7 14293.1 -7106.9 14213.7 20242
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7729 0.0430 0.2010 0.4091 4.5207
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 3.0750 1.7536
## q_num (Intercept) 0.9452 0.9722
## Number of obs: 20250, groups: workerid, 75; q_num, 15
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -0.06727 1.52364 -0.044
## question_typetarget_generalize -3.20695 0.69402 -4.621
## question_typetarget_recall -1.01826 0.68519 -1.486
## active_learning 0.25896 0.28544 0.907
## question_typetarget_generalize:active_learning 0.97257 0.06808 14.286
## question_typetarget_recall:active_learning 0.52315 0.06577 7.955
## Pr(>|z|)
## (Intercept) 0.965
## question_typetarget_generalize 3.82e-06 ***
## question_typetarget_recall 0.137
## active_learning 0.364
## question_typetarget_generalize:active_learning < 2e-16 ***
## question_typetarget_recall:active_learning 1.80e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) qstn_typtrgt_g qstn_typtrgt_r actv_l
## qstn_typtrgt_g -0.214
## qstn_typtrgt_r -0.211 0.476
## activ_lrnng -0.949 0.041 0.038
## qstn_typtrgt_g:_ 0.079 -0.468 -0.194 -0.085
## qstn_typtrgt_r:_ 0.079 -0.198 -0.458 -0.087
## qstn_typtrgt_g:_
## qstn_typtrgt_g
## qstn_typtrgt_r
## activ_lrnng
## qstn_typtrgt_g:_
## qstn_typtrgt_r:_ 0.426
# Question type x affection score, retained workers only; random intercepts
# for worker and question.
m.aff <- glmer(
  correct ~ question_type * affection + (1|workerid) + (1|q_num),
  data = filter(d.reg, !workerid %in% exclude),
  family = "binomial"
)
summary(m.aff)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## correct ~ question_type * affection + (1 | workerid) + (1 | q_num)
## Data: filter(d.reg, !workerid %in% exclude)
##
## AIC BIC logLik deviance df.resid
## 14328.1 14391.5 -7156.1 14312.1 20242
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0376 0.0434 0.2048 0.3986 5.5596
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 3.0748 1.7535
## q_num (Intercept) 0.9359 0.9674
## Number of obs: 20250, groups: workerid, 75; q_num, 15
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -0.22147 1.26208 -0.175
## question_typetarget_generalize -1.12950 0.66313 -1.703
## question_typetarget_recall -0.31474 0.66471 -0.473
## affection 0.29692 0.23480 1.265
## question_typetarget_generalize:affection 0.56242 0.05587 10.067
## question_typetarget_recall:affection 0.39346 0.05434 7.241
## Pr(>|z|)
## (Intercept) 0.8607
## question_typetarget_generalize 0.0885 .
## question_typetarget_recall 0.6359
## affection 0.2060
## question_typetarget_generalize:affection < 2e-16 ***
## question_typetarget_recall:affection 4.46e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) qstn_typtrgt_g qstn_typtrgt_r affctn
## qstn_typtrgt_g -0.264
## qstn_typtrgt_r -0.265 0.483
## affection -0.925 0.046 0.047
## qstn_typtrgt_g: 0.077 -0.391 -0.156 -0.085
## qstn_typtrgt_r: 0.080 -0.162 -0.387 -0.089
## qstn_typtrgt_g:
## qstn_typtrgt_g
## qstn_typtrgt_r
## affection
## qstn_typtrgt_g:
## qstn_typtrgt_r: 0.405
# Question type x rules_respect score, retained workers only; random
# intercepts for worker and question.
m.rr <- glmer(
  correct ~ question_type * rules_respect + (1|workerid) + (1|q_num),
  data = filter(d.reg, !workerid %in% exclude),
  family = "binomial"
)
summary(m.rr)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: correct ~ question_type * rules_respect + (1 | workerid) + (1 |
## q_num)
## Data: filter(d.reg, !workerid %in% exclude)
##
## AIC BIC logLik deviance df.resid
## 14443.0 14506.3 -7213.5 14427.0 20242
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6080 0.0478 0.2130 0.4046 5.8646
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 3.0809 1.7552
## q_num (Intercept) 0.9212 0.9598
## Number of obs: 20250, groups: workerid, 75; q_num, 15
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -0.699116 1.151954 -0.607
## question_typetarget_generalize 1.526304 0.663809 2.299
## question_typetarget_recall 1.791782 0.663295 2.701
## rules_respect 0.460607 0.245032 1.880
## question_typetarget_generalize:rules_respect 0.005629 0.060959 0.092
## question_typetarget_recall:rules_respect -0.051344 0.059332 -0.865
## Pr(>|z|)
## (Intercept) 0.54392
## question_typetarget_generalize 0.02149 *
## question_typetarget_recall 0.00691 **
## rules_respect 0.06014 .
## question_typetarget_generalize:rules_respect 0.92643
## question_typetarget_recall:rules_respect 0.38683
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) qstn_typtrgt_g qstn_typtrgt_r rls_rs
## qstn_typtrgt_g -0.288
## qstn_typtrgt_r -0.287 0.489
## rules_rspct -0.909 0.045 0.045
## qstn_typtrgt_g:_ 0.081 -0.384 -0.152 -0.088
## qstn_typtrgt_r:_ 0.084 -0.155 -0.377 -0.092
## qstn_typtrgt_g:_
## qstn_typtrgt_g
## qstn_typtrgt_r
## rules_rspct
## qstn_typtrgt_g:_
## qstn_typtrgt_r:_ 0.393
# All three subscales at once, each interacting with question type; retained
# workers only. The rendered output below ends with a "failed to converge"
# warning for this fit.
m.all <- glmer(
  correct ~ question_type * rules_respect +
    question_type * active_learning +
    question_type * affection +
    (1|workerid) +
    (1|q_num),
  data = filter(d.reg, !workerid %in% exclude),
  family = "binomial"
)
summary(m.all)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## correct ~ question_type * rules_respect + question_type * active_learning +
## question_type * affection + (1 | workerid) + (1 | q_num)
## Data: filter(d.reg, !workerid %in% exclude)
##
## AIC BIC logLik deviance df.resid
## 14193.0 14303.8 -7082.5 14165.0 20236
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.7236 0.0403 0.1998 0.4053 4.9276
##
## Random effects:
## Groups Name Variance Std.Dev.
## workerid (Intercept) 2.9396 1.7145
## q_num (Intercept) 0.9485 0.9739
## Number of obs: 20250, groups: workerid, 75; q_num, 15
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -2.11317 1.68638 -1.253
## question_typetarget_generalize -3.14545 0.73205 -4.297
## question_typetarget_recall -1.07483 0.72775 -1.477
## rules_respect 0.50869 0.24812 2.050
## active_learning -0.07878 0.32313 -0.244
## affection 0.31998 0.25589 1.250
## question_typetarget_generalize:rules_respect -0.24694 0.06584 -3.750
## question_typetarget_recall:rules_respect -0.20213 0.06278 -3.220
## question_typetarget_generalize:active_learning 0.92039 0.07863 11.705
## question_typetarget_recall:active_learning 0.46282 0.07638 6.060
## question_typetarget_generalize:affection 0.25705 0.06298 4.081
## question_typetarget_recall:affection 0.25139 0.06055 4.152
## Pr(>|z|)
## (Intercept) 0.210177
## question_typetarget_generalize 1.73e-05 ***
## question_typetarget_recall 0.139699
## rules_respect 0.040347 *
## active_learning 0.807385
## affection 0.211120
## question_typetarget_generalize:rules_respect 0.000176 ***
## question_typetarget_recall:rules_respect 0.001282 **
## question_typetarget_generalize:active_learning < 2e-16 ***
## question_typetarget_recall:active_learning 1.36e-09 ***
## question_typetarget_generalize:affection 4.48e-05 ***
## question_typetarget_recall:affection 3.30e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) qstn_typtrgt_g qstn_typtrgt_r rls_rs actv_l
## qstn_typtrgt_g -0.170
## qstn_typtrgt_r -0.179 0.466
## rules_rspct -0.422 0.005 0.010
## activ_lrnng -0.402 0.014 0.017 -0.320
## affection -0.365 0.004 0.007 0.147 -0.488
## qstn_typtrgt_gnrlz:r_ 0.025 -0.239 -0.080 -0.083 0.032
## qstn_typtrgt_rcll:r_ 0.030 -0.083 -0.232 -0.089 0.032
## qstn_typtrgt_gnrlz:c_ 0.034 -0.251 -0.102 0.036 -0.092
## qstn_typtrgt_rcll:c_ 0.037 -0.106 -0.245 0.035 -0.095
## qstn_typtrgt_g: 0.026 -0.192 -0.082 -0.007 0.039
## qstn_typtrgt_r: 0.029 -0.084 -0.193 -0.008 0.041
## affctn qstn_typtrgt_gnrlz:r_ qstn_typtrgt_rcll:r_
## qstn_typtrgt_g
## qstn_typtrgt_r
## rules_rspct
## activ_lrnng
## affection
## qstn_typtrgt_gnrlz:r_ -0.008
## qstn_typtrgt_rcll:r_ -0.008 0.400
## qstn_typtrgt_gnrlz:c_ 0.042 -0.264 -0.152
## qstn_typtrgt_rcll:c_ 0.043 -0.150 -0.278
## qstn_typtrgt_g: -0.080 0.012 0.032
## qstn_typtrgt_r: -0.086 0.032 0.044
## qstn_typtrgt_gnrlz:c_ qstn_typtrgt_rcll:c_
## qstn_typtrgt_g
## qstn_typtrgt_r
## rules_rspct
## activ_lrnng
## affection
## qstn_typtrgt_gnrlz:r_
## qstn_typtrgt_rcll:r_
## qstn_typtrgt_gnrlz:c_
## qstn_typtrgt_rcll:c_ 0.441
## qstn_typtrgt_g: -0.412 -0.166
## qstn_typtrgt_r: -0.170 -0.422
## qstn_typtrgt_g:
## qstn_typtrgt_g
## qstn_typtrgt_r
## rules_rspct
## activ_lrnng
## affection
## qstn_typtrgt_gnrlz:r_
## qstn_typtrgt_rcll:r_
## qstn_typtrgt_gnrlz:c_
## qstn_typtrgt_rcll:c_
## qstn_typtrgt_g:
## qstn_typtrgt_r: 0.397
## convergence code: 0
## Model failed to converge with max|grad| = 0.00207284 (tol = 0.001, component 1)
The analysis suggested by stats consultant:
# Ordinary least-squares fit of the numeric `correct` score on question type,
# the active_learning and rules_respect scores, and their interactions with
# question type, using only workers not in `exclude`.
# NOTE(review): unlike the glmer models earlier in this document, lm() has no
# (1|workerid) / (1|q_num) terms, so repeated answers from the same worker are
# treated as independent observations -- confirm this is what was intended.
fit <- lm(correct ~ question_type + active_learning + rules_respect + question_type*active_learning + question_type*rules_respect,
data = filter(d.reg, !workerid %in% exclude))
summary(fit)
##
## Call:
## lm(formula = correct ~ question_type + active_learning + rules_respect +
## question_type * active_learning + question_type * rules_respect,
## data = filter(d.reg, !workerid %in% exclude))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98763 0.04222 0.13408 0.24572 0.51821
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.678037 0.004808
## question_typetarget_generalize 0.176750 0.006800
## question_typetarget_recall 0.154876 0.006800
## active_learning 0.016927 0.009754
## rules_respect 0.089073 0.008739
## question_typetarget_generalize:active_learning 0.146261 0.013794
## question_typetarget_recall:active_learning 0.075587 0.013794
## question_typetarget_generalize:rules_respect -0.068474 0.012359
## question_typetarget_recall:rules_respect -0.058283 0.012359
## t value Pr(>|t|)
## (Intercept) 141.016 < 2e-16 ***
## question_typetarget_generalize 25.993 < 2e-16 ***
## question_typetarget_recall 22.776 < 2e-16 ***
## active_learning 1.735 0.0827 .
## rules_respect 10.192 < 2e-16 ***
## question_typetarget_generalize:active_learning 10.603 < 2e-16 ***
## question_typetarget_recall:active_learning 5.480 4.31e-08 ***
## question_typetarget_generalize:rules_respect -5.540 3.06e-08 ***
## question_typetarget_recall:rules_respect -4.716 2.42e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3845 on 20241 degrees of freedom
## Multiple R-squared: 0.07018, Adjusted R-squared: 0.06982
## F-statistic: 191 on 8 and 20241 DF, p-value: < 2.2e-16
Perform median splits on subscales (currently disabled: the code below is commented out).
# rr_med <-describe(wide_ss$rules_respect)
# al_med <-describe(wide_ss$active_learning)
# a_med <-describe(wide_ss$affection)
#
# wide_ss$rr_split[wide_ss$rules_respect <= rr_med$median] <- "low"
# wide_ss$rr_split[wide_ss$rules_respect > rr_med$median] <- "high"
#
# wide_ss$al_split[wide_ss$active_learning <= al_med$median] <- "low"
# wide_ss$al_split[wide_ss$active_learning > al_med$median] <- "high"
#
# wide_ss$a_split[wide_ss$affection <= a_med$median] <- "low"
# wide_ss$a_split[wide_ss$affection > a_med$median] <- "high"
#
# describeBy(wide_ss$mean_control_recall, group=wide_ss$rr_split)
# describeBy(wide_ss$mean_target_recall, group=wide_ss$rr_split)
# describeBy(wide_ss$mean_target_generalize, group=wide_ss$rr_split)
#
# describeBy(wide_ss$mean_control_recall, group=wide_ss$al_split)
# describeBy(wide_ss$mean_target_recall, group=wide_ss$al_split)
# describeBy(wide_ss$mean_target_generalize, group=wide_ss$al_split)
#
# describeBy(wide_ss$mean_control_recall, group=wide_ss$a_split)
# describeBy(wide_ss$mean_target_recall, group=wide_ss$a_split)
# describeBy(wide_ss$mean_target_generalize, group=wide_ss$a_split)