Data analysis of basic parenting values/attitudes survey.
Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
##
## The following object is masked from 'package:base':
##
## scale
##
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:ggplot2':
##
## %+%
##
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: boot
##
## Attaching package: 'boot'
##
## The following object is masked from 'package:psych':
##
## logit
##
## Loading required package: lattice
##
## Attaching package: 'lattice'
##
## The following object is masked from 'package:boot':
##
## melanoma
##
##
## Attaching package: 'nFactors'
##
## The following object is masked from 'package:lattice':
##
## parallel
##
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:tidyr':
##
## expand
Read in files and consolidate to the same directory.
files <- dir("../production-results/uptake_e1/")
d.raw <- data.frame()
for (f in files) {
jf <- paste("../production-results/uptake_e1/",f,sep="")
jd <- fromJSON(paste(readLines(jf), collapse=""))
# clean up different tasks
trial_type <- jd$answer$data$trial_type
#parenting questionnaire
sent <- jd$answers$data$sentence[trial_type != "uptake"]
rating <- as.numeric(jd$answers$data$rating[trial_type != "uptake"])
#uptake experiment
control_recall_1 = jd$answers$data$answer[1]
control_recall_2 = jd$answers$data$answer[2]
control_recall_3 = jd$answers$data$answer[3]
control_recall_4 = jd$answers$data$answer[4]
control_recall_5 = jd$answers$data$answer[5]
target_generalize_1 = jd$answers$data$answer[6]
target_generalize_2 = jd$answers$data$answer[7]
target_generalize_3 = jd$answers$data$answer[8]
target_generalize_4 = jd$answers$data$answer[9]
target_generalize_5 = jd$answers$data$answer[10]
target_recall_1 = jd$answers$data$answer[11]
target_recall_2 = jd$answers$data$answer[12]
target_recall_3 = jd$answers$data$answer[13]
target_recall_4 = jd$answers$data$answer[14]
target_recall_5 = jd$answers$data$answer[15]
reading_time_target = jd$answers$data$target_rt[1]
reading_time_control = jd$answers$data$control_rt[1]
time_questionnaire = jd$answers$data$questionnaire_rt[1]
#demographics
race <- as.character(jd$answers$data$race[1])
id <- data.frame(workerid = jd$WorkerId,
sent = sent,
rating = rating,
enjoy_target = jd$answers$data$enjoy_target,
enjoy_control = jd$answers$data$enjoy_control,
reading_ease_target = jd$answers$data$reading_ease_target,
reading_ease_control = jd$answers$data$reading_ease_control,
target_recall_1 = target_recall_1,
target_recall_2 = target_recall_2,
target_recall_3 = target_recall_3,
target_recall_4 = target_recall_4,
target_recall_5 = target_recall_5,
target_generalize_1 = target_generalize_1,
target_generalize_2 = target_generalize_2,
target_generalize_3 = target_generalize_3,
target_generalize_4 = target_generalize_4,
target_generalize_5 = target_generalize_5,
control_recall_1 = control_recall_1,
control_recall_2 = control_recall_2,
control_recall_3 = control_recall_3,
control_recall_4 = control_recall_4,
control_recall_5 = control_recall_5,
children = jd$answers$data$children,
language = jd$answers$data$homelang,
ses = jd$answers$data$ladder,
gender = jd$answers$data$gender,
age = jd$answers$data$age,
education = jd$answers$data$education,
ethnicity = jd$answers$data$ethnicity,
childAgeYoung = jd$answers$data$childAgeYoung,
childAgeOld = jd$answers$data$childAgeOld,
race = race,
reading_time_target = reading_time_target,
reading_time_control = reading_time_control,
time_questionnaire = time_questionnaire)
d.raw <- bind_rows(d.raw, id)
}
Map on question short forms so that we can use these instead.
labels <- read.csv("sent_forms_e7.csv")
labels$sent <- as.character(labels$sent)
Clean up labels.
d.raw$sent <- as.character(d.raw$sent)
d.raw$sent <- str_replace_all(d.raw$sent, "'", "")
d.raw$sent <- str_replace_all(d.raw$sent, "’", "")
d.raw$sent <- str_replace_all(d.raw$sent, "“", "")
d.raw$sent <- str_replace_all(d.raw$sent, "”", "")
d.raw$sent <- str_replace_all(d.raw$sent, "‘", "")
d.raw$sent <- str_replace_all(d.raw$sent, "â", "")
Merge. Recode uptake answers by accuracy.
d <- left_join(d.raw, labels)
d$rating[d$reverse_code == 1] <- 6 - d$rating[d$reverse_code == 1]
d$target_generalize_1 <- str_replace(d$target_generalize_1, "b","1")
d$target_generalize_1 <- str_replace(d$target_generalize_1, "a|c","0")
d$target_generalize_2 <- str_replace(d$target_generalize_2, "a","1")
d$target_generalize_2 <- str_replace(d$target_generalize_2, "b|c","0")
d$target_generalize_3 <- str_replace(d$target_generalize_3, "b","1")
d$target_generalize_3 <- str_replace(d$target_generalize_3, "a|c","0")
d$target_generalize_4 <- str_replace(d$target_generalize_4, "c","1")
d$target_generalize_4 <- str_replace(d$target_generalize_4, "a|b","0")
d$target_generalize_5 <- str_replace(d$target_generalize_5, "c","1")
d$target_generalize_5 <- str_replace(d$target_generalize_5, "a|b","0")
d$target_recall_1 <- str_replace(d$target_recall_1, "b","1")
d$target_recall_1 <- str_replace(d$target_recall_1, "a|c","0")
d$target_recall_2 <- str_replace(d$target_recall_2, "a","1")
d$target_recall_2 <- str_replace(d$target_recall_2, "b|c","0")
d$target_recall_3 <- str_replace(d$target_recall_3, "a","1")
d$target_recall_3 <- str_replace(d$target_recall_3, "b|c","0")
d$target_recall_4 <- str_replace(d$target_recall_4, "a","1")
d$target_recall_4 <- str_replace(d$target_recall_4, "b|c","0")
d$target_recall_5 <- str_replace(d$target_recall_5, "c","1")
d$target_recall_5 <- str_replace(d$target_recall_5, "a|b","0")
d$control_recall_1 <- str_replace(d$control_recall_1, "a","1")
d$control_recall_1 <- str_replace(d$control_recall_1, "b|c","0")
d$control_recall_2 <- str_replace(d$control_recall_2, "c","1")
d$control_recall_2 <- str_replace(d$control_recall_2, "a|b","0")
d$control_recall_3 <- str_replace(d$control_recall_3, "a","1")
d$control_recall_3 <- str_replace(d$control_recall_3, "b|c","0")
d$control_recall_4 <- str_replace(d$control_recall_4, "c","1")
d$control_recall_4 <- str_replace(d$control_recall_4, "a|b","0")
d$control_recall_5 <- str_replace(d$control_recall_5, "b","1")
d$control_recall_5 <- str_replace(d$control_recall_5, "a|c","0")
Plot demographic info.
subinfo <- d %>%
group_by(workerid) %>%
summarise(age = age[1],
gender = gender[1],
children = children[1],
ses = ses[1],
education = education[1],
language = language[1],
ethnicity = ethnicity[1],
youngestChildAge = childAgeYoung[1],
oldestChildAge = childAgeOld[1],
race = race[1],
reading_ease_target = reading_ease_target[1],
reading_ease_control = reading_ease_control[1],
enjoy_target = enjoy_target[1],
enjoy_control = enjoy_control[1],
target_recall_1 = as.numeric(target_recall_1[1]),
target_recall_2 = as.numeric(target_recall_2[1]),
target_recall_3 = as.numeric(target_recall_3[1]),
target_recall_4 = as.numeric(target_recall_4[1]),
target_recall_5 = as.numeric(target_recall_5[1]),
target_generalize_1 = as.numeric(target_generalize_1[1]),
target_generalize_2 = as.numeric(target_generalize_2[1]),
target_generalize_3 = as.numeric(target_generalize_3[1]),
target_generalize_4 = as.numeric(target_generalize_4[1]),
target_generalize_5 = as.numeric(target_generalize_5[1]),
control_recall_1 = as.numeric(control_recall_1[1]),
control_recall_2 = as.numeric(control_recall_2[1]),
control_recall_3 = as.numeric(control_recall_3[1]),
control_recall_4 = as.numeric(control_recall_4[1]),
control_recall_5 = as.numeric(control_recall_5[1]),
reading_time_target = reading_time_target[1],
reading_time_control = reading_time_control[1],
time_questionnaire = time_questionnaire[1])
subinfo$mean_target_recall <- (subinfo$target_recall_1 + subinfo$target_recall_2 + subinfo$target_recall_3 + subinfo$target_recall_4 + subinfo$target_recall_5)/5
subinfo$mean_target_generalize <- (subinfo$target_generalize_1 + subinfo$target_generalize_2 + subinfo$target_generalize_3 + subinfo$target_generalize_4 + subinfo$target_generalize_5)/5
subinfo$mean_control_recall <- (subinfo$control_recall_1 + subinfo$control_recall_2 + subinfo$control_recall_3 + subinfo$control_recall_4 + subinfo$control_recall_5)/5
subinfo$education <- factor(subinfo$education, levels = c("highSchool","someCollege","4year","someGrad","Grad"))
subinfo$gender <- str_replace_all(subinfo$gender, "female|FEMALE|F$|f$|Femal$|Females|Females","Female")
subinfo$gender <- str_replace_all(subinfo$gender, "^male|^Male|^MALE|^M$|^m$|^Maleq|Make", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "29|24|25|28|32|33|45", "")
subinfo$gender <- str_replace_all(subinfo$gender, "males", "male")
subinfo$gender <- str_replace_all(subinfo$gender, " ", "")
subinfo$language <- str_replace_all(subinfo$language, "english|eNGLISH|Engliah|ENGLISH|^eng$|Enlgish", "English")
subinfo$language <- str_replace_all(subinfo$language," ", "")
subinfo$language <- str_replace_all(subinfo$language,"arabic", "Arabic")
subinfo$language <- str_replace_all(subinfo$language,"chinese", "Chinese")
subinfo$language <- str_replace_all(subinfo$language,"german", "German")
subinfo$language <- str_replace_all(subinfo$language,"tagalog", "Tagalog")
subinfo$youngestChildAge <- factor(subinfo$youngestChildAge, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))
subinfo$oldestChildAge <- factor(subinfo$oldestChildAge, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))
subinfo$reading_ease_target <- factor(subinfo$reading_ease_target, levels = c("Very Difficult", "Somewhat Difficult", "Somewhat Easy", "Very Easy"))
subinfo$reading_ease_control <- factor(subinfo$reading_ease_control, levels = c("Very Difficult", "Somewhat Difficult", "Somewhat Easy", "Very Easy"))
subinfo$enjoy_target <- factor(subinfo$enjoy_target, levels = c("Very Unenjoyable", "Somewhat Unenjoyable", "Somewhat Enjoyable", "Very Enjoyable"))
subinfo$enjoy_control <- factor(subinfo$enjoy_control, levels = c("Very Unenjoyable", "Somewhat Unenjoyable", "Somewhat Enjoyable", "Very Enjoyable"))
qplot(ses, data=subinfo)
qplot(children, data=subinfo)
qplot(gender, data=subinfo)
qplot(education, data=subinfo)
qplot(age, data=subinfo)
qplot(language, data=subinfo)
qplot(ethnicity, data=subinfo)
qplot(race, data=subinfo)
qplot(youngestChildAge, data=subinfo)
qplot(oldestChildAge, data=subinfo)
qplot(reading_ease_target, data=subinfo)
qplot(reading_ease_control, data=subinfo)
qplot(enjoy_target, data=subinfo)
qplot(enjoy_control, data=subinfo)
describe(subinfo$mean_target_recall, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 0.76 0.25 0.8 0.8 0.3 0 1 1 0.02
describe(subinfo$mean_control_recall, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 0.63 0.24 0.6 0.64 0.3 0 1 1 0.02
describe(subinfo$mean_target_generalize, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 0.8 0.29 1 0.85 0 0 1 1 0.03
describe(subinfo$reading_time_target, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 197.74 154.72 167.34 183.51 160.36 2.41 900 897.59 15.47
describe(subinfo$reading_time_control, skew=FALSE, check=FALSE)
## vars n mean sd median trimmed mad min max range se
## 1 1 100 162.47 146.19 118.12 145.89 155.89 1.39 736.78 735.39 14.62
qplot(reading_time_target, mean_target_recall, data=subinfo)
qplot(reading_time_target, mean_target_generalize, data=subinfo)
qplot(reading_time_control, mean_control_recall, data=subinfo)
Now look at mean ratings across sentences.
rating_count <- table(d$rating)
rating_count
##
## 0 1 2 3 4 5 6
## 36 66 109 206 240 411 732
prop.table(rating_count)
##
## 0 1 2 3 4 5
## 0.02000000 0.03666667 0.06055556 0.11444444 0.13333333 0.22833333
## 6
## 0.40666667
ms <- d %>%
group_by(category, instrument, short_sent, reverse_code) %>%
multi_boot_standard(col = "rating") %>%
arrange(instrument, category, desc(mean))
ms$short_sent_ord <- factor(ms$short_sent,
levels = ms$short_sent)
Plot attitude.
qplot(short_sent_ord, mean, col = category,
ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
geom = "pointrange",
data = filter(ms, instrument == "attitudes")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("") +
ylab("Mean Rating") +
ylim(c(0,6)) +
scale_colour_solarized()
Plot mean subscale scores.
mc <- d %>%
group_by(category) %>%
multi_boot_standard(col = "rating") %>%
arrange(category, desc(mean))
qplot(category, mean,
geom = "bar", stat = "identity", position = "dodge",
data=mc) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
mcl <- d %>%
group_by(category, workerid) %>%
multi_boot_standard(col = "rating") %>%
arrange(category, desc(mean))%>%
spread(category, mean)
wide.attitudes <- d %>%
filter(instrument == "attitudes") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.mat <- as.matrix(select(wide.attitudes, -workerid))
alpha(x = alpha.mat)
##
## Reliability analysis
## Call: alpha(x = alpha.mat)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.84 0.86 0.91 0.25 6.2 0.031 4.6 0.76
##
## lower alpha upper 95% confidence boundaries
## 0.78 0.84 0.9
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r
## can learn good and bad 0.83 0.85 0.90 0.25
## children should be grateful 0.84 0.86 0.91 0.27
## dont impose structure 0.84 0.86 0.90 0.26
## give children comfort 0.83 0.85 0.90 0.25
## holding and cradling 0.83 0.84 0.89 0.24
## kids should not decide 0.84 0.86 0.90 0.26
## learn by playing 0.83 0.85 0.89 0.24
## learn from repetetive behaviors 0.85 0.87 0.91 0.28
## learn math before school 0.83 0.85 0.90 0.25
## not spoiled with too much attention 0.85 0.86 0.90 0.27
## punish for small rules 0.84 0.86 0.91 0.27
## reading before speaking 0.83 0.85 0.89 0.24
## reasons for rules 0.83 0.85 0.90 0.25
## respect parents teachers 0.83 0.85 0.90 0.25
## safe loving environment 0.83 0.84 0.89 0.24
## talk about emotions 0.83 0.85 0.90 0.25
## too much affection does not harm 0.84 0.86 0.90 0.26
## wait when told 0.84 0.85 0.90 0.26
## S/N alpha se
## can learn good and bad 5.6 0.033
## children should be grateful 6.3 0.031
## dont impose structure 6.0 0.032
## give children comfort 5.6 0.033
## holding and cradling 5.4 0.033
## kids should not decide 6.0 0.032
## learn by playing 5.5 0.033
## learn from repetetive behaviors 6.5 0.030
## learn math before school 5.7 0.033
## not spoiled with too much attention 6.2 0.031
## punish for small rules 6.2 0.031
## reading before speaking 5.5 0.034
## reasons for rules 5.8 0.032
## respect parents teachers 5.7 0.033
## safe loving environment 5.4 0.033
## talk about emotions 5.7 0.033
## too much affection does not harm 6.1 0.032
## wait when told 5.9 0.032
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## can learn good and bad 100 0.62 0.66 0.64 0.57 5.1 1.22
## children should be grateful 100 0.32 0.34 0.29 0.23 4.5 1.32
## dont impose structure 100 0.46 0.45 0.41 0.36 3.8 1.62
## give children comfort 100 0.65 0.66 0.65 0.59 5.0 1.27
## holding and cradling 100 0.71 0.73 0.72 0.66 5.2 1.18
## kids should not decide 100 0.47 0.45 0.41 0.37 3.7 1.70
## learn by playing 100 0.67 0.70 0.70 0.62 5.2 1.22
## learn from repetetive behaviors 100 0.26 0.26 0.19 0.15 4.4 1.49
## learn math before school 100 0.61 0.62 0.59 0.54 5.0 1.43
## not spoiled with too much attention 100 0.46 0.39 0.37 0.33 3.7 2.08
## punish for small rules 100 0.37 0.39 0.33 0.28 3.5 1.40
## reading before speaking 100 0.70 0.70 0.70 0.64 5.2 1.51
## reasons for rules 100 0.57 0.55 0.51 0.48 4.5 1.69
## respect parents teachers 100 0.58 0.60 0.59 0.51 4.8 1.23
## safe loving environment 100 0.71 0.74 0.74 0.67 5.5 0.92
## talk about emotions 100 0.63 0.62 0.59 0.56 4.9 1.54
## too much affection does not harm 100 0.48 0.42 0.40 0.37 4.3 1.87
## wait when told 100 0.47 0.53 0.50 0.40 4.9 1.17
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 6
## can learn good and bad 0.00 0.00 0.07 0.05 0.11 0.22 0.55
## children should be grateful 0.01 0.01 0.04 0.22 0.14 0.34 0.24
## dont impose structure 0.01 0.10 0.10 0.26 0.17 0.17 0.19
## give children comfort 0.01 0.01 0.03 0.07 0.14 0.23 0.51
## holding and cradling 0.00 0.01 0.03 0.09 0.07 0.26 0.54
## kids should not decide 0.04 0.08 0.12 0.21 0.18 0.19 0.18
## learn by playing 0.01 0.00 0.03 0.08 0.08 0.20 0.60
## learn from repetetive behaviors 0.03 0.01 0.06 0.16 0.17 0.29 0.28
## learn math before school 0.02 0.03 0.02 0.05 0.16 0.22 0.50
## not spoiled with too much attention 0.10 0.11 0.11 0.10 0.12 0.19 0.27
## punish for small rules 0.02 0.05 0.15 0.34 0.21 0.13 0.10
## reading before speaking 0.02 0.04 0.02 0.06 0.05 0.14 0.67
## reasons for rules 0.03 0.05 0.08 0.05 0.17 0.22 0.40
## respect parents teachers 0.00 0.02 0.05 0.09 0.10 0.43 0.31
## safe loving environment 0.00 0.01 0.01 0.02 0.07 0.21 0.68
## talk about emotions 0.01 0.04 0.06 0.06 0.13 0.14 0.56
## too much affection does not harm 0.05 0.08 0.07 0.08 0.14 0.22 0.36
## wait when told 0.00 0.01 0.04 0.07 0.19 0.31 0.38
## miss
## can learn good and bad 0
## children should be grateful 0
## dont impose structure 0
## give children comfort 0
## holding and cradling 0
## kids should not decide 0
## learn by playing 0
## learn from repetetive behaviors 0
## learn math before school 0
## not spoiled with too much attention 0
## punish for small rules 0
## reading before speaking 0
## reasons for rules 0
## respect parents teachers 0
## safe loving environment 0
## talk about emotions 0
## too much affection does not harm 0
## wait when told 0
wide.rules_respect <- d %>%
filter(category == "rules_respect") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.rr <- as.matrix(select(wide.rules_respect, -workerid))
alpha(x = alpha.rr)
##
## Reliability analysis
## Call: alpha(x = alpha.rr)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.72 0.73 0.72 0.31 2.7 0.067 4.2 0.91
##
## lower alpha upper 95% confidence boundaries
## 0.59 0.72 0.85
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## children should be grateful 0.67 0.68 0.66 0.30 2.2
## dont impose structure 0.67 0.69 0.67 0.31 2.3
## kids should not decide 0.70 0.72 0.69 0.34 2.5
## punish for small rules 0.69 0.71 0.70 0.33 2.4
## respect parents teachers 0.64 0.64 0.61 0.26 1.8
## wait when told 0.69 0.70 0.68 0.32 2.4
## alpha se
## children should be grateful 0.079
## dont impose structure 0.080
## kids should not decide 0.075
## punish for small rules 0.076
## respect parents teachers 0.084
## wait when told 0.077
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## children should be grateful 100 0.65 0.67 0.59 0.47 4.5 1.3
## dont impose structure 100 0.69 0.65 0.55 0.48 3.8 1.6
## kids should not decide 100 0.64 0.59 0.45 0.39 3.7 1.7
## punish for small rules 100 0.60 0.60 0.46 0.40 3.5 1.4
## respect parents teachers 100 0.75 0.77 0.75 0.62 4.8 1.2
## wait when told 100 0.57 0.63 0.52 0.40 4.9 1.2
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 6 miss
## children should be grateful 0.01 0.01 0.04 0.22 0.14 0.34 0.24 0
## dont impose structure 0.01 0.10 0.10 0.26 0.17 0.17 0.19 0
## kids should not decide 0.04 0.08 0.12 0.21 0.18 0.19 0.18 0
## punish for small rules 0.02 0.05 0.15 0.34 0.21 0.13 0.10 0
## respect parents teachers 0.00 0.02 0.05 0.09 0.10 0.43 0.31 0
## wait when told 0.00 0.01 0.04 0.07 0.19 0.31 0.38 0
wide.affection <- d %>%
filter(category == "affection") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.af <- as.matrix(select(wide.affection, -workerid))
alpha(x = alpha.af)
##
## Reliability analysis
## Call: alpha(x = alpha.af)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.78 0.81 0.84 0.41 4.2 0.058 4.8 1.1
##
## lower alpha upper 95% confidence boundaries
## 0.67 0.78 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r
## give children comfort 0.76 0.78 0.81 0.41
## holding and cradling 0.75 0.76 0.79 0.39
## not spoiled with too much attention 0.75 0.79 0.78 0.43
## safe loving environment 0.76 0.77 0.79 0.39
## talk about emotions 0.75 0.79 0.83 0.42
## too much affection does not harm 0.73 0.78 0.77 0.42
## S/N alpha se
## give children comfort 3.5 0.067
## holding and cradling 3.2 0.068
## not spoiled with too much attention 3.8 0.069
## safe loving environment 3.3 0.067
## talk about emotions 3.7 0.067
## too much affection does not harm 3.6 0.072
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## give children comfort 100 0.64 0.71 0.64 0.50 5.0 1.27
## holding and cradling 100 0.68 0.76 0.72 0.56 5.2 1.18
## not spoiled with too much attention 100 0.77 0.67 0.63 0.57 3.7 2.08
## safe loving environment 100 0.66 0.75 0.70 0.57 5.5 0.92
## talk about emotions 100 0.68 0.69 0.57 0.51 4.9 1.54
## too much affection does not harm 100 0.78 0.70 0.66 0.62 4.3 1.87
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 6
## give children comfort 0.01 0.01 0.03 0.07 0.14 0.23 0.51
## holding and cradling 0.00 0.01 0.03 0.09 0.07 0.26 0.54
## not spoiled with too much attention 0.10 0.11 0.11 0.10 0.12 0.19 0.27
## safe loving environment 0.00 0.01 0.01 0.02 0.07 0.21 0.68
## talk about emotions 0.01 0.04 0.06 0.06 0.13 0.14 0.56
## too much affection does not harm 0.05 0.08 0.07 0.08 0.14 0.22 0.36
## miss
## give children comfort 0
## holding and cradling 0
## not spoiled with too much attention 0
## safe loving environment 0
## talk about emotions 0
## too much affection does not harm 0
wide.active_learning <- d %>%
filter(category == "active_learning") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.al <- as.matrix(select(wide.active_learning, -workerid))
alpha(x = alpha.al)
##
## Reliability analysis
## Call: alpha(x = alpha.al)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.74 0.75 0.75 0.33 3 0.064 4.9 0.94
##
## lower alpha upper 95% confidence boundaries
## 0.61 0.74 0.86
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## can learn good and bad 0.69 0.70 0.69 0.32 2.4
## learn by playing 0.65 0.66 0.65 0.28 2.0
## learn from repetetive behaviors 0.79 0.79 0.77 0.44 3.9
## learn math before school 0.67 0.68 0.67 0.30 2.2
## reading before speaking 0.67 0.69 0.68 0.31 2.2
## reasons for rules 0.71 0.72 0.71 0.34 2.6
## alpha se
## can learn good and bad 0.076
## learn by playing 0.081
## learn from repetetive behaviors 0.064
## learn math before school 0.080
## reading before speaking 0.079
## reasons for rules 0.074
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## can learn good and bad 100 0.66 0.69 0.60 0.51 5.1 1.2
## learn by playing 100 0.78 0.79 0.76 0.66 5.2 1.2
## learn from repetetive behaviors 100 0.41 0.40 0.20 0.16 4.4 1.5
## learn math before school 100 0.74 0.74 0.68 0.58 5.0 1.4
## reading before speaking 100 0.73 0.73 0.67 0.56 5.2 1.5
## reasons for rules 100 0.67 0.64 0.53 0.45 4.5 1.7
##
## Non missing response frequency for each item
## 0 1 2 3 4 5 6 miss
## can learn good and bad 0.00 0.00 0.07 0.05 0.11 0.22 0.55 0
## learn by playing 0.01 0.00 0.03 0.08 0.08 0.20 0.60 0
## learn from repetetive behaviors 0.03 0.01 0.06 0.16 0.17 0.29 0.28 0
## learn math before school 0.02 0.03 0.02 0.05 0.16 0.22 0.50 0
## reading before speaking 0.02 0.04 0.02 0.06 0.05 0.14 0.67 0
## reasons for rules 0.03 0.05 0.08 0.05 0.17 0.22 0.40 0
Create a data frame that has subscale scores.
ss <- d %>%
group_by(workerid, category) %>%
summarize(rating = mean(rating))
ss <- left_join(ss, subinfo)
Visually it appears that active learning subscore is most predictive of generalization and recall of the target article, but no difference for the control article!
#Separate regressions of recall on subscale score for each category
qplot(rating, mean_control_recall, data=ss, geom=c("point", "smooth"),
method="lm", formula=y~x, color=category,
main="Regression of control recall on subscale score",
xlab="Subscale Score", ylab="Mean Target Recall")
#Separate regressions of generalization on subscale score for each category
qplot(rating, mean_target_recall, data=ss, geom=c("point", "smooth"),
method="lm", formula=y~x, color=category,
main="Regression of target recall on subscale score",
xlab="Subscale Score", ylab="Mean Target Recall")
#Separate regressions of generalization on subscale score for each category
qplot(rating, mean_target_generalize, data=ss, geom=c("point", "smooth"),
method="lm", formula=y~x, color=category,
main="Regression of target generalization on subscale score",
xlab="Subscale Score", ylab="Mean Target Recall")
Mixed Effects Model. Gather recall types.
ssr <- ss %>%
gather(task, accuracy, mean_target_recall:mean_control_recall)%>%
spread(category, rating)
#glmer( accuracy ~task * category + (1 | workerid), family = "binomial")
Create a wide data frame that has subscale scores.
In these seperate multiple regressions, active learning and affection scores predict target recall; only active learning scores predict target generalization; no subscale predicts control recall.
wide_ss <- ss %>%
spread(category, rating)
# Multiple Linear Regression of Recall on Subscale Scores
fit <- lm(mean_control_recall ~ active_learning + affection + rules_respect, data=wide_ss)
summary(fit) # show results
##
## Call:
## lm(formula = mean_control_recall ~ active_learning + affection +
## rules_respect, data = wide_ss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64098 -0.13088 0.06283 0.16599 0.40276
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.27340 0.14868 1.839 0.069 .
## active_learning 0.00693 0.03446 0.201 0.841
## affection 0.02938 0.02875 1.022 0.309
## rules_respect 0.04278 0.02907 1.472 0.144
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2401 on 96 degrees of freedom
## Multiple R-squared: 0.06043, Adjusted R-squared: 0.03106
## F-statistic: 2.058 on 3 and 96 DF, p-value: 0.1109
# Multiple Linear Regression of Recall on Subscale Scores
fit <- lm(mean_target_recall ~ active_learning + affection + rules_respect, data=wide_ss)
summary(fit) # show results
##
## Call:
## lm(formula = mean_target_recall ~ active_learning + affection +
## rules_respect, data = wide_ss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50232 -0.13563 0.03054 0.15929 0.46081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02366 0.13158 -0.180 0.8577
## active_learning 0.07970 0.03050 2.613 0.0104 *
## affection 0.05510 0.02544 2.166 0.0328 *
## rules_respect 0.03208 0.02572 1.247 0.2154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2125 on 96 degrees of freedom
## Multiple R-squared: 0.2904, Adjusted R-squared: 0.2682
## F-statistic: 13.09 on 3 and 96 DF, p-value: 3.075e-07
# Multiple Linear Regression of Recall on Subscale Scores
fit <- lm(mean_target_generalize ~ active_learning + affection + rules_respect, data=wide_ss)
summary(fit) # show results
##
## Call:
## lm(formula = mean_target_generalize ~ active_learning + affection +
## rules_respect, data = wide_ss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.75172 -0.10443 0.06965 0.14738 0.51555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01966 0.15738 -0.125 0.9009
## active_learning 0.09437 0.03648 2.587 0.0112 *
## affection 0.04025 0.03043 1.323 0.1891
## rules_respect 0.03849 0.03077 1.251 0.2140
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2542 on 96 degrees of freedom
## Multiple R-squared: 0.2321, Adjusted R-squared: 0.2081
## F-statistic: 9.67 on 3 and 96 DF, p-value: 1.228e-05
Perform median splits on subscales.
rr_med <-describe(wide_ss$rules_respect)
al_med <-describe(wide_ss$active_learning)
a_med <-describe(wide_ss$affection)
wide_ss$rr_split[wide_ss$rules_respect <= rr_med$median] <- "low"
wide_ss$rr_split[wide_ss$rules_respect > rr_med$median] <- "high"
wide_ss$al_split[wide_ss$active_learning <= al_med$median] <- "low"
wide_ss$al_split[wide_ss$active_learning > al_med$median] <- "high"
wide_ss$a_split[wide_ss$affection <= a_med$median] <- "low"
wide_ss$a_split[wide_ss$affection > a_med$median] <- "high"
describeBy(wide_ss$mean_control_recall, group=wide_ss$rr_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 46 0.68 0.23 0.8 0.7 0.3 0.2 1 0.8 -0.65 -0.35 0.03
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 54 0.58 0.25 0.6 0.58 0.3 0 1 1 -0.25 -0.89 0.03
describeBy(wide_ss$mean_target_recall, group=wide_ss$rr_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 46 0.82 0.2 0.8 0.84 0.3 0.2 1 0.8 -0.85 0.21 0.03
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 54 0.72 0.28 0.8 0.75 0.3 0 1 1 -0.54 -0.82 0.04
describeBy(wide_ss$mean_target_generalize, group=wide_ss$rr_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 46 0.86 0.25 1 0.92 0 0 1 1 -2 3.54 0.04
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 54 0.74 0.3 0.8 0.78 0.3 0 1 1 -0.95 -0.34 0.04
describeBy(wide_ss$mean_control_recall, group=wide_ss$al_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 49 0.65 0.26 0.8 0.66 0.3 0 1 1 -0.67 -0.56 0.04
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 51 0.6 0.23 0.6 0.61 0.3 0.2 1 0.8 -0.2 -0.84 0.03
describeBy(wide_ss$mean_target_recall, group=wide_ss$al_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 49 0.83 0.19 0.8 0.85 0.3 0.4 1 0.6 -0.73 -0.75 0.03
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 51 0.7 0.28 0.8 0.73 0.3 0 1 1 -0.5 -0.77 0.04
describeBy(wide_ss$mean_target_generalize, group=wide_ss$al_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 49 0.91 0.2 1 0.95 0 0.2 1 0.8 -2.27 4.5 0.03
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 51 0.69 0.32 0.8 0.73 0.3 0 1 1 -0.81 -0.47 0.04
describeBy(wide_ss$mean_control_recall, group=wide_ss$a_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 48 0.67 0.25 0.8 0.68 0.3 0 1 1 -0.64 -0.28 0.04
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 52 0.59 0.24 0.6 0.6 0.3 0.2 1 0.8 -0.3 -1.02 0.03
describeBy(wide_ss$mean_target_recall, group=wide_ss$a_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 48 0.86 0.17 1 0.88 0 0.6 1 0.4 -0.61 -1.3 0.02
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 52 0.67 0.28 0.6 0.7 0.3 0 1 1 -0.35 -0.93 0.04
describeBy(wide_ss$mean_target_generalize, group=wide_ss$a_split)
## group: high
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 48 0.9 0.18 1 0.94 0 0.2 1 0.8 -2.07 3.93 0.03
## --------------------------------------------------------
## group: low
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 52 0.7 0.33 0.8 0.74 0.3 0 1 1 -0.79 -0.63 0.05
ms <- ss %>%
filter(ses != "") %>%
mutate(ses.level = cut(as.numeric(ses), c(0,3,6,10))) %>%
group_by(ses.level, category) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=ses.level)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Drop the sparsest groups.
ms <- ss %>%
filter(race != "NULL") %>%
group_by(race, category) %>%
filter(n() > 5) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=race)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
ms <- ss %>%
filter(gender %in% c("Male","Female")) %>%
group_by(gender, category) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=gender)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Drop the sparsest groups.
ms <- ss %>%
filter(!is.na(education)) %>%
group_by(education, category) %>%
filter(n() > 5) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=education)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Drop the sparsest groups again.
ms <- ss %>%
filter(!is.na(age)) %>%
group_by(age, category) %>%
filter(n() > 5) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=age)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
ms <- ss %>%
filter(!is.na(children)) %>%
group_by(children, category) %>%
filter(n() > 5) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=children)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
ms <- ss %>%
filter(ethnicity != "") %>%
group_by(ethnicity, category) %>%
filter(n() > 5) %>%
multi_boot_standard(col = "rating")
ggplot(ms, aes(category, mean, fill=ethnicity)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Get eigenvalues for determining number of factors.
att.mat <- select(wide.attitudes, -workerid)
ev <- eigen(cor(x=att.mat)) # get eigenvalues
ap <- parallel(subject=nrow(att.mat), var=ncol(att.mat),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
It still looks like Active/Early Learning and Affection are grouping together (with a separate factor that seems to focus on physical affection perhaps?), while Rules and Respect is separate. I’m still wondering if we need to re-think the structure of the subscales. It might be that Early Learning and Affection are not fully differentiated latent theories, and that they group together into a “child development is a project” theory.
item.levels <- c("respect parents teachers", "wait when told","children should be grateful","punish for small rules", "dont impose structure", "kids should not decide", "safe loving environment", "holding and cradling", "give children comfort", "talk about emotions", "not spoiled with too much attention","too much affection does not harm", "can learn good and bad", "learn by playing", "learn from repetetive behaviors", "reasons for rules", "learn math before school", "reading before speaking")
n.factors <- 3
af <- factanal(x=att.mat, factors = n.factors, scores = "regression")
loadings <- data.frame(af$loadings[,1:n.factors]) %>%
mutate(item = rownames(af$loadings)) %>%
gather(factor, loading, starts_with("Factor"))
loadings$item <- factor(loadings$item, item.levels)
labels$item <- labels$short_sent
loadings <- join(loadings, labels, by = "item")
qplot(factor, item, fill=loading, geom="tile", data = loadings) +
scale_fill_continuous(low="#000000", high="#FFFFFF") +
facet_grid(category ~ ., scales = "free_y")
qplot(factor, item, fill=loading > .4, geom="tile", data = loadings) +
scale_fill_solarized() +
facet_grid(category ~ ., scales = "free_y")
Merge back in subject id info.
a.scores <- af$scores %>%
data.frame %>%
mutate(workerid = as.character(wide.attitudes$workerid)) %>%
left_join(subinfo)
a.factor.names <- c("Affection and Early Learning","Rules and Respect", "Affection 2")
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(ses != "") %>%
mutate(ses.level = cut(as.numeric(ses), c(0,3,6,10)),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(ses.level, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=ses.level)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(gender %in% c("Male","Female")) %>%
mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(gender, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=gender)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(education != "") %>%
mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(education, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=education)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(age != "") %>%
mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(age, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=age)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Do you have children?
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(children != "") %>%
mutate(children = children %in% c("1","2","3","4","5","morethan5"),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(children, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=children)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
How many?
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(children != "") %>%
mutate(n.children = children,
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(n.children, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=n.children)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(ethnicity != "") %>%
mutate(hispanic = ethnicity== "Hispanic",
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(hispanic, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=hispanic)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
mf <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(race != "NULL") %>%
mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(race, factor.name) %>%
multi_boot_standard(col = "score")
ggplot(mf, aes(factor.name, mean, fill=race)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
…