Data analysis of basic parenting values/attitudes survey, version 2.
Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
##
## The following object is masked from 'package:base':
##
## scale
##
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:ggplot2':
##
## %+%
##
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: boot
##
## Attaching package: 'boot'
##
## The following object is masked from 'package:psych':
##
## logit
##
## Loading required package: lattice
##
## Attaching package: 'lattice'
##
## The following object is masked from 'package:boot':
##
## melanoma
##
##
## Attaching package: 'nFactors'
##
## The following object is masked from 'package:lattice':
##
## parallel
Read in files and consolidate to the same directory.
files <- dir("../production-results/e3/")
d.raw <- data.frame()
for (f in files) {
jf <- paste("../production-results/e3/",f,sep="")
jd <- fromJSON(paste(readLines(jf), collapse=""))
# clean up instruction trial
sent <- jd$answers$data$sentence
rating <- as.numeric(jd$answers$data$rating)
trial_type <- jd$answer$data$trial_type
id <- data.frame(workerid = jd$WorkerId,
sent = sent,
rating = rating,
children = jd$answers$data$children,
language = jd$answer$data$homelang,
ses = jd$answer$data$ladder,
gender = jd$answer$data$gender,
age = jd$answer$data$age,
education = jd$answer$data$education)
d.raw <- bind_rows(d.raw, id)
}
Map on question short forms so that we can use these instead.
labels <- read.csv("sent_forms_e3.csv")
labels$sent <- as.character(labels$sent)
Clean up labels.
d.raw$sent <- as.character(d.raw$sent)
d.raw$sent <- str_replace_all(d.raw$sent, "'", "")
d.raw$sent <- str_replace_all(d.raw$sent, "’", "")
d.raw$sent <- str_replace_all(d.raw$sent, "“", "")
d.raw$sent <- str_replace_all(d.raw$sent, "”", "")
d.raw$sent <- str_replace_all(d.raw$sent, "‘", "")
d.raw$sent <- str_replace_all(d.raw$sent, "â", "'")
Merge.
d <- left_join(d.raw, labels) %>%
mutate(children = grepl("yes", children, ignore.case=TRUE))
Plot demographic info.
subinfo <- d %>%
group_by(workerid) %>%
summarise(children = children[1],
ses = ses[1],
gender = gender[1],
education = education[1],
age = age[1],
language = language[1])
subinfo$education <- factor(subinfo$education, levels = c("highSchool","someCollege","4year","someGrad","Grad"))
subinfo$gender <- str_replace_all(subinfo$gender, "female", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "FEMALE", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "F$", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "f$", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "Female ", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "^male", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^Male ", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^MALE", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^M$", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^m$", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^Maleq", "Male")
qplot(ses, data=subinfo)
qplot(children, data=subinfo)
qplot(gender, data=subinfo)
qplot(education, data=subinfo)
qplot(age, data=subinfo)
qplot(language, data=subinfo)
Now look at mean ratings across sentences.
ms <- d %>%
group_by(category, instrument, short_sent) %>%
multi_boot_standard(col = "rating") %>%
arrange(instrument, category, desc(mean))
ms$short_sent_ord <- factor(ms$short_sent,
levels = ms$short_sent)
Plot attitude.
qplot(short_sent_ord, mean, col = category,
ymin = ci_lower, ymax = ci_upper,
geom = "pointrange",
data = filter(ms, instrument == "attitudes")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("") +
ylab("Mean Rating") +
ylim(c(1,7)) +
scale_colour_solarized()
Attitudes items.
wide.attitudes <- d %>%
filter(instrument == "attitudes") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.mat <- as.matrix(select(wide.attitudes, -workerid))
alpha(x = alpha.mat)
## Some items ( punish misbehavior ) were negatively correlated with the total scale and probably should be reversed. To do this, run the function again with the 'check.keys=TRUE' option
##
## Reliability analysis
## Call: alpha(x = alpha.mat)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.76 0.77 0.85 0.12 3.4 0.04 5.3 0.54
##
## lower alpha upper 95% confidence boundaries
## 0.68 0.76 0.84
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc)
## babies blank slate 0.75 0.77 0.84
## baby pattern recognition 0.75 0.76 0.84
## baby pleasure 0.75 0.76 0.84
## behavior control 0.75 0.77 0.84
## behavior problems no love 0.76 0.78 0.85
## best have few rules 0.76 0.77 0.85
## child preferences 0.75 0.76 0.84
## early abstraction 0.76 0.77 0.84
## examples 0.74 0.76 0.83
## follow in good 0.75 0.76 0.84
## impulse control 0.74 0.76 0.83
## learn from play 0.75 0.76 0.84
## learning from senses 0.75 0.76 0.84
## learning similar 0.75 0.77 0.84
## newborn ignorance 0.75 0.77 0.84
## parents should be supportive 0.74 0.76 0.83
## physical maternal affection important 0.75 0.76 0.84
## piaget 0.75 0.76 0.84
## punish misbehavior 0.76 0.78 0.85
## reasons for rules 0.75 0.76 0.83
## respect adults 0.75 0.76 0.84
## reward is good 0.75 0.77 0.84
## safe and loving environment 0.75 0.77 0.84
## stage theory 0.75 0.77 0.84
## strict rules good 0.75 0.77 0.84
## strong bond with mom 0.74 0.76 0.84
## average_r S/N alpha se
## babies blank slate 0.12 3.3 0.042
## baby pattern recognition 0.11 3.2 0.041
## baby pleasure 0.11 3.2 0.042
## behavior control 0.12 3.3 0.041
## behavior problems no love 0.12 3.5 0.040
## best have few rules 0.12 3.4 0.040
## child preferences 0.12 3.3 0.041
## early abstraction 0.12 3.4 0.040
## examples 0.11 3.2 0.042
## follow in good 0.12 3.3 0.041
## impulse control 0.11 3.1 0.043
## learn from play 0.11 3.2 0.041
## learning from senses 0.11 3.2 0.042
## learning similar 0.12 3.3 0.041
## newborn ignorance 0.12 3.3 0.041
## parents should be supportive 0.11 3.2 0.042
## physical maternal affection important 0.11 3.2 0.042
## piaget 0.11 3.2 0.042
## punish misbehavior 0.12 3.5 0.039
## reasons for rules 0.11 3.1 0.042
## respect adults 0.11 3.2 0.041
## reward is good 0.12 3.4 0.041
## safe and loving environment 0.12 3.3 0.041
## stage theory 0.12 3.3 0.041
## strict rules good 0.12 3.3 0.042
## strong bond with mom 0.11 3.1 0.043
##
## Item statistics
## n raw.r std.r r.cor r.drop mean
## babies blank slate 100 0.44 0.39 0.353 0.32 4.6
## baby pattern recognition 100 0.35 0.43 0.406 0.28 5.9
## baby pleasure 100 0.40 0.44 0.407 0.32 5.8
## behavior control 100 0.41 0.38 0.347 0.30 4.9
## behavior problems no love 100 0.24 0.20 0.137 0.12 4.4
## best have few rules 100 0.24 0.22 0.168 0.14 3.7
## child preferences 100 0.32 0.39 0.366 0.25 6.0
## early abstraction 100 0.25 0.30 0.258 0.16 5.8
## examples 100 0.46 0.48 0.467 0.38 5.7
## follow in good 100 0.37 0.39 0.359 0.29 5.6
## impulse control 100 0.54 0.52 0.502 0.46 5.0
## learn from play 100 0.33 0.41 0.386 0.27 6.3
## learning from senses 100 0.44 0.46 0.440 0.36 5.9
## learning similar 100 0.42 0.37 0.336 0.30 4.3
## newborn ignorance 100 0.39 0.36 0.315 0.28 4.8
## parents should be supportive 100 0.49 0.43 0.415 0.38 3.5
## physical maternal affection important 100 0.43 0.43 0.411 0.33 5.7
## piaget 100 0.44 0.47 0.443 0.37 6.0
## punish misbehavior 100 0.24 0.14 0.096 0.10 3.7
## reasons for rules 100 0.45 0.51 0.495 0.38 6.1
## respect adults 100 0.39 0.41 0.377 0.32 6.1
## reward is good 100 0.32 0.28 0.225 0.21 4.6
## safe and loving environment 100 0.35 0.38 0.354 0.25 5.7
## stage theory 100 0.27 0.36 0.323 0.20 6.1
## strict rules good 100 0.43 0.37 0.338 0.32 4.8
## strong bond with mom 100 0.53 0.54 0.517 0.45 5.7
## sd
## babies blank slate 1.88
## baby pattern recognition 1.10
## baby pleasure 1.32
## behavior control 1.59
## behavior problems no love 1.63
## best have few rules 1.46
## child preferences 1.04
## early abstraction 1.22
## examples 1.31
## follow in good 1.11
## impulse control 1.41
## learn from play 0.93
## learning from senses 1.35
## learning similar 1.78
## newborn ignorance 1.71
## parents should be supportive 1.79
## physical maternal affection important 1.44
## piaget 1.13
## punish misbehavior 1.88
## reasons for rules 1.13
## respect adults 1.11
## reward is good 1.55
## safe and loving environment 1.39
## stage theory 1.00
## strict rules good 1.67
## strong bond with mom 1.40
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 7
## babies blank slate 0.07 0.09 0.15 0.16 0.10 0.25 0.18
## baby pattern recognition 0.00 0.00 0.05 0.09 0.08 0.46 0.32
## baby pleasure 0.02 0.02 0.01 0.09 0.17 0.36 0.33
## behavior control 0.05 0.05 0.05 0.19 0.24 0.27 0.15
## behavior problems no love 0.06 0.08 0.13 0.25 0.16 0.25 0.07
## best have few rules 0.05 0.14 0.29 0.25 0.13 0.10 0.04
## child preferences 0.00 0.01 0.01 0.07 0.17 0.37 0.37
## early abstraction 0.00 0.00 0.05 0.13 0.15 0.28 0.39
## examples 0.01 0.01 0.04 0.13 0.20 0.28 0.33
## follow in good 0.00 0.01 0.03 0.12 0.22 0.40 0.22
## impulse control 0.01 0.05 0.10 0.17 0.23 0.32 0.12
## learn from play 0.00 0.00 0.01 0.06 0.09 0.34 0.50
## learning from senses 0.03 0.00 0.04 0.05 0.14 0.37 0.37
## learning similar 0.08 0.11 0.14 0.18 0.16 0.24 0.09
## newborn ignorance 0.06 0.04 0.14 0.13 0.23 0.23 0.17
## parents should be supportive 0.13 0.23 0.15 0.22 0.09 0.11 0.07
## physical maternal affection important 0.04 0.01 0.01 0.09 0.17 0.33 0.35
## piaget 0.00 0.01 0.03 0.08 0.13 0.36 0.39
## punish misbehavior 0.19 0.12 0.13 0.21 0.15 0.13 0.07
## reasons for rules 0.00 0.01 0.04 0.05 0.11 0.35 0.44
## respect adults 0.00 0.01 0.01 0.09 0.14 0.28 0.47
## reward is good 0.04 0.06 0.12 0.20 0.27 0.20 0.11
## safe and loving environment 0.00 0.02 0.08 0.10 0.16 0.25 0.39
## stage theory 0.00 0.00 0.02 0.05 0.18 0.31 0.44
## strict rules good 0.05 0.05 0.12 0.20 0.21 0.20 0.17
## strong bond with mom 0.02 0.03 0.02 0.09 0.19 0.32 0.33
## miss
## babies blank slate 0
## baby pattern recognition 0
## baby pleasure 0
## behavior control 0
## behavior problems no love 0
## best have few rules 0
## child preferences 0
## early abstraction 0
## examples 0
## follow in good 0
## impulse control 0
## learn from play 0
## learning from senses 0
## learning similar 0
## newborn ignorance 0
## parents should be supportive 0
## physical maternal affection important 0
## piaget 0
## punish misbehavior 0
## reasons for rules 0
## respect adults 0
## reward is good 0
## safe and loving environment 0
## stage theory 0
## strict rules good 0
## strong bond with mom 0
Get eigenvalues for determining number of factors.
att.mat <- select(wide.attitudes, -workerid)
ev <- eigen(cor(x=att.mat)) # get eigenvalues
ap <- parallel(subject=nrow(att.mat), var=ncol(att.mat),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
Now plot factor analysis
n.factors <- 6
af <- factanal(x = att.mat, factors = n.factors, scores = "regression")
loadings <- data.frame(af$loadings[,1:n.factors]) %>%
mutate(item = rownames(af$loadings)) %>%
gather(factor, loading, starts_with("Factor"))
qplot(factor, item, fill=loading, geom="tile", data = loadings) +
scale_fill_continuous(low="#000000", high="#FFFFFF")
qplot(factor, item, fill=loading > .4, geom="tile", data = loadings) +
scale_fill_solarized()
Merge back in subject id info.
a.scores <- af$scores %>%
data.frame %>%
mutate(workerid = wide.attitudes$workerid) %>%
left_join(subinfo)
And summary:
Note: I re-labeled the factors, because the order appears different this time.
a.factor.names <- c("Early learning","Rules and Respect" ,"Affection", "Early Abstraction", "Behavior Control")
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(ses != "") %>%
mutate(high.ses = ses > 5,
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(high.ses, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=high.ses,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(gender != "") %>%
mutate(male.gender = gender == "Male",
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(male.gender, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=male.gender,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(education != "") %>%
mutate(high.education = (education == "4year" |education == "someGrad" | education == "Grad"),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(high.education, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=high.education,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(age != "") %>%
mutate(young.age = (age == "0-19" |age == "20-29"),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(young.age, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=young.age,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
attit <- d %>%
filter(instrument == "attitudes") %>%
group_by(workerid) %>%
summarise(mean = mean(rating)) %>%
left_join(subinfo) %>%
filter(ses != "") %>%
mutate(high_ses = ses > 5) %>%
group_by(high_ses) %>%
multi_boot_standard(col = "mean")
qplot(high_ses, mean, ymin = ci_lower, ymax = ci_upper,
group = 1,
geom = c("pointrange","smooth"), method = "lm", data = attit)
Next, let’s look at a principal components analysis for the data. This approach tries to understand underlying orthogonal dimensions of variability.
First the attitude items.
row.names(wide.attitudes) <- wide.attitudes$workerid
pcs <- princomp(x = select(wide.attitudes, -workerid))
plot(pcs)
Let’s try to plot the items by hand for more detail.
pc.items <- data.frame(pc1 = pcs$loadings[,1],
pc2 = pcs$loadings[,2],
pc3 = pcs$loadings[,3],
short_sent = row.names(pcs$loadings)) %>%
left_join(labels)
qplot(pc1, pc2, col = category,
label = short_sent, hjust = 1,
geom = c("text","point"),
data = pc.items) +
scale_colour_solarized() +
xlim(-.15,.35)
pc.inds <- data.frame(pc1 = as.numeric(pcs$scores[,1]),
pc2 = as.numeric(pcs$scores[,2]),
pc3 = as.numeric(pcs$scores[,3]),
workerid = row.names(pcs$scores)) %>%
left_join(subinfo)
qplot(pc1, pc2, col = children,
hjust = 1,
data = pc.inds) +
scale_colour_solarized()
pc.data <- data.frame(pc1 = pcs$loadings[,1],
pc2 = pcs$loadings[,2],
pc3 = pcs$loadings[,3],
short_sent = row.names(pcs$loadings)) %>%
left_join(labels)
qplot(pc1, pc2, col = category,
label = short_sent, hjust = 1,
geom = c("text","point"),
data = pc.data) +
xlim(-.7,.3) +
scale_colour_solarized()