Data analysis of basic parenting values/attitudes survey, version 2.

1 Data preprocessing

Preliminaries.

## [1] "dplyr"   "langcog" "tidyr"   "ggplot2" "lme4"
## 
## Attaching package: 'langcog'
## 
## The following object is masked from 'package:base':
## 
##     scale
## 
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:ggplot2':
## 
##     %+%
## 
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: boot
## 
## Attaching package: 'boot'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## 
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## 
## Attaching package: 'nFactors'
## 
## The following object is masked from 'package:lattice':
## 
##     parallel

Read in files and consolidate to the same directory.

files <- dir("../production-results/e3/")
d.raw <- data.frame()

for (f in files) {
  jf <- paste("../production-results/e3/",f,sep="")
  jd <- fromJSON(paste(readLines(jf), collapse=""))
  
  # clean up instruction trial
  sent <- jd$answers$data$sentence
  rating <- as.numeric(jd$answers$data$rating)
  trial_type <- jd$answer$data$trial_type
  
  id <- data.frame(workerid = jd$WorkerId, 
                   sent = sent,
                   rating = rating,
                   children = jd$answers$data$children,
                   language = jd$answer$data$homelang,
                   ses = jd$answer$data$ladder,
                   gender = jd$answer$data$gender,
                   age = jd$answer$data$age,
                   education = jd$answer$data$education)
  d.raw <- bind_rows(d.raw, id)
}

Map on question short forms so that we can use these instead.

labels <- read.csv("sent_forms_e3.csv")
labels$sent <- as.character(labels$sent)

Clean up labels.

d.raw$sent <- as.character(d.raw$sent)
d.raw$sent <- str_replace_all(d.raw$sent, "'", "")
d.raw$sent <- str_replace_all(d.raw$sent, "’", "")
d.raw$sent <- str_replace_all(d.raw$sent, "“", "")
d.raw$sent <- str_replace_all(d.raw$sent, "”", "")
d.raw$sent <- str_replace_all(d.raw$sent, "‘", "")
d.raw$sent <- str_replace_all(d.raw$sent, "â", "'")

Merge.

d <- left_join(d.raw, labels) %>%
  mutate(children = grepl("yes", children, ignore.case=TRUE))

Plot demographic info.

subinfo <- d %>% 
  group_by(workerid) %>%
  summarise(children = children[1],
            ses = ses[1],
            gender = gender[1],
            education = education[1],
            age = age[1],
            language = language[1])

subinfo$education <- factor(subinfo$education, levels = c("highSchool","someCollege","4year","someGrad","Grad"))

subinfo$gender <- str_replace_all(subinfo$gender, "female", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "FEMALE", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "F$", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "f$", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "Female ", "Female")
subinfo$gender <- str_replace_all(subinfo$gender, "^male", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^Male ", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^MALE", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^M$", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^m$", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "^Maleq", "Male")

qplot(ses, data=subinfo)

qplot(children, data=subinfo)

qplot(gender, data=subinfo)

qplot(education, data=subinfo)

qplot(age, data=subinfo)

qplot(language, data=subinfo)

2 Basic analyses

Now look at mean ratings across sentences.

ms <- d %>%
  group_by(category, instrument, short_sent) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(instrument, category, desc(mean)) 

ms$short_sent_ord <- factor(ms$short_sent, 
                             levels = ms$short_sent)

Plot attitude.

qplot(short_sent_ord, mean, col = category,
      ymin = ci_lower, ymax = ci_upper, 
      geom = "pointrange",
      data = filter(ms, instrument == "attitudes")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("") + 
  ylab("Mean Rating") + 
  ylim(c(1,7)) + 
  scale_colour_solarized()

3 Scale reliability

Useful chapter here

Attitudes items.

wide.attitudes <- d %>% 
  filter(instrument == "attitudes") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.mat <- as.matrix(select(wide.attitudes, -workerid))
alpha(x = alpha.mat)
## Some items ( punish misbehavior ) were negatively correlated with the total scale and probably should be reversed.  To do this, run the function again with the 'check.keys=TRUE' option
## 
## Reliability analysis   
## Call: alpha(x = alpha.mat)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd
##       0.76      0.77    0.85      0.12 3.4 0.04  5.3 0.54
## 
##  lower alpha upper     95% confidence boundaries
## 0.68 0.76 0.84 
## 
##  Reliability if an item is dropped:
##                                       raw_alpha std.alpha G6(smc)
## babies blank slate                         0.75      0.77    0.84
## baby pattern recognition                   0.75      0.76    0.84
## baby pleasure                              0.75      0.76    0.84
## behavior control                           0.75      0.77    0.84
## behavior problems no love                  0.76      0.78    0.85
## best have few rules                        0.76      0.77    0.85
## child preferences                          0.75      0.76    0.84
## early abstraction                          0.76      0.77    0.84
## examples                                   0.74      0.76    0.83
## follow in good                             0.75      0.76    0.84
## impulse control                            0.74      0.76    0.83
## learn from play                            0.75      0.76    0.84
## learning from senses                       0.75      0.76    0.84
## learning similar                           0.75      0.77    0.84
## newborn ignorance                          0.75      0.77    0.84
## parents should be supportive               0.74      0.76    0.83
## physical maternal affection important      0.75      0.76    0.84
## piaget                                     0.75      0.76    0.84
## punish misbehavior                         0.76      0.78    0.85
## reasons for rules                          0.75      0.76    0.83
## respect adults                             0.75      0.76    0.84
## reward is good                             0.75      0.77    0.84
## safe and loving environment                0.75      0.77    0.84
## stage theory                               0.75      0.77    0.84
## strict rules good                          0.75      0.77    0.84
## strong bond with mom                       0.74      0.76    0.84
##                                       average_r S/N alpha se
## babies blank slate                         0.12 3.3    0.042
## baby pattern recognition                   0.11 3.2    0.041
## baby pleasure                              0.11 3.2    0.042
## behavior control                           0.12 3.3    0.041
## behavior problems no love                  0.12 3.5    0.040
## best have few rules                        0.12 3.4    0.040
## child preferences                          0.12 3.3    0.041
## early abstraction                          0.12 3.4    0.040
## examples                                   0.11 3.2    0.042
## follow in good                             0.12 3.3    0.041
## impulse control                            0.11 3.1    0.043
## learn from play                            0.11 3.2    0.041
## learning from senses                       0.11 3.2    0.042
## learning similar                           0.12 3.3    0.041
## newborn ignorance                          0.12 3.3    0.041
## parents should be supportive               0.11 3.2    0.042
## physical maternal affection important      0.11 3.2    0.042
## piaget                                     0.11 3.2    0.042
## punish misbehavior                         0.12 3.5    0.039
## reasons for rules                          0.11 3.1    0.042
## respect adults                             0.11 3.2    0.041
## reward is good                             0.12 3.4    0.041
## safe and loving environment                0.12 3.3    0.041
## stage theory                               0.12 3.3    0.041
## strict rules good                          0.12 3.3    0.042
## strong bond with mom                       0.11 3.1    0.043
## 
##  Item statistics 
##                                         n raw.r std.r r.cor r.drop mean
## babies blank slate                    100  0.44  0.39 0.353   0.32  4.6
## baby pattern recognition              100  0.35  0.43 0.406   0.28  5.9
## baby pleasure                         100  0.40  0.44 0.407   0.32  5.8
## behavior control                      100  0.41  0.38 0.347   0.30  4.9
## behavior problems no love             100  0.24  0.20 0.137   0.12  4.4
## best have few rules                   100  0.24  0.22 0.168   0.14  3.7
## child preferences                     100  0.32  0.39 0.366   0.25  6.0
## early abstraction                     100  0.25  0.30 0.258   0.16  5.8
## examples                              100  0.46  0.48 0.467   0.38  5.7
## follow in good                        100  0.37  0.39 0.359   0.29  5.6
## impulse control                       100  0.54  0.52 0.502   0.46  5.0
## learn from play                       100  0.33  0.41 0.386   0.27  6.3
## learning from senses                  100  0.44  0.46 0.440   0.36  5.9
## learning similar                      100  0.42  0.37 0.336   0.30  4.3
## newborn ignorance                     100  0.39  0.36 0.315   0.28  4.8
## parents should be supportive          100  0.49  0.43 0.415   0.38  3.5
## physical maternal affection important 100  0.43  0.43 0.411   0.33  5.7
## piaget                                100  0.44  0.47 0.443   0.37  6.0
## punish misbehavior                    100  0.24  0.14 0.096   0.10  3.7
## reasons for rules                     100  0.45  0.51 0.495   0.38  6.1
## respect adults                        100  0.39  0.41 0.377   0.32  6.1
## reward is good                        100  0.32  0.28 0.225   0.21  4.6
## safe and loving environment           100  0.35  0.38 0.354   0.25  5.7
## stage theory                          100  0.27  0.36 0.323   0.20  6.1
## strict rules good                     100  0.43  0.37 0.338   0.32  4.8
## strong bond with mom                  100  0.53  0.54 0.517   0.45  5.7
##                                         sd
## babies blank slate                    1.88
## baby pattern recognition              1.10
## baby pleasure                         1.32
## behavior control                      1.59
## behavior problems no love             1.63
## best have few rules                   1.46
## child preferences                     1.04
## early abstraction                     1.22
## examples                              1.31
## follow in good                        1.11
## impulse control                       1.41
## learn from play                       0.93
## learning from senses                  1.35
## learning similar                      1.78
## newborn ignorance                     1.71
## parents should be supportive          1.79
## physical maternal affection important 1.44
## piaget                                1.13
## punish misbehavior                    1.88
## reasons for rules                     1.13
## respect adults                        1.11
## reward is good                        1.55
## safe and loving environment           1.39
## stage theory                          1.00
## strict rules good                     1.67
## strong bond with mom                  1.40
## 
## Non missing response frequency for each item
##                                          1    2    3    4    5    6    7
## babies blank slate                    0.07 0.09 0.15 0.16 0.10 0.25 0.18
## baby pattern recognition              0.00 0.00 0.05 0.09 0.08 0.46 0.32
## baby pleasure                         0.02 0.02 0.01 0.09 0.17 0.36 0.33
## behavior control                      0.05 0.05 0.05 0.19 0.24 0.27 0.15
## behavior problems no love             0.06 0.08 0.13 0.25 0.16 0.25 0.07
## best have few rules                   0.05 0.14 0.29 0.25 0.13 0.10 0.04
## child preferences                     0.00 0.01 0.01 0.07 0.17 0.37 0.37
## early abstraction                     0.00 0.00 0.05 0.13 0.15 0.28 0.39
## examples                              0.01 0.01 0.04 0.13 0.20 0.28 0.33
## follow in good                        0.00 0.01 0.03 0.12 0.22 0.40 0.22
## impulse control                       0.01 0.05 0.10 0.17 0.23 0.32 0.12
## learn from play                       0.00 0.00 0.01 0.06 0.09 0.34 0.50
## learning from senses                  0.03 0.00 0.04 0.05 0.14 0.37 0.37
## learning similar                      0.08 0.11 0.14 0.18 0.16 0.24 0.09
## newborn ignorance                     0.06 0.04 0.14 0.13 0.23 0.23 0.17
## parents should be supportive          0.13 0.23 0.15 0.22 0.09 0.11 0.07
## physical maternal affection important 0.04 0.01 0.01 0.09 0.17 0.33 0.35
## piaget                                0.00 0.01 0.03 0.08 0.13 0.36 0.39
## punish misbehavior                    0.19 0.12 0.13 0.21 0.15 0.13 0.07
## reasons for rules                     0.00 0.01 0.04 0.05 0.11 0.35 0.44
## respect adults                        0.00 0.01 0.01 0.09 0.14 0.28 0.47
## reward is good                        0.04 0.06 0.12 0.20 0.27 0.20 0.11
## safe and loving environment           0.00 0.02 0.08 0.10 0.16 0.25 0.39
## stage theory                          0.00 0.00 0.02 0.05 0.18 0.31 0.44
## strict rules good                     0.05 0.05 0.12 0.20 0.21 0.20 0.17
## strong bond with mom                  0.02 0.03 0.02 0.09 0.19 0.32 0.33
##                                       miss
## babies blank slate                       0
## baby pattern recognition                 0
## baby pleasure                            0
## behavior control                         0
## behavior problems no love                0
## best have few rules                      0
## child preferences                        0
## early abstraction                        0
## examples                                 0
## follow in good                           0
## impulse control                          0
## learn from play                          0
## learning from senses                     0
## learning similar                         0
## newborn ignorance                        0
## parents should be supportive             0
## physical maternal affection important    0
## piaget                                   0
## punish misbehavior                       0
## reasons for rules                        0
## respect adults                           0
## reward is good                           0
## safe and loving environment              0
## stage theory                             0
## strict rules good                        0
## strong bond with mom                     0

4 Factor analysis

Get eigenvalues for determining number of factors.

4.1 Attitudes.

att.mat <- select(wide.attitudes, -workerid)
ev <- eigen(cor(x=att.mat)) # get eigenvalues
ap <- parallel(subject=nrow(att.mat), var=ncol(att.mat),
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

Now plot factor analysis

n.factors <- 6
af <- factanal(x = att.mat, factors = n.factors, scores = "regression")
loadings <- data.frame(af$loadings[,1:n.factors]) %>%
  mutate(item = rownames(af$loadings)) %>%
  gather(factor, loading, starts_with("Factor"))

qplot(factor, item, fill=loading, geom="tile", data = loadings) + 
  scale_fill_continuous(low="#000000", high="#FFFFFF")

qplot(factor, item, fill=loading > .4, geom="tile", data = loadings) + 
  scale_fill_solarized()

Merge back in subject id info.

a.scores <- af$scores %>%
  data.frame %>%
  mutate(workerid = wide.attitudes$workerid) %>%
  left_join(subinfo)

And summary:

Note: I re-labeled the factors, because the order appears different this time.

4.1.1 Factors by SES

a.factor.names <- c("Early learning","Rules and Respect" ,"Affection", "Early Abstraction", "Behavior Control")

ms <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(ses != "") %>%
  mutate(high.ses = ses > 5, 
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(high.ses, factor.name) %>%
  multi_boot_standard(col = "score") 

qplot(factor.name, mean, fill=high.ses, 
      geom = "bar", stat = "identity", position = "dodge", 
      data=ms) + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

4.1.2 Factors by Gender

ms <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(gender != "") %>%
  mutate(male.gender = gender == "Male",
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(male.gender, factor.name) %>%
  multi_boot_standard(col = "score") 

qplot(factor.name, mean, fill=male.gender, 
      geom = "bar", stat = "identity", position = "dodge", 
      data=ms) + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

4.1.3 Factors by Education

ms <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(education != "") %>%
  mutate(high.education = (education == "4year" |education == "someGrad" | education == "Grad"),
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(high.education, factor.name) %>%
  multi_boot_standard(col = "score") 

qplot(factor.name, mean, fill=high.education, 
      geom = "bar", stat = "identity", position = "dodge", 
      data=ms) + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

4.1.4 Factors by Age

ms <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(age != "") %>%
  mutate(young.age = (age == "0-19" |age == "20-29"),
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(young.age, factor.name) %>%
  multi_boot_standard(col = "score") 

qplot(factor.name, mean, fill=young.age, 
      geom = "bar", stat = "identity", position = "dodge", 
      data=ms) + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

5 Attitudes by SES

attit <- d %>% 
  filter(instrument == "attitudes") %>%
  group_by(workerid) %>%
  summarise(mean = mean(rating)) %>%
  left_join(subinfo) %>%
  filter(ses != "") %>%
  mutate(high_ses = ses > 5) %>%
  group_by(high_ses) %>%
  multi_boot_standard(col = "mean")

qplot(high_ses, mean, ymin = ci_lower, ymax = ci_upper, 
      group = 1,
      geom = c("pointrange","smooth"), method = "lm", data = attit)

6 Principal components analysis

Next, let’s look at a principal components analysis for the data. This approach tries to understand underlying orthogonal dimensions of variability.

6.1 Attitude items

First the attitude items.

row.names(wide.attitudes) <- wide.attitudes$workerid
pcs <- princomp(x = select(wide.attitudes, -workerid))
plot(pcs)

Let’s try to plot the items by hand for more detail.

pc.items <- data.frame(pc1 = pcs$loadings[,1],
                      pc2 = pcs$loadings[,2],
                      pc3 = pcs$loadings[,3],
                      short_sent = row.names(pcs$loadings)) %>%
  left_join(labels)

qplot(pc1, pc2, col = category,
      label = short_sent, hjust = 1,
      geom = c("text","point"),
      data = pc.items) + 
  scale_colour_solarized() + 
  xlim(-.15,.35)

pc.inds <- data.frame(pc1 = as.numeric(pcs$scores[,1]),
                      pc2 = as.numeric(pcs$scores[,2]),
                      pc3 = as.numeric(pcs$scores[,3]),
                      workerid = row.names(pcs$scores)) %>%
  left_join(subinfo)
                      
qplot(pc1, pc2, col = children,
      hjust = 1,
      data = pc.inds) + 
  scale_colour_solarized()

pc.data <- data.frame(pc1 = pcs$loadings[,1],
                      pc2 = pcs$loadings[,2],
                      pc3 = pcs$loadings[,3],
                      short_sent = row.names(pcs$loadings)) %>%
  left_join(labels)

qplot(pc1, pc2, col = category,
      label = short_sent, hjust = 1,
      geom = c("text","point"),
      data = pc.data) + 
  xlim(-.7,.3) +
  scale_colour_solarized()

7 Conclusions