Data analysis of basic parenting values/attitudes survey.

1 Data preprocessing

Preliminaries.

## [1] "dplyr"   "langcog" "tidyr"   "ggplot2" "lme4"
## 
## Attaching package: 'langcog'
## 
## The following object is masked from 'package:base':
## 
##     scale
## 
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:ggplot2':
## 
##     %+%
## 
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: boot
## 
## Attaching package: 'boot'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## 
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## 
## Attaching package: 'nFactors'
## 
## The following object is masked from 'package:lattice':
## 
##     parallel

Read in files and consolidate to the same directory.

files <- dir("../production-results/e7/")
d.raw <- data.frame()

for (f in files) {
  jf <- paste("../production-results/e7/",f,sep="")
  jd <- fromJSON(paste(readLines(jf), collapse=""))
  
  # clean up instruction trial
  sent <- jd$answers$data$sentence
  rating <- as.numeric(jd$answers$data$rating) 
  trial_type <- jd$answer$data$trial_type
  sent <- sent[trial_type != "2afc_instructions"]
  trial_type <- trial_type[trial_type != "2afc_instructions"]
  race <- as.character(jd$answers$data$race[1])
  
  id <- data.frame(workerid = jd$WorkerId, 
                   sent = sent,
                   rating = rating,
                   children = jd$answers$data$children,
                   language = jd$answers$data$homelang,
                   ses = jd$answers$data$ladder,
                   gender = jd$answers$data$gender,
                   age = jd$answers$data$age,
                   education = jd$answers$data$education,
                   ethnicity = jd$answers$data$ethnicity,
                   childAgeYoung = jd$answers$data$childAgeYoung,
                   childAgeOld = jd$answers$data$childAgeOld,
                   race = race)
  d.raw <- bind_rows(d.raw, id)
}

Map on question short forms so that we can use these instead.

labels <- read.csv("sent_forms_e7.csv")
labels$sent <- as.character(labels$sent)

Clean up labels.

d.raw$sent <- as.character(d.raw$sent)
d.raw$sent <- str_replace_all(d.raw$sent, "'", "")
d.raw$sent <- str_replace_all(d.raw$sent, "’", "")
d.raw$sent <- str_replace_all(d.raw$sent, "“", "")
d.raw$sent <- str_replace_all(d.raw$sent, "”", "")
d.raw$sent <- str_replace_all(d.raw$sent, "‘", "")
d.raw$sent <- str_replace_all(d.raw$sent, "â", "")

Merge.

d <- left_join(d.raw, labels)
d$rating[d$reverse_code == 1] <- 6 - d$rating[d$reverse_code == 1]

Plot demographic info.

Unfortunately low ethnic/racial diversity.

subinfo <- d %>% 
  group_by(workerid) %>%
  summarise(age = age[1],
            gender = gender[1],
            children = children[1],
            ses = ses[1],
            education = education[1],
            language = language[1], 
            ethnicity = ethnicity[1],
            youngestChildAge = childAgeYoung[1],
            oldestChildAge = childAgeOld[1],
            race = race[1])

subinfo$education <- factor(subinfo$education, levels = c("highSchool","someCollege","4year","someGrad","Grad"))

subinfo$gender <- str_replace_all(subinfo$gender, "female|FEMALE|F$|f$|Femal$|Females|Females","Female")
subinfo$gender <- str_replace_all(subinfo$gender, "^male|^Male|^MALE|^M$|^m$|^Maleq|Make", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "29|24|25|28|32|33|45", "")
subinfo$gender <- str_replace_all(subinfo$gender, "males", "male")
subinfo$gender <- str_replace_all(subinfo$gender, " ", "")


subinfo$language <- str_replace_all(subinfo$language, "english|eNGLISH|Engliah|ENGLISH|^eng$|Enlgish", "English")
subinfo$language <- str_replace_all(subinfo$language," ", "")
subinfo$language <- str_replace_all(subinfo$language,"arabic", "Arabic")
subinfo$language <- str_replace_all(subinfo$language,"chinese", "Chinese")
subinfo$language <- str_replace_all(subinfo$language,"german", "German")
subinfo$language <- str_replace_all(subinfo$language,"tagalog", "Tagalog")

subinfo$youngestChildAge <- factor(subinfo$youngestChildAge, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))

subinfo$oldestChildAge <- factor(subinfo$oldestChildAge, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))

qplot(ses, data=subinfo)

qplot(children, data=subinfo)

qplot(gender, data=subinfo)

qplot(education, data=subinfo)

qplot(age, data=subinfo)

qplot(language, data=subinfo)

qplot(ethnicity, data=subinfo)

qplot(race, data=subinfo)

qplot(youngestChildAge, data=subinfo)

qplot(oldestChildAge, data=subinfo)

2 Basic analyses

Now look at mean ratings across sentences.

rating_count <- table(d$rating) 
rating_count
## 
##    0    1    2    3    4    5    6 
##   68  139  204  542  617  928 2002
prop.table(rating_count) 
## 
##          0          1          2          3          4          5 
## 0.01511111 0.03088889 0.04533333 0.12044444 0.13711111 0.20622222 
##          6 
## 0.44488889
ms <- d %>%
  group_by(category, instrument, short_sent, reverse_code) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(instrument, category, desc(mean)) 

ms$short_sent_ord <- factor(ms$short_sent, 
                             levels = ms$short_sent)

Plot attitude.

qplot(short_sent_ord, mean, col = category,
      ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
      geom = "pointrange",
      data = filter(ms, instrument == "attitudes")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
  xlab("") + 
  ylab("Mean Rating") + 
  ylim(c(0,6)) + 
  scale_colour_solarized()

Plot mean subscale scores.

mc <- d %>%
  group_by(category) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean)) 

qplot(category, mean, 
      geom = "bar", stat = "identity", position = "dodge", 
      data=mc) + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9))

mcl <- d %>%
  group_by(category, workerid) %>%
  multi_boot_standard(col = "rating") %>%
  arrange(category, desc(mean))%>%
  spread(category, mean) 

3 Scale reliability

3.1 Whole Scale

wide.attitudes <- d %>% 
  filter(instrument == "attitudes") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.mat <- as.matrix(select(wide.attitudes, -workerid))
alpha(x = alpha.mat)
## 
## Reliability analysis   
## Call: alpha(x = alpha.mat)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.82      0.84    0.88      0.23 5.3 0.021  4.7 0.68
## 
##  lower alpha upper     95% confidence boundaries
## 0.78 0.82 0.86 
## 
##  Reliability if an item is dropped:
##                                     raw_alpha std.alpha G6(smc) average_r
## can learn good and bad                   0.81      0.82    0.87      0.22
## children should be grateful              0.83      0.85    0.88      0.24
## dont impose structure                    0.82      0.84    0.88      0.23
## give children comfort                    0.81      0.83    0.87      0.23
## holding and cradling                     0.80      0.82    0.87      0.22
## kids should not decide                   0.82      0.84    0.88      0.24
## learn by playing                         0.81      0.83    0.87      0.22
## learn from repetetive behaviors          0.82      0.84    0.88      0.24
## learn math before school                 0.80      0.82    0.86      0.21
## not spoiled with too much attention      0.82      0.84    0.87      0.24
## punish for small rules                   0.82      0.84    0.88      0.24
## reading before speaking                  0.80      0.83    0.87      0.22
## reasons for rules                        0.80      0.83    0.87      0.22
## respect parents teachers                 0.81      0.84    0.87      0.23
## safe loving environment                  0.80      0.82    0.86      0.21
## talk about emotions                      0.80      0.83    0.87      0.22
## too much affection does not harm         0.81      0.83    0.87      0.23
## wait when told                           0.81      0.84    0.87      0.23
##                                     S/N alpha se
## can learn good and bad              4.7    0.023
## children should be grateful         5.5    0.021
## dont impose structure               5.2    0.022
## give children comfort               5.0    0.023
## holding and cradling                4.7    0.023
## kids should not decide              5.3    0.022
## learn by playing                    4.9    0.023
## learn from repetetive behaviors     5.4    0.022
## learn math before school            4.6    0.024
## not spoiled with too much attention 5.3    0.022
## punish for small rules              5.4    0.022
## reading before speaking             4.7    0.023
## reasons for rules                   4.8    0.023
## respect parents teachers            5.1    0.022
## safe loving environment             4.6    0.023
## talk about emotions                 4.8    0.023
## too much affection does not harm    5.0    0.023
## wait when told                      5.1    0.022
## 
##  Item statistics 
##                                       n raw.r std.r r.cor r.drop mean   sd
## can learn good and bad              250  0.63  0.67  0.65   0.58  5.3 0.97
## children should be grateful         250  0.26  0.26  0.21   0.14  4.5 1.49
## dont impose structure               250  0.45  0.43  0.38   0.34  4.0 1.58
## give children comfort               250  0.51  0.53  0.49   0.43  5.2 1.17
## holding and cradling                250  0.66  0.68  0.67   0.61  5.4 0.97
## kids should not decide              250  0.38  0.38  0.32   0.27  4.0 1.48
## learn by playing                    250  0.52  0.55  0.52   0.45  5.3 1.06
## learn from repetetive behaviors     250  0.34  0.34  0.28   0.23  4.5 1.35
## learn math before school            250  0.72  0.72  0.72   0.66  5.2 1.27
## not spoiled with too much attention 250  0.42  0.37  0.34   0.28  3.5 1.87
## punish for small rules              250  0.35  0.33  0.27   0.24  3.6 1.50
## reading before speaking             250  0.66  0.66  0.64   0.59  5.3 1.39
## reasons for rules                   250  0.63  0.61  0.59   0.54  4.7 1.60
## respect parents teachers            250  0.47  0.48  0.46   0.38  4.9 1.26
## safe loving environment             250  0.72  0.75  0.75   0.68  5.6 0.87
## talk about emotions                 250  0.61  0.60  0.57   0.53  5.1 1.48
## too much affection does not harm    250  0.55  0.51  0.50   0.44  4.3 1.74
## wait when told                      250  0.45  0.48  0.44   0.36  4.9 1.22
## 
## Non missing response frequency for each item
##                                        0    1    2    3    4    5    6
## can learn good and bad              0.00 0.00 0.02 0.04 0.10 0.26 0.58
## children should be grateful         0.02 0.02 0.06 0.18 0.16 0.24 0.33
## dont impose structure               0.02 0.08 0.06 0.18 0.23 0.23 0.19
## give children comfort               0.01 0.01 0.01 0.08 0.10 0.23 0.56
## holding and cradling                0.00 0.00 0.02 0.04 0.08 0.24 0.62
## kids should not decide              0.02 0.04 0.10 0.24 0.21 0.22 0.19
## learn by playing                    0.00 0.00 0.01 0.09 0.10 0.20 0.60
## learn from repetetive behaviors     0.01 0.01 0.03 0.18 0.19 0.26 0.31
## learn math before school            0.01 0.02 0.02 0.07 0.08 0.20 0.60
## not spoiled with too much attention 0.07 0.11 0.11 0.22 0.15 0.14 0.20
## punish for small rules              0.04 0.08 0.09 0.23 0.31 0.16 0.10
## reading before speaking             0.01 0.04 0.02 0.04 0.07 0.14 0.68
## reasons for rules                   0.02 0.04 0.07 0.07 0.14 0.22 0.44
## respect parents teachers            0.00 0.01 0.04 0.12 0.10 0.30 0.42
## safe loving environment             0.00 0.00 0.01 0.04 0.06 0.10 0.79
## talk about emotions                 0.02 0.03 0.02 0.06 0.10 0.16 0.60
## too much affection does not harm    0.03 0.06 0.10 0.14 0.16 0.16 0.36
## wait when told                      0.00 0.01 0.02 0.14 0.14 0.26 0.42
##                                     miss
## can learn good and bad                 0
## children should be grateful            0
## dont impose structure                  0
## give children comfort                  0
## holding and cradling                   0
## kids should not decide                 0
## learn by playing                       0
## learn from repetetive behaviors        0
## learn math before school               0
## not spoiled with too much attention    0
## punish for small rules                 0
## reading before speaking                0
## reasons for rules                      0
## respect parents teachers               0
## safe loving environment                0
## talk about emotions                    0
## too much affection does not harm       0
## wait when told                         0

3.2 Rules and Respect Subscale

wide.rules_respect <- d %>% 
  filter(category == "rules_respect") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.rr <- as.matrix(select(wide.rules_respect, -workerid))
alpha(x = alpha.rr)
## 
## Reliability analysis   
## Call: alpha(x = alpha.rr)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd
##       0.69       0.7    0.69      0.28 2.3 0.045  4.3 0.9
## 
##  lower alpha upper     95% confidence boundaries
## 0.61 0.69 0.78 
## 
##  Reliability if an item is dropped:
##                             raw_alpha std.alpha G6(smc) average_r S/N
## children should be grateful      0.66      0.66    0.63      0.28 2.0
## dont impose structure            0.66      0.67    0.64      0.29 2.0
## kids should not decide           0.65      0.66    0.65      0.28 2.0
## punish for small rules           0.67      0.68    0.66      0.30 2.1
## respect parents teachers         0.61      0.61    0.57      0.24 1.6
## wait when told                   0.66      0.67    0.66      0.29 2.0
##                             alpha se
## children should be grateful    0.051
## dont impose structure          0.051
## kids should not decide         0.052
## punish for small rules         0.050
## respect parents teachers       0.056
## wait when told                 0.051
## 
##  Item statistics 
##                               n raw.r std.r r.cor r.drop mean  sd
## children should be grateful 250  0.62  0.62  0.54   0.40  4.5 1.5
## dont impose structure       250  0.63  0.61  0.49   0.40  4.0 1.6
## kids should not decide      250  0.64  0.63  0.50   0.43  4.0 1.5
## punish for small rules      250  0.60  0.59  0.44   0.37  3.6 1.5
## respect parents teachers    250  0.72  0.74  0.71   0.57  4.9 1.3
## wait when told              250  0.58  0.61  0.48   0.40  4.9 1.2
## 
## Non missing response frequency for each item
##                                0    1    2    3    4    5    6 miss
## children should be grateful 0.02 0.02 0.06 0.18 0.16 0.24 0.33    0
## dont impose structure       0.02 0.08 0.06 0.18 0.23 0.23 0.19    0
## kids should not decide      0.02 0.04 0.10 0.24 0.21 0.22 0.19    0
## punish for small rules      0.04 0.08 0.09 0.23 0.31 0.16 0.10    0
## respect parents teachers    0.00 0.01 0.04 0.12 0.10 0.30 0.42    0
## wait when told              0.00 0.01 0.02 0.14 0.14 0.26 0.42    0

3.3 Affection and Attachment Subscale

wide.affection <- d %>% 
  filter(category == "affection") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.af <- as.matrix(select(wide.affection, -workerid))
alpha(x = alpha.af)
## 
## Reliability analysis   
## Call: alpha(x = alpha.af)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd
##       0.75      0.77    0.78      0.36 3.4 0.04  4.8 0.93
## 
##  lower alpha upper     95% confidence boundaries
## 0.67 0.75 0.82 
## 
##  Reliability if an item is dropped:
##                                     raw_alpha std.alpha G6(smc) average_r
## give children comfort                    0.73      0.76    0.77      0.39
## holding and cradling                     0.70      0.72    0.72      0.34
## not spoiled with too much attention      0.71      0.75    0.72      0.38
## safe loving environment                  0.71      0.72    0.71      0.34
## talk about emotions                      0.73      0.76    0.77      0.39
## too much affection does not harm         0.66      0.72    0.69      0.34
##                                     S/N alpha se
## give children comfort               3.2    0.044
## holding and cradling                2.5    0.047
## not spoiled with too much attention 3.0    0.047
## safe loving environment             2.6    0.046
## talk about emotions                 3.2    0.044
## too much affection does not harm    2.5    0.052
## 
##  Item statistics 
##                                       n raw.r std.r r.cor r.drop mean   sd
## give children comfort               250  0.56  0.62  0.48   0.39  5.2 1.17
## holding and cradling                250  0.68  0.75  0.69   0.57  5.4 0.97
## not spoiled with too much attention 250  0.74  0.65  0.58   0.52  3.5 1.87
## safe loving environment             250  0.65  0.73  0.68   0.54  5.6 0.87
## talk about emotions                 250  0.61  0.61  0.47   0.40  5.1 1.48
## too much affection does not harm    250  0.81  0.75  0.72   0.65  4.3 1.74
## 
## Non missing response frequency for each item
##                                        0    1    2    3    4    5    6
## give children comfort               0.01 0.01 0.01 0.08 0.10 0.23 0.56
## holding and cradling                0.00 0.00 0.02 0.04 0.08 0.24 0.62
## not spoiled with too much attention 0.07 0.11 0.11 0.22 0.15 0.14 0.20
## safe loving environment             0.00 0.00 0.01 0.04 0.06 0.10 0.79
## talk about emotions                 0.02 0.03 0.02 0.06 0.10 0.16 0.60
## too much affection does not harm    0.03 0.06 0.10 0.14 0.16 0.16 0.36
##                                     miss
## give children comfort                  0
## holding and cradling                   0
## not spoiled with too much attention    0
## safe loving environment                0
## talk about emotions                    0
## too much affection does not harm       0

3.4 Active Learning Subscale

wide.active_learning <- d %>% 
  filter(category == "active_learning") %>%
  select(workerid, short_sent, rating) %>% 
  spread(short_sent, rating)
alpha.al <- as.matrix(select(wide.active_learning, -workerid))
alpha(x = alpha.al)
## 
## Reliability analysis   
## Call: alpha(x = alpha.al)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd
##       0.76      0.77    0.76      0.35 3.3 0.039  5.1 0.87
## 
##  lower alpha upper     95% confidence boundaries
## 0.68 0.76 0.84 
## 
##  Reliability if an item is dropped:
##                                 raw_alpha std.alpha G6(smc) average_r S/N
## can learn good and bad               0.73      0.74    0.72      0.36 2.8
## learn by playing                     0.72      0.73    0.70      0.35 2.7
## learn from repetetive behaviors      0.78      0.78    0.75      0.42 3.6
## learn math before school             0.70      0.71    0.68      0.33 2.5
## reading before speaking              0.70      0.71    0.69      0.33 2.5
## reasons for rules                    0.71      0.72    0.70      0.34 2.6
##                                 alpha se
## can learn good and bad             0.045
## learn by playing                   0.046
## learn from repetetive behaviors    0.041
## learn math before school           0.048
## reading before speaking            0.048
## reasons for rules                  0.047
## 
##  Item statistics 
##                                   n raw.r std.r r.cor r.drop mean   sd
## can learn good and bad          250  0.62  0.67  0.56   0.49  5.3 0.97
## learn by playing                250  0.66  0.70  0.61   0.52  5.3 1.06
## learn from repetetive behaviors 250  0.53  0.52  0.37   0.30  4.5 1.35
## learn math before school        250  0.75  0.74  0.69   0.60  5.2 1.27
## reading before speaking         250  0.75  0.73  0.68   0.59  5.3 1.39
## reasons for rules               250  0.75  0.72  0.64   0.56  4.7 1.60
## 
## Non missing response frequency for each item
##                                    0    1    2    3    4    5    6 miss
## can learn good and bad          0.00 0.00 0.02 0.04 0.10 0.26 0.58    0
## learn by playing                0.00 0.00 0.01 0.09 0.10 0.20 0.60    0
## learn from repetetive behaviors 0.01 0.01 0.03 0.18 0.19 0.26 0.31    0
## learn math before school        0.01 0.02 0.02 0.07 0.08 0.20 0.60    0
## reading before speaking         0.01 0.04 0.02 0.04 0.07 0.14 0.68    0
## reasons for rules               0.02 0.04 0.07 0.07 0.14 0.22 0.44    0

4 Subscales by demographics

Create a data frame that has subscale scores.

ss <- d %>%
  group_by(workerid, category) %>%
  summarize(rating = mean(rating))

ss <- left_join(ss, subinfo)

4.1 By SES

ms <- ss %>% 
  filter(ses != "") %>%
  mutate(ses.level = cut(as.numeric(ses), c(0,3,6,10))) %>%
  group_by(ses.level, category) %>%
  multi_boot_standard(col = "rating") 

ggplot(ms, aes(category, mean, fill=ses.level)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

4.2 By Race

Drop the sparsest groups.

ms <- ss %>% 
  filter(race != "NULL") %>%
  group_by(race, category) %>%
  filter(n() > 5) %>%
  multi_boot_standard(col = "rating")
  
ggplot(ms, aes(category, mean, fill=race)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

4.3 By Gender

ms <- ss %>% 
  filter(gender %in% c("Male","Female")) %>%
  group_by(gender, category) %>%
  multi_boot_standard(col = "rating") 

ggplot(ms, aes(category, mean, fill=gender)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

4.4 By Education

Drop the sparsest groups.

ms <- ss %>% 
  filter(!is.na(education)) %>%
  group_by(education, category) %>%
  filter(n() > 5) %>%
  multi_boot_standard(col = "rating") 

ggplot(ms, aes(category, mean, fill=education)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

4.5 By Age

Drop the sparsest groups again.

ms <- ss %>% 
  filter(!is.na(age)) %>%
  group_by(age, category) %>%
  filter(n() > 5) %>%
  multi_boot_standard(col = "rating") 

ggplot(ms, aes(category, mean, fill=age)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

4.6 By Children

ms <- ss %>% 
  filter(!is.na(children)) %>%
  group_by(children, category) %>%
  filter(n() > 5) %>%
  multi_boot_standard(col = "rating") 

ggplot(ms, aes(category, mean, fill=children)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

4.7 Factors by Ethnicity

ms <- ss %>% 
  filter(ethnicity != "") %>%
  group_by(ethnicity, category) %>%
  filter(n() > 5) %>%
  multi_boot_standard(col = "rating") 

ggplot(ms, aes(category, mean, fill=ethnicity)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5 Factor analysis

Get eigenvalues for determining number of factors.

att.mat <- select(wide.attitudes, -workerid)

ev <- eigen(cor(x=att.mat)) # get eigenvalues

ap <- parallel(subject=nrow(att.mat), var=ncol(att.mat),
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

It still looks like Active/Early Learning and Affection are grouping together (with a separate factor that seems to focus on physical affection perhaps?), while Rules and Respect is separate. I’m still wondering if we need to re-think the structure of the subscales. It might be that Early Learning and Affection are not fully differentiated latent theories, and that they group together into a “child development is a project” theory.

item.levels <- c("respect parents teachers", "wait when told","children should be grateful","punish for small rules", "dont impose structure", "kids should not decide", "safe loving environment", "holding and cradling", "give children comfort", "talk about emotions", "not spoiled with too much attention","too much affection does not harm", "can learn good and bad", "learn by playing", "learn from repetetive behaviors", "reasons for rules", "learn math before school", "reading before speaking")

n.factors <- 3

af <- factanal(x=att.mat, factors = n.factors, scores = "regression")
loadings <- data.frame(af$loadings[,1:n.factors]) %>%
  mutate(item = rownames(af$loadings)) %>%
  gather(factor, loading, starts_with("Factor"))
loadings$item <- factor(loadings$item, item.levels)

labels$item <- labels$short_sent
loadings <- join(loadings, labels, by = "item")

qplot(factor, item, fill=loading, geom="tile", data = loadings) + 
  scale_fill_continuous(low="#000000", high="#FFFFFF") + 
  facet_grid(category ~ ., scales = "free_y")

qplot(factor, item, fill=loading > .4, geom="tile", data = loadings) + 
  scale_fill_solarized() + 
  facet_grid(category ~ ., scales = "free_y")

Merge back in subject id info.

a.scores <- af$scores %>%
  data.frame %>%
  mutate(workerid = as.character(wide.attitudes$workerid)) %>%
  left_join(subinfo)

5.1 Factors by SES

a.factor.names <- c("Affection and Early Learning","Rules and Respect", "Affection 2")

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(ses != "") %>%
  mutate(ses.level = cut(as.numeric(ses), c(0,3,6,10)),
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(ses.level, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=ses.level)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5.2 Factors by Gender

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(gender %in% c("Male","Female")) %>%
  mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(gender, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=gender)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5.3 Factors by Education

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(education != "") %>%
  mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(education, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=education)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5.4 Factors by Age

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(age != "") %>%
  mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(age, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=age)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5.5 Factors by Children

Do you have children?

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(children != "") %>%
  mutate(children = children %in% c("1","2","3","4","5","morethan5"),
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(children, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=children)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

How many?

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(children != "") %>%
  mutate(n.children = children,
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(n.children, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=n.children)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5.6 Factors by Ethnicity

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(ethnicity != "") %>%
  mutate(hispanic = ethnicity== "Hispanic",
         factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(hispanic, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=hispanic)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

5.7 Factors by Race

mf <- a.scores %>% 
  gather(factor, score, starts_with("Factor")) %>%
  filter(race != "NULL") %>%
  mutate(factor.num = as.numeric(str_replace(factor,"Factor","")),
         factor.name = a.factor.names[factor.num]) %>%
  group_by(race, factor.name) %>%
  multi_boot_standard(col = "score") 

ggplot(mf, aes(factor.name, mean, fill=race)) +
  geom_bar(stat="identity", position = "dodge") + 
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), 
             position = position_dodge(width = .9)) + 
  scale_fill_solarized()

6 Conclusions

Alpha correlations within subscales were lower this time, perhaps because there were fewer items in each subscale (6).

The three factors are not separating out exactly as predicted, but getting closer. The main issue is that the Affection items (this time,“talk about emotions”, “holding and cradling”, and “safe loving environment”) are loading onto the same factor as the Early Learning items.