Data analysis of basic parenting values/attitudes survey, version 2.
Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
##
## The following object is masked from 'package:base':
##
## scale
##
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:ggplot2':
##
## %+%
##
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: boot
##
## Attaching package: 'boot'
##
## The following object is masked from 'package:psych':
##
## logit
##
## Loading required package: lattice
##
## Attaching package: 'lattice'
##
## The following object is masked from 'package:boot':
##
## melanoma
##
##
## Attaching package: 'nFactors'
##
## The following object is masked from 'package:lattice':
##
## parallel
Read in files and consolidate to the same directory.
files <- dir("../production-results/e4/")
d.raw <- data.frame()
for (f in files) {
jf <- paste("../production-results/e4/",f,sep="")
jd <- fromJSON(paste(readLines(jf), collapse=""))
# clean up instruction trial
sent <- jd$answers$data$sentence
rating <- (jd$answers$data$rating) #removed as.numeric to include DU responses
trial_type <- jd$answer$data$trial_type
sent <- sent[trial_type != "2afc_instructions"]
trial_type <- trial_type[trial_type != "2afc_instructions"]
#left out race because not sure yet how to deal with list format
id <- data.frame(workerid = jd$WorkerId,
sent = sent,
rating = rating,
children = jd$answers$data$children,
language = jd$answer$data$homelang,
ses = jd$answer$data$ladder,
gender = jd$answer$data$gender,
age = jd$answer$data$age,
education = jd$answer$data$education,
ethnicity = jd$answer$data$ethnicity,
childAgeYoung = jd$answer$data$childAgeYoung,
childAgeOld = jd$answer$data$childAgeOld
)
d.raw <- bind_rows(d.raw, id)
}
Map on question short forms so that we can use these instead.
labels <- read.csv("sent_forms_e4.csv")
labels$sent <- as.character(labels$sent)
Clean up labels.
d.raw$sent <- as.character(d.raw$sent)
d.raw$sent <- str_replace_all(d.raw$sent, "'", "")
d.raw$sent <- str_replace_all(d.raw$sent, "’", "")
d.raw$sent <- str_replace_all(d.raw$sent, "“", "")
d.raw$sent <- str_replace_all(d.raw$sent, "”", "")
d.raw$sent <- str_replace_all(d.raw$sent, "‘", "")
d.raw$sent <- str_replace_all(d.raw$sent, "â", "'")
Merge.
d <- left_join(d.raw, labels)
#set up reverse code variables so that items don't get reverse coded a second time.
d$reverse_code[d$reverse_code == 1 & d$rating == "1"] <- 1
d$reverse_code[d$reverse_code == 1 & d$rating == "2"] <- 2
d$reverse_code[d$reverse_code == 1 & d$rating == "3"] <- 3
d$reverse_code[d$reverse_code == 1 & d$rating == "5"] <- 5
d$reverse_code[d$reverse_code == 1 & d$rating == "6"] <- 6
d$reverse_code[d$reverse_code == 1 & d$rating == "7"] <- 7
#reverse code.
d$rating[d$reverse_code == 1 & d$rating == "1"] <- 7
d$rating[d$reverse_code == 2 & d$rating == "2"] <- 6
d$rating[d$reverse_code == 3 & d$rating == "3"] <- 5
d$rating[d$reverse_code == 5 & d$rating == "5"] <- 3
d$rating[d$reverse_code == 6 & d$rating == "6"] <- 2
d$rating[d$reverse_code == 7 & d$rating == "7"] <- 1
Plot demographic info.
Note: most of the respondants do not have children.
subinfo <- d %>%
group_by(workerid) %>%
summarise(age = age[1],
gender = gender[1],
children = children[1],
ses = ses[1],
education = education[1],
language = language[1],
ethnicity = ethnicity[1],
youngestChildAge = childAgeYoung[1],
oldestChildAge = childAgeOld[1]
)
subinfo$education <- factor(subinfo$education, levels = c("highSchool","someCollege","4year","someGrad","Grad"))
subinfo$gender <- str_replace_all(subinfo$gender, "female|FEMALE|F$|f$|Female ","Female")
subinfo$gender <- str_replace_all(subinfo$gender, "^male|^Male|^MALE|^M$|^m$|^Maleq|Make", "Male")
subinfo$gender <- str_replace_all(subinfo$gender, "29", "")
subinfo$language <- str_replace_all(subinfo$language, "english|eNGLISH|Engliah|ENGLISH|English ", "English")
subinfo$youngestChildAge <- factor(subinfo$youngestChildAge, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))
subinfo$oldestChildAge <- factor(subinfo$oldestChildAge, levels = c("","0to6mo","7to12mo","1y","2y","3y","4y","5y","6y","7y","8y","9y","10y","olderthan10"))
qplot(ses, data=subinfo)
qplot(children, data=subinfo)
qplot(gender, data=subinfo)
qplot(education, data=subinfo)
qplot(age, data=subinfo)
qplot(language, data=subinfo)
qplot(ethnicity, data=subinfo)
qplot(youngestChildAge, data=subinfo)
qplot(oldestChildAge, data=subinfo)
Now look at mean ratings across sentences. DU stands for “Don’t Understand,” which was an alternative to picking a numeric rating.
rating_count <- table(d$rating)
rating_count
##
## 1 2 3 4 5 6 7 DU
## 142 237 391 673 977 1428 2151 1
prop.table(rating_count)
##
## 1 2 3 4 5
## 0.0236666667 0.0395000000 0.0651666667 0.1121666667 0.1628333333
## 6 7 DU
## 0.2380000000 0.3585000000 0.0001666667
#get rid of DU
d$rating <- as.numeric(d$rating)
#added na.rm=TRUE to deal with missing values from DU.
ms <- d %>%
group_by(category, instrument, short_sent) %>%
multi_boot_standard(col = "rating", na.rm=TRUE) %>%
arrange(instrument, category, desc(mean))
ms$short_sent_ord <- factor(ms$short_sent,
levels = ms$short_sent)
Plot attitude.
Ratings are clearly lower for the reverse coded items (the final 2 items in each category); this could be due to response sets (i.e., participants endorsing all items highly).
qplot(short_sent_ord, mean, col = category,
ymin = ci_lower, ymax = ci_upper,
geom = "pointrange",
data = filter(ms, instrument == "attitudes")) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("") +
ylab("Mean Rating") +
ylim(c(1,7)) +
scale_colour_solarized()
This analysis is not especially meaningful as it averages across the three subscales. However, it can be used as a comparison with the within subscale alphas. Raw alpha is .72 for the whole scale.
wide.attitudes <- d %>%
filter(instrument == "attitudes") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.mat <- as.matrix(select(wide.attitudes, -workerid))
alpha(x = alpha.mat)
## Some items ( punish misbehavior ) were negatively correlated with the total scale and probably should be reversed. To do this, run the function again with the 'check.keys=TRUE' option
##
## Reliability analysis
## Call: alpha(x = alpha.mat)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.72 0.79 0.84 0.13 3.7 0.028 5.5 0.49
##
## lower alpha upper 95% confidence boundaries
## 0.67 0.72 0.78
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r
## behavior problems no love 0.74 0.80 0.85 0.15
## can learn good and bad 0.70 0.77 0.82 0.13
## control behavior 0.71 0.78 0.83 0.13
## dont force activity 0.74 0.80 0.85 0.15
## dont interrupt 0.71 0.78 0.83 0.13
## examples are good 0.71 0.77 0.83 0.13
## follow-in good 0.72 0.78 0.84 0.14
## holding and cradling 0.70 0.77 0.83 0.13
## learn before talking 0.71 0.78 0.83 0.13
## learn by playing 0.71 0.78 0.83 0.13
## not spoiled with too much affection 0.72 0.78 0.84 0.14
## parents provide structure 0.72 0.79 0.84 0.14
## punish for small rules 0.72 0.79 0.84 0.14
## punish misbehavior 0.75 0.80 0.85 0.15
## reasons for rules 0.72 0.79 0.84 0.14
## respect parents teachers 0.71 0.78 0.83 0.13
## reward accuracy 0.74 0.80 0.85 0.15
## safe loving environment 0.71 0.77 0.82 0.13
## spontaneous play 0.72 0.78 0.84 0.14
## strict rules 0.72 0.79 0.84 0.14
## strong bond mom 0.70 0.77 0.83 0.13
## talk about feelings 0.70 0.77 0.83 0.13
## talk about opinions 0.71 0.78 0.83 0.13
## wait when told 0.70 0.77 0.83 0.13
## S/N alpha se
## behavior problems no love 3.9 0.027
## can learn good and bad 3.3 0.031
## control behavior 3.5 0.030
## dont force activity 3.9 0.027
## dont interrupt 3.6 0.030
## examples are good 3.3 0.030
## follow-in good 3.6 0.029
## holding and cradling 3.3 0.031
## learn before talking 3.5 0.030
## learn by playing 3.5 0.030
## not spoiled with too much affection 3.6 0.029
## parents provide structure 3.7 0.029
## punish for small rules 3.7 0.029
## punish misbehavior 4.1 0.027
## reasons for rules 3.7 0.029
## respect parents teachers 3.5 0.030
## reward accuracy 4.0 0.027
## safe loving environment 3.3 0.030
## spontaneous play 3.6 0.029
## strict rules 3.7 0.029
## strong bond mom 3.3 0.031
## talk about feelings 3.4 0.031
## talk about opinions 3.5 0.030
## wait when told 3.4 0.031
##
## Item statistics
## n raw.r std.r r.cor r.drop mean
## behavior problems no love 250 0.134 0.115 0.032 -0.00445 4.6
## can learn good and bad 250 0.611 0.644 0.641 0.56186 6.4
## control behavior 250 0.468 0.466 0.441 0.37553 5.5
## dont force activity 249 0.130 0.136 0.064 0.00063 5.1
## dont interrupt 250 0.421 0.420 0.395 0.31985 5.1
## examples are good 250 0.576 0.611 0.599 0.52381 6.3
## follow-in good 250 0.370 0.382 0.330 0.27575 5.6
## holding and cradling 250 0.582 0.616 0.608 0.52615 6.3
## learn before talking 250 0.454 0.482 0.456 0.37329 6.3
## learn by playing 250 0.453 0.495 0.470 0.38663 6.4
## not spoiled with too much affection 250 0.401 0.377 0.332 0.26086 4.6
## parents provide structure 250 0.345 0.312 0.269 0.22012 5.0
## punish for small rules 250 0.326 0.285 0.233 0.21313 4.2
## punish misbehavior 250 0.065 0.027 -0.036 -0.07869 3.6
## reasons for rules 250 0.383 0.356 0.317 0.24123 5.6
## respect parents teachers 250 0.503 0.497 0.472 0.42711 6.0
## reward accuracy 250 0.120 0.101 0.019 -0.02093 3.9
## safe loving environment 250 0.582 0.632 0.632 0.54206 6.7
## spontaneous play 250 0.345 0.359 0.310 0.25058 5.7
## strict rules 250 0.387 0.337 0.290 0.25966 4.5
## strong bond mom 250 0.583 0.612 0.599 0.52573 6.3
## talk about feelings 250 0.570 0.569 0.569 0.46741 6.0
## talk about opinions 250 0.479 0.515 0.493 0.41051 6.2
## wait when told 250 0.551 0.573 0.567 0.48302 6.0
## sd
## behavior problems no love 1.65
## can learn good and bad 0.86
## control behavior 1.31
## dont force activity 1.52
## dont interrupt 1.36
## examples are good 0.87
## follow-in good 1.23
## holding and cradling 0.93
## learn before talking 1.13
## learn by playing 0.94
## not spoiled with too much affection 1.82
## parents provide structure 1.58
## punish for small rules 1.42
## punish misbehavior 1.70
## reasons for rules 1.81
## respect parents teachers 1.12
## reward accuracy 1.66
## safe loving environment 0.68
## spontaneous play 1.22
## strict rules 1.64
## strong bond mom 0.96
## talk about feelings 1.60
## talk about opinions 0.99
## wait when told 1.07
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 7
## behavior problems no love 0.05 0.07 0.12 0.22 0.23 0.17 0.14
## can learn good and bad 0.00 0.00 0.02 0.03 0.06 0.32 0.58
## control behavior 0.01 0.02 0.06 0.13 0.21 0.32 0.26
## dont force activity 0.02 0.03 0.10 0.22 0.18 0.21 0.22
## dont interrupt 0.02 0.03 0.06 0.14 0.33 0.24 0.17
## examples are good 0.00 0.00 0.01 0.05 0.07 0.36 0.51
## follow-in good 0.00 0.02 0.04 0.12 0.21 0.36 0.25
## holding and cradling 0.00 0.00 0.00 0.05 0.11 0.27 0.56
## learn before talking 0.00 0.01 0.02 0.03 0.10 0.18 0.65
## learn by playing 0.00 0.00 0.01 0.03 0.09 0.29 0.58
## not spoiled with too much affection 0.04 0.10 0.18 0.15 0.16 0.14 0.22
## parents provide structure 0.04 0.04 0.08 0.16 0.24 0.26 0.17
## punish for small rules 0.04 0.09 0.14 0.27 0.30 0.11 0.05
## punish misbehavior 0.14 0.15 0.20 0.23 0.12 0.10 0.06
## reasons for rules 0.07 0.03 0.06 0.04 0.13 0.26 0.42
## respect parents teachers 0.00 0.01 0.04 0.05 0.20 0.29 0.42
## reward accuracy 0.06 0.16 0.20 0.20 0.20 0.10 0.08
## safe loving environment 0.00 0.00 0.01 0.01 0.04 0.16 0.78
## spontaneous play 0.00 0.01 0.04 0.13 0.18 0.33 0.30
## strict rules 0.04 0.12 0.10 0.22 0.24 0.16 0.12
## strong bond mom 0.00 0.00 0.01 0.06 0.10 0.26 0.58
## talk about feelings 0.03 0.04 0.04 0.03 0.10 0.17 0.59
## talk about opinions 0.00 0.00 0.02 0.04 0.13 0.30 0.51
## wait when told 0.00 0.01 0.02 0.08 0.17 0.34 0.38
## miss
## behavior problems no love 0
## can learn good and bad 0
## control behavior 0
## dont force activity 0
## dont interrupt 0
## examples are good 0
## follow-in good 0
## holding and cradling 0
## learn before talking 0
## learn by playing 0
## not spoiled with too much affection 0
## parents provide structure 0
## punish for small rules 0
## punish misbehavior 0
## reasons for rules 0
## respect parents teachers 0
## reward accuracy 0
## safe loving environment 0
## spontaneous play 0
## strict rules 0
## strong bond mom 0
## talk about feelings 0
## talk about opinions 0
## wait when told 0
Raw alpha is .72 here.
wide.rules_respect <- d %>%
filter(category == "rules_respect") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.rr <- as.matrix(select(wide.rules_respect, -workerid))
alpha(x = alpha.rr)
##
## Reliability analysis
## Call: alpha(x = alpha.rr)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.72 0.74 0.74 0.26 2.8 0.038 5 0.82
##
## lower alpha upper 95% confidence boundaries
## 0.64 0.72 0.79
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## control behavior 0.68 0.70 0.70 0.25 2.3
## dont interrupt 0.67 0.69 0.68 0.24 2.2
## parents provide structure 0.67 0.70 0.69 0.25 2.3
## punish for small rules 0.69 0.72 0.71 0.27 2.5
## punish misbehavior 0.71 0.73 0.72 0.28 2.7
## respect parents teachers 0.67 0.69 0.69 0.24 2.2
## strict rules 0.73 0.74 0.73 0.29 2.9
## wait when told 0.69 0.71 0.70 0.25 2.4
## alpha se
## control behavior 0.043
## dont interrupt 0.044
## parents provide structure 0.044
## punish for small rules 0.042
## punish misbehavior 0.041
## respect parents teachers 0.044
## strict rules 0.039
## wait when told 0.042
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## control behavior 250 0.61 0.64 0.56 0.46 5.5 1.3
## dont interrupt 250 0.65 0.67 0.63 0.50 5.1 1.4
## parents provide structure 250 0.66 0.64 0.57 0.49 5.0 1.6
## punish for small rules 250 0.57 0.55 0.45 0.40 4.2 1.4
## punish misbehavior 250 0.56 0.51 0.40 0.34 3.6 1.7
## respect parents teachers 250 0.65 0.68 0.63 0.53 6.0 1.1
## strict rules 250 0.47 0.43 0.29 0.24 4.5 1.6
## wait when told 250 0.55 0.61 0.54 0.42 6.0 1.1
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 7 miss
## control behavior 0.01 0.02 0.06 0.13 0.21 0.32 0.26 0
## dont interrupt 0.02 0.03 0.06 0.14 0.33 0.24 0.17 0
## parents provide structure 0.04 0.04 0.08 0.16 0.24 0.26 0.17 0
## punish for small rules 0.04 0.09 0.14 0.27 0.30 0.11 0.05 0
## punish misbehavior 0.14 0.15 0.20 0.23 0.12 0.10 0.06 0
## respect parents teachers 0.00 0.01 0.04 0.05 0.20 0.29 0.42 0
## strict rules 0.04 0.12 0.10 0.22 0.24 0.16 0.12 0
## wait when told 0.00 0.01 0.02 0.08 0.17 0.34 0.38 0
Raw alpha is .57 here, which is pretty low. It looks like removing the items “behavior problems no love” and “don’t force activity” would improve things quite a bit.
wide.affection <- d %>%
filter(category == "affection") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.af <- as.matrix(select(wide.affection, -workerid))
alpha(x = alpha.af)
## Some items ( behavior problems no love ) were negatively correlated with the total scale and probably should be reversed. To do this, run the function again with the 'check.keys=TRUE' option
##
## Reliability analysis
## Call: alpha(x = alpha.af)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.57 0.67 0.68 0.2 2 0.05 5.7 0.66
##
## lower alpha upper 95% confidence boundaries
## 0.47 0.57 0.67
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r
## behavior problems no love 0.66 0.72 0.72 0.27
## dont force activity 0.60 0.70 0.70 0.25
## holding and cradling 0.49 0.59 0.60 0.17
## not spoiled with too much affection 0.52 0.64 0.65 0.20
## safe loving environment 0.52 0.59 0.60 0.17
## strong bond mom 0.51 0.61 0.62 0.18
## talk about feelings 0.49 0.61 0.62 0.18
## talk about opinions 0.49 0.61 0.62 0.18
## S/N alpha se
## behavior problems no love 2.6 0.045
## dont force activity 2.3 0.049
## holding and cradling 1.4 0.059
## not spoiled with too much affection 1.7 0.056
## safe loving environment 1.5 0.057
## strong bond mom 1.5 0.057
## talk about feelings 1.6 0.059
## talk about opinions 1.5 0.058
##
## Item statistics
## n raw.r std.r r.cor r.drop mean
## behavior problems no love 250 0.27 0.21 -0.019 -0.04 4.6
## dont force activity 249 0.38 0.33 0.133 0.10 5.1
## holding and cradling 250 0.64 0.70 0.665 0.51 6.3
## not spoiled with too much affection 250 0.62 0.55 0.438 0.34 4.6
## safe loving environment 250 0.58 0.68 0.649 0.49 6.7
## strong bond mom 250 0.57 0.65 0.595 0.43 6.3
## talk about feelings 250 0.64 0.63 0.573 0.40 6.0
## talk about opinions 250 0.61 0.65 0.581 0.47 6.2
## sd
## behavior problems no love 1.65
## dont force activity 1.52
## holding and cradling 0.93
## not spoiled with too much affection 1.82
## safe loving environment 0.68
## strong bond mom 0.96
## talk about feelings 1.60
## talk about opinions 0.99
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 7
## behavior problems no love 0.05 0.07 0.12 0.22 0.23 0.17 0.14
## dont force activity 0.02 0.03 0.10 0.22 0.18 0.21 0.22
## holding and cradling 0.00 0.00 0.00 0.05 0.11 0.27 0.56
## not spoiled with too much affection 0.04 0.10 0.18 0.15 0.16 0.14 0.22
## safe loving environment 0.00 0.00 0.01 0.01 0.04 0.16 0.78
## strong bond mom 0.00 0.00 0.01 0.06 0.10 0.26 0.58
## talk about feelings 0.03 0.04 0.04 0.03 0.10 0.17 0.59
## talk about opinions 0.00 0.00 0.02 0.04 0.13 0.30 0.51
## miss
## behavior problems no love 0
## dont force activity 0
## holding and cradling 0
## not spoiled with too much affection 0
## safe loving environment 0
## strong bond mom 0
## talk about feelings 0
## talk about opinions 0
Raw alpha is .55, which is again low. Dropping “reward accuracy” will help.
wide.active_learning <- d %>%
filter(category == "active_learning") %>%
select(workerid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.al <- as.matrix(select(wide.active_learning, -workerid))
alpha(x = alpha.al)
##
## Reliability analysis
## Call: alpha(x = alpha.al)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.55 0.63 0.63 0.18 1.7 0.052 5.8 0.62
##
## lower alpha upper 95% confidence boundaries
## 0.45 0.55 0.66
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## can learn good and bad 0.47 0.55 0.54 0.15 1.2 0.060
## examples are good 0.50 0.58 0.57 0.16 1.4 0.058
## follow-in good 0.54 0.62 0.61 0.19 1.6 0.055
## learn before talking 0.48 0.58 0.57 0.16 1.4 0.059
## learn by playing 0.47 0.56 0.55 0.15 1.3 0.060
## reasons for rules 0.56 0.62 0.61 0.19 1.6 0.054
## reward accuracy 0.62 0.67 0.65 0.23 2.0 0.049
## spontaneous play 0.52 0.61 0.61 0.19 1.6 0.056
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## can learn good and bad 250 0.62 0.67 0.631 0.490 6.4 0.86
## examples are good 250 0.53 0.60 0.520 0.390 6.3 0.87
## follow-in good 250 0.44 0.47 0.335 0.209 5.6 1.23
## learn before talking 250 0.57 0.59 0.516 0.385 6.3 1.13
## learn by playing 250 0.61 0.65 0.597 0.469 6.4 0.94
## reasons for rules 250 0.55 0.47 0.326 0.220 5.6 1.81
## reward accuracy 250 0.38 0.29 0.084 0.047 3.9 1.66
## spontaneous play 250 0.48 0.49 0.358 0.255 5.7 1.22
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 7 miss
## can learn good and bad 0.00 0.00 0.02 0.03 0.06 0.32 0.58 0
## examples are good 0.00 0.00 0.01 0.05 0.07 0.36 0.51 0
## follow-in good 0.00 0.02 0.04 0.12 0.21 0.36 0.25 0
## learn before talking 0.00 0.01 0.02 0.03 0.10 0.18 0.65 0
## learn by playing 0.00 0.00 0.01 0.03 0.09 0.29 0.58 0
## reasons for rules 0.07 0.03 0.06 0.04 0.13 0.26 0.42 0
## reward accuracy 0.06 0.16 0.20 0.20 0.20 0.10 0.08 0
## spontaneous play 0.00 0.01 0.04 0.13 0.18 0.33 0.30 0
Get eigenvalues for determining number of factors.
att.mat <- select(wide.attitudes, -workerid)
ev <- eigen(cor(x=att.mat, use = "complete.obs")) # get eigenvalues, removes NAs
ap <- parallel(subject=nrow(att.mat), var=ncol(att.mat),
rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)
Now plot factor analysis
n.factors <- 3
x <- na.omit(att.mat) #get rid of NA caused by DU responses- gets rid of entire case
af <- factanal(x, factors = n.factors, scores = "regression")
loadings <- data.frame(af$loadings[,1:n.factors]) %>%
mutate(item = rownames(af$loadings)) %>%
gather(factor, loading, starts_with("Factor"))
qplot(factor, item, fill=loading, geom="tile", data = loadings) +
scale_fill_continuous(low="#000000", high="#FFFFFF")
qplot(factor, item, fill=loading > .35, geom="tile", data = loadings) +
scale_fill_solarized()
Merge back in subject id info.
wide.attitudes <- na.omit(wide.attitudes)
a.scores <- af$scores %>%
data.frame %>%
mutate(workerid = as.character(wide.attitudes$workerid)) %>%
left_join(subinfo)
And summary:
a.factor.names <- c("Affection and Attachment", "Rules and Respect", "Strict Rules")
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(ses != "") %>%
mutate(high.ses = ses > 5,
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(high.ses, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=high.ses,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(gender != "") %>%
mutate(male.gender = gender == "Male",
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(male.gender, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=male.gender,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(education != "") %>%
mutate(high.education = (education == "4year" |education == "someGrad" | education == "Grad"),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(high.education, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=high.education,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(age != "") %>%
mutate(young.age = (age == "0-19" |age == "20-29"),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(young.age, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=young.age,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(children != "") %>%
mutate(n.children = children,
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(n.children, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=n.children,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(children != "") %>%
mutate(yes.children = (children == "1" | children == "2" | children == "3"| children == "4"| children == "5"),
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(yes.children, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=yes.children,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
ms <- a.scores %>%
gather(factor, score, starts_with("Factor")) %>%
filter(ethnicity != "") %>%
mutate(hispanic = ethnicity== "Hispanic",
factor.num = as.numeric(str_replace(factor,"Factor","")),
factor.name = a.factor.names[factor.num]) %>%
group_by(hispanic, factor.name) %>%
multi_boot_standard(col = "score")
qplot(factor.name, mean, fill=hispanic,
geom = "bar", stat = "identity", position = "dodge",
data=ms) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
The Rules and Respect subscale seems to hang together best in terms of alpha correlations, though both Rules and Respect and Affection and Attachment items load onto seperate factors as predicted. The Active Learning subscale is performing much worse, with a low alpha and no factor associated with it.
In combination with the results of Experiment 5, it seems that the next step is to exclude the items that had low correlations with other items, and perhaps add a few more items in their place. Since Experiment 5 showed very little variability when the scale was dichotomized (i.e., all items were endorsed most of the time), it might make sense to anchor the scale differently, as “How much do you Agree with the following statement” (rather than Agree or Disagree), with 0 signifying not at all, and 6 signifying strongly agree.