Preliminaries.
## [1] "dplyr" "langcog" "tidyr" "ggplot2" "lme4"
##
## Attaching package: 'langcog'
## The following object is masked from 'package:base':
##
## scale
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## %+%(): ggplot2, psych
## alpha(): ggplot2, psych
## filter(): dplyr, stats
## lag(): dplyr, stats
##
## Attaching package: 'ggthemes'
## The following objects are masked from 'package:langcog':
##
## scale_color_solarized, scale_colour_solarized,
## scale_fill_solarized
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
Read in participant data.
files <- dir("../production-results/parenting_behaviors_e2/")
attitudes <- data.frame()
behaviors <- data.frame()
subinfo <- data.frame()
for (f in files) {
jf <- paste("../production-results/parenting_behaviors_e2/",f,sep="")
jd <- fromJSON(paste(readLines(jf), collapse=""))
#paq answers
attitudes_id <- data.frame(sid = as.factor(jd$WorkerId),
sent = jd$answers$data$sentence[1:24],
rating = as.numeric(jd$answers$data$rating[1:24]))
attitudes <- bind_rows(attitudes, attitudes_id)
#behaviors
behaviors_id <- data.frame(sid = as.factor(jd$WorkerId),
sent = jd$answers$data$sentence[25:36],
rating = as.numeric(jd$answers$data$rating[25:36]))
behaviors <- bind_rows(behaviors, behaviors_id)
#demographics
subinfo_id <- data.frame(sid = as.factor(jd$WorkerId),
behave_age = jd$answers$data$behaveAge,
children = jd$answers$data$children,
youngest = jd$answers$data$childAgeYoung,
oldest = jd$answers$data$childAgeOld,
ses = jd$answers$data$ladder,
gender = jd$answers$data$gender,
age = jd$answers$data$age,
education = jd$answers$data$education,
ethnicity = jd$answers$data$ethnicity,
race = as.character(jd$answers$data$race[1]),
comments = jd$answers$data$comments)
subinfo <- bind_rows(subinfo, subinfo_id)
}
Read in trial info and questionnaire labels.
labels <- read.csv("sent_forms.csv")
labels$sent <- as.character(labels$sent)
behave <- read.csv("behaviors_e2.csv")
Clean up labels.
attitudes$sent <- str_replace_all(as.character(attitudes$sent), "[â‘”“’']", "")
behaviors$sent <- str_replace_all(as.character(behaviors$sent), "[â‘”“’']", "")
Make data frames.
dq <- attitudes %>%
left_join(labels)%>%
mutate(category_paq = category)%>%
select(-category)
db <- behaviors%>%
left_join(behave)%>%
left_join(subinfo)%>%
filter(behave_age != "older")%>%
mutate(category_bev = category)%>%
select(-category)
#remove items that parents marked as "my child is too young"
db$rating[db$rating == 0] <- NA
#rescore reverse coded items
dq$rating <- as.numeric(dq$rating)
dq$rating[dq$reverse_code == 1] <- 6 - dq$rating[dq$reverse_code == 1]
Get means by category.
atts <- dq %>%
group_by(sid, category_paq) %>%
summarise(rating_paq = mean(rating))
bevs <- db %>%
group_by(sid, category_bev) %>%
summarise(rating_bev = mean(rating))
all <- atts %>%
left_join(bevs)%>%
left_join(subinfo)%>%
filter(!children == "0")
all$category_paq <- str_replace(all$category_paq, "AA", "AA_paq")
all$category_paq <- str_replace(all$category_paq, "EL", "EL_paq")
all$category_paq <- str_replace(all$category_paq, "RR", "RR_paq")
all$category_bev <- str_replace(all$category_bev, "AA", "AA_behave")
all$category_bev <- str_replace(all$category_bev, "EL", "EL_behave")
all$category_bev <- str_replace(all$category_bev, "RR", "RR_behave")
subinfo$education <- factor(subinfo$education,
levels = c("highSchool","someCollege","4year","someGrad","Grad"))
qplot(ses, data=subinfo)
qplot(children, data=subinfo)
qplot(gender, data=subinfo)
qplot(education, data=subinfo)
qplot(age, data=subinfo)
qplot(race, data=subinfo)
qplot(ethnicity, data=subinfo)
Get mean ratings for sentences.
ms <- dq %>%
group_by(category_paq, short_sent, reverse_code) %>%
multi_boot_standard(col = "rating", na.rm = TRUE) %>%
arrange(category_paq, desc(mean))
ms$short_sent_ord <- factor(ms$short_sent,
levels = ms$short_sent)
#for comparison with CDM data
beh <- ms
save(beh, file = "beh.RData")
Plot responses to individual questionnaire items.
qplot(short_sent_ord, mean, col = category_paq,
ymin = ci_lower, ymax = ci_upper, pch = factor(reverse_code),
geom = "pointrange",
data = ms) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("") +
ylab("Mean Rating") +
ylim(c(0,6)) +
scale_colour_solarized()
Plot mean subscale scores.
atts_m <- dq %>%
group_by(category_paq) %>%
multi_boot_standard(col = "rating", na.rm = TRUE) %>%
arrange(category_paq, desc(mean))
ggplot(atts_m, aes(x = category_paq, y = mean)) +
geom_bar(stat="identity") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))+
ylim(c(0,6))
Get mean ratings for behaviors.
ms_b <- db %>%
group_by(category_bev, short_sent) %>%
multi_boot_standard(col = "rating", na.rm = TRUE) %>%
arrange(category_bev, desc(mean))
ms_b$short_sent_ord <- factor(ms_b$short_sent,
levels = ms_b$short_sent)
Recode variables for plotting.
bev <- behaviors %>%
left_join(behave)%>%
filter(rating != 0)%>%
group_by(category)
bev$rating[bev$rating=="1"] <- "Never"
bev$rating[bev$rating=="2"] <- "Almost never"
bev$rating[bev$rating=="3"] <- "Occasionally"
bev$rating[bev$rating=="4"] <- "Once or twice per week"
bev$rating[bev$rating=="5"] <- "Most days"
bev$rating[bev$rating=="6"] <- "Multiple times ever day"
bev$rating <- factor(bev$rating, levels = c("My child is too young", "Never", "Almost never","Occasionally","Once or twice per week","Most days","Multiple times ever day"))
bev$short_sent <- factor(bev$short_sent, levels = c("read","practice numbers and letters","make observations", "educational programming","talk about feelings","spend time cuddling","sleep in the same bed","hug and kiss","talk sternly","give time out or punishments","talk about setting limits","help with chores"))
subinfo$behave_age[subinfo$behave_age == "0-6"] <- "0-6 months"
subinfo$behave_age[subinfo$behave_age == "7-12"] <- "7-12 months"
subinfo$behave_age <- factor(subinfo$behave_age, levels = c("0-6 months", "7-12 months", "1-1.5","1.5-2","2-2.5","2.5-3","3-3.5","3.5-4", "4-4.5","4.5-5", "older"))
Plot mean frequency of individual behavior items. This treats frequency as a continuous variable which it really isn’t- is there a better way or does this seem okay?
Parents were asked to answer questions about their youngest child. How old is their youngest child?
qplot(behave_age, data=subinfo)
How frequent were individual behaviors?
qplot(short_sent_ord, mean, col = category_bev,
ymin = ci_lower, ymax = ci_upper,
geom = "pointrange",
data = ms_b) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("") +
ylab("Frequency of Behavior") +
ylim(c(0,6)) +
scale_colour_solarized()
What do the means look like?
ggplot(bev, aes(short_sent, fill = rating, shape = category)) +
geom_bar(position = "fill")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
xlab("Behavior") +
ylab("Proportion responding")
Are behaviors within a category correlated with each other? This is relevant if we choose to use mean scores within these categories for subsequent analyses.
Rules and Respect
wide.bev <- db %>%
filter(category_bev == "RR") %>%
select(sid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.rr <- as.matrix(select(wide.bev, -sid))
psych::alpha(x = alpha.rr)
##
## Reliability analysis
## Call: psych::alpha(x = alpha.rr)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.71 0.71 0.66 0.38 2.5 0.03 3.7 1
##
## lower alpha upper 95% confidence boundaries
## 0.65 0.71 0.77
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## give time out or punishments 0.65 0.65 0.57 0.39 1.9
## help with chores 0.72 0.72 0.64 0.46 2.6
## talk about setting limits 0.62 0.62 0.55 0.36 1.7
## talk sternly 0.59 0.59 0.49 0.32 1.4
## alpha se
## give time out or punishments 0.039
## help with chores 0.031
## talk about setting limits 0.043
## talk sternly 0.046
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## give time out or punishments 197 0.73 0.73 0.59 0.49 3.1 1.2
## help with chores 206 0.68 0.65 0.44 0.38 4.1 1.3
## talk about setting limits 202 0.78 0.76 0.64 0.54 4.2 1.4
## talk sternly 224 0.81 0.79 0.72 0.59 3.8 1.3
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## give time out or punishments 0.12 0.20 0.31 0.24 0.12 0.01 0.19
## help with chores 0.04 0.08 0.21 0.20 0.34 0.13 0.16
## talk about setting limits 0.02 0.10 0.21 0.17 0.30 0.19 0.17
## talk sternly 0.03 0.10 0.33 0.25 0.19 0.11 0.08
Affection and Attachment
wide.bev <- db %>%
filter(category_bev == "AA") %>%
select(sid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.aa <- as.matrix(select(wide.bev, -sid))
psych::alpha(x = alpha.aa)
##
## Reliability analysis
## Call: psych::alpha(x = alpha.aa)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.55 0.59 0.6 0.27 1.5 0.048 4.4 0.87
##
## lower alpha upper 95% confidence boundaries
## 0.46 0.55 0.65
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## hug and kiss 0.40 0.42 0.34 0.19 0.71 0.067
## sleep in the same bed 0.68 0.69 0.65 0.42 2.18 0.036
## spend time cuddling 0.28 0.32 0.26 0.14 0.47 0.080
## talk about feelings 0.53 0.58 0.59 0.32 1.40 0.055
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## hug and kiss 243 0.69 0.75 0.72 0.47 5.5 1.1
## sleep in the same bed 238 0.61 0.50 0.19 0.15 3.0 1.6
## spend time cuddling 240 0.79 0.82 0.81 0.56 4.9 1.3
## talk about feelings 199 0.56 0.61 0.36 0.27 4.1 1.2
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## hug and kiss 0.00 0.02 0.07 0.03 0.14 0.73 0.00
## sleep in the same bed 0.26 0.15 0.22 0.12 0.19 0.06 0.02
## spend time cuddling 0.02 0.05 0.09 0.09 0.31 0.44 0.02
## talk about feelings 0.02 0.06 0.30 0.18 0.33 0.12 0.18
Early Learning
wide.bev <- db %>%
filter(category_bev == "EL") %>%
select(sid, short_sent, rating) %>%
spread(short_sent, rating)
alpha.el <- as.matrix(select(wide.bev, -sid))
psych::alpha(x = alpha.el)
##
## Reliability analysis
## Call: psych::alpha(x = alpha.el)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.69 0.7 0.65 0.37 2.3 0.032 4.3 1
##
## lower alpha upper 95% confidence boundaries
## 0.63 0.69 0.76
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N
## educational programming 0.69 0.70 0.61 0.43 2.3
## make observations 0.61 0.61 0.53 0.35 1.6
## practice numbers and letters 0.60 0.60 0.51 0.33 1.5
## read 0.62 0.62 0.52 0.35 1.6
## alpha se
## educational programming 0.034
## make observations 0.044
## practice numbers and letters 0.045
## read 0.042
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## educational programming 224 0.70 0.66 0.46 0.38 4.3 1.4
## make observations 218 0.76 0.74 0.61 0.51 4.3 1.3
## practice numbers and letters 215 0.76 0.76 0.64 0.53 4.3 1.3
## read 236 0.75 0.74 0.62 0.50 4.6 1.2
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## educational programming 0.06 0.06 0.12 0.19 0.38 0.19 0.08
## make observations 0.00 0.08 0.26 0.16 0.26 0.24 0.11
## practice numbers and letters 0.02 0.06 0.21 0.20 0.34 0.17 0.12
## read 0.03 0.04 0.14 0.15 0.41 0.23 0.03
Early Learning and Rules and Respect items hang together pretty well (alphas = .70) but Affection and Attachment items are not as great (alpha = .55). Dropping “sleep in the same bed” would help. Also, “hug and kiss” is close to ceiling with most people reporting “multiple times every day.”
Education
mf <- bevs %>%
left_join(subinfo)%>%
filter(education != "") %>%
group_by(education, category_bev) %>%
multi_boot_standard(col = "rating_bev", na.rm = TRUE)
ggplot(mf, aes(category_bev, mean, fill=education)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Gender
mf <- bevs %>%
left_join(subinfo)%>%
filter(gender == "Man" || gender == "Woman") %>%
group_by(gender, category_bev) %>%
multi_boot_standard(col = "rating_bev", na.rm = TRUE)
ggplot(mf, aes(category_bev, mean, fill=gender)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
Moms are engaging with their children more overall.
Age of child
mf <- bevs %>%
left_join(subinfo)%>%
filter(behave_age != "") %>%
group_by(behave_age, category_bev) %>%
multi_boot_standard(col = "rating_bev", na.rm = TRUE)
ggplot(mf, aes(category_bev, mean, fill=behave_age)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
More behaviors are possible as kids get older.
Race
mf <- bevs %>%
left_join(subinfo)%>%
filter(race != "NULL") %>%
group_by(race, category_bev) %>%
multi_boot_standard(col = "rating_bev", na.rm = TRUE)
ggplot(mf, aes(category_bev, mean, fill=race)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9)) +
scale_fill_solarized()
SES
mf <- bevs %>%
left_join(subinfo)%>%
filter(ses != "") %>%
group_by(ses, category_bev) %>%
multi_boot_standard(col = "rating_bev", na.rm = TRUE)
ggplot(mf, aes(category_bev, mean, fill=ses)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
This might not be the best plot- I’d rather see the individual behaviors, but not sure the best way to present this without overwhelming.
ggplot(filter(all, !is.na(category_bev)), aes(x= rating_bev, y = rating_paq)) +
facet_grid(category_bev ~category_paq) +
geom_point(aes(color = category_paq)) +
xlab("Frequency of behavior") +
ylab("PAQ score") +
scale_colour_solarized()+
geom_smooth(method="lm", se=FALSE)
Set up dataframes for analyses.
d <- db %>%
left_join(atts)%>%
left_join(subinfo)%>%
filter(!is.na(rating))%>%
filter(children != 0)%>%
filter(behave_age != "older")%>%
filter(behave_age != "")%>%
select(sid, short_sent, rating, category_paq, category_bev, rating_paq, behave_age)%>%
spread(category_paq, rating_paq)
d$behave_age <- as.character(d$behave_age)
d$behave_age[d$behave_age == "0-6 months"] <- 1
d$behave_age[d$behave_age == "7-12 months"] <- 7
d$behave_age[d$behave_age == "1-1.5"] <- 13
d$behave_age[d$behave_age == "1.5-2"] <- 19
d$behave_age[d$behave_age == "2-2.5"] <- 25
d$behave_age[d$behave_age == "2.5-3"] <- 31
d$behave_age[d$behave_age == "3-3.5"] <- 37
d$behave_age[d$behave_age == "3.5-4"] <- 43
d$behave_age[d$behave_age == "4-4.5"] <- 49
d$behave_age[d$behave_age == "4.5-5"] <- 55
d$behave_age <- as.numeric(d$behave_age)
d<- d%>%
mutate(child_age = behave_age)%>%
select(-behave_age)%>%
mutate(rating_bin = rating)
d$rating_bin[d$rating %in% 1:4] <- "low"
d$rating_bin[d$rating %in% 5:6] <- "high"
d_aa <- d%>%
filter(category_bev == "AA")%>%
filter(!is.na(rating), !is.na(rating_bin))
d_el <- d%>%
filter(category_bev == "EL")%>%
filter(!is.na(rating),!is.na(rating_bin))
d_rr <- d%>%
filter(category_bev == "RR")%>%
filter(!is.na(rating),!is.na(rating_bin))
d_aa$rating_bin <- as.factor(d_aa$rating_bin)
d_el$rating_bin <- as.factor(d_el$rating_bin)
d_rr$rating_bin <- as.factor(d_rr$rating_bin)
Plot
d_plot_aa <- d_aa%>%
gather("subscale", "rating_paq", AA:RR)%>%
group_by(subscale, rating_bin) %>%
multi_boot_standard(col = "rating_paq", na.rm = TRUE)
ggplot(d_plot_aa, aes(subscale, mean, fill=rating_bin)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
d_plot_el <- d_el%>%
gather("subscale", "rating_paq", AA:RR)%>%
group_by(subscale, rating_bin) %>%
multi_boot_standard(col = "rating_paq", na.rm = TRUE)
ggplot(d_plot_el, aes(subscale, mean, fill=rating_bin)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
d_plot_rr <- d_rr%>%
gather("subscale", "rating_paq", AA:RR)%>%
group_by(subscale, rating_bin) %>%
multi_boot_standard(col = "rating_paq", na.rm = TRUE)
ggplot(d_plot_rr, aes(subscale, mean, fill=rating_bin)) +
geom_bar(stat="identity", position = "dodge") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper),
position = position_dodge(width = .9))
Separate models for each category of behavior. Do PAQ subscale scores predict the frequency of parenting behaviors?
model <- summary(glmer(rating_bin ~ AA + RR + EL + child_age +
(1|sid) +
(1|short_sent), #behavior item
data = d_aa,
family = "binomial"))
model
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## rating_bin ~ AA + RR + EL + child_age + (1 | sid) + (1 | short_sent)
## Data: d_aa
##
## AIC BIC logLik deviance df.resid
## 777.2 809.9 -381.6 763.2 785
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6545 -0.4463 -0.1969 0.5350 3.0713
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 1.262 1.123
## short_sent (Intercept) 2.678 1.636
## Number of obs: 792, groups: sid, 205; short_sent, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.90594 1.14315 4.292 1.77e-05 ***
## AA -0.97914 0.22194 -4.412 1.03e-05 ***
## RR 0.01263 0.15873 0.080 0.937
## EL -0.10770 0.21141 -0.509 0.610
## child_age -0.01160 0.00986 -1.176 0.240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) AA RR EL
## AA -0.191
## RR -0.179 -0.100
## EL -0.172 -0.673 -0.298
## child_age -0.143 -0.082 -0.207 0.095
#for EL behaviors
model <- summary(glmer(rating_bin ~ AA + RR + EL + child_age +
(1|sid) +
(1|short_sent), #behavior item
data = d_el,
family = "binomial"))
model
model <- summary(glmer(rating_bin ~ AA + RR + EL + child_age +
(1|sid) +
(1|short_sent), #behavior item
data = d_rr,
family = "binomial"))
model
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## rating_bin ~ AA + RR + EL + child_age + (1 | sid) + (1 | short_sent)
## Data: d_rr
##
## AIC BIC logLik deviance df.resid
## 883.9 916.4 -434.9 869.9 760
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6250 -0.6407 0.3210 0.5723 1.8340
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 1.847 1.3590
## short_sent (Intercept) 0.964 0.9819
## Number of obs: 767, groups: sid, 204; short_sent, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.19556 0.98437 4.262 2.02e-05 ***
## AA -0.22895 0.22913 -0.999 0.3177
## RR -0.16977 0.16965 -1.001 0.3170
## EL -0.16866 0.23191 -0.727 0.4671
## child_age -0.01906 0.01052 -1.812 0.0701 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) AA RR EL
## AA -0.131
## RR -0.251 -0.121
## EL -0.259 -0.708 -0.266
## child_age -0.191 -0.105 -0.167 0.084
PAQ scores predict Early Learning and Affection and attachment behaviors in the way we predicted- higher PAQ scores within a subscale predict more frequent parenting behaviors within the same category (i.e., frequency of Affection and Attachment behaviors are predicted by AA PAQ scores but not EL or RR PAQ scores). Additionally, Early Learning and Rules and Respect but not Affection and Attachment behaviors increase with child age.