Analysis walkthrough for the paper
Woodard, K., Zettersten, M., & Pollak, S.D. (accepted). The representation of emotion knowledge across development. Child Development.
For more background information and the underlying data, visit the OSF repository for this report: OSF site
We first examined whether participants used common English language emotion categories (e.g., sad, happy, anger, disgust, fear, surprise, neutral, calm, excitement ) to structure their placement of facial cues. To do so, we computed the average distance between images that shared the same category label (e.g., the distance of one happy face to another happy face) versus images that had differing category labels (e.g., the distance of one happy face to a sad face) for each participant (see also Unger et al., 2016 for a similar approach). We then fit linear mixed-effects models predicting the average distance between item pairs from the category match for an image pair (same category pair vs. different category pair; centered) across age groups, including by-participant random intercepts and by-participant random slopes for category match.
#read in data
subj_dist_long <- read.csv(here(root_path,"analysis","paper_2020","processed_data","Grid_subject_distance_item_pairs.csv"))
#### Summarizing Distances ####
#average distances across category types within each participant
avg_dist_cat_by_subj <- subj_dist_long %>%
group_by(subject, Age, Gender, age_bin,age_group,sort,category_pair) %>%
summarize(N=n(),avg_dist=mean(dist),ci_dist=qt(0.975, N-1)*sd(dist,na.rm=T)/sqrt(N))
#### Adults & children comparison ####
avg_dist_cat_by_subj <- avg_dist_cat_by_subj %>%
ungroup() %>%
mutate(AgeC=Age-mean(Age,na.rm=T),
category_pairC=ifelse(category_pair=="between",-0.5,0.5),
age_groupC=ifelse(age_group=="kids",-0.5,0.5),
sortC=ifelse(sort=="Sort1",-0.5,0.5))
#### Summarizing Differences in distance - within-between ####
#focus on differences in dist (same v. between categories)
avg_dist_diff_cat_by_subj <- subj_dist_long %>%
group_by(subject, Age, Gender, age_bin,age_group,sort,category_pair) %>%
summarize(avg_dist=mean(dist,na.rm=T)) %>%
ungroup() %>%
group_by(subject, Age, Gender, age_bin,age_group,sort) %>%
pivot_wider(names_from=category_pair,values_from=avg_dist) %>%
mutate(avg_dist_diff=between-within) %>%
mutate(sort_name=ifelse(sort=="Sort1","Same Individual",ifelse(sort=="Sort2","Different Individuals","Practice"))) %>%
ungroup()
#average across subjects by age bin
avg_dist_diff_cat_across_subj <- avg_dist_diff_cat_by_subj %>%
select(-between,-within) %>%
group_by(age_bin,age_group,sort,sort_name) %>%
summarize(N=n(),average_dist=mean(avg_dist_diff,na.rm=T),ci_dist=qt(0.975, N-1)*sd(avg_dist_diff,na.rm=T)/sqrt(N))
#clean labels for plot
age_names <- list(
'3 to 4'="3 y/o",
'4 to 5'="4 y/o",
'5 to 6'="5 y/o",
'6 to 7'="6 y/o",
'adults'="adults")
age_labeller <- function(variable,value){
return(age_names[value])}
ggplot(filter(avg_dist_diff_cat_across_subj, sort!="Practice"),aes(x=sort_name,y=average_dist,fill=age_bin,color=age_bin, linetype=sort_name))+
geom_errorbar(aes(ymin=average_dist-ci_dist,ymax=average_dist+ci_dist),width=0,size=1)+
geom_point(size=5)+
facet_wrap(~age_bin,nrow=1, labeller = age_labeller, strip.position = c("bottom"))+
geom_hline(yintercept=0)+
theme(legend.position=c(.1,.8), legend.title = element_blank(),
legend.margin = margin(.5,.5,.5,.5,"cm"),
legend.background = element_rect(fill="white",size=0.6,
linetype="solid", colour ="black")) +
#theme(legend.position='top', legend.justification ='center')+
scale_color_manual(values=c("#8c510a","#bf812d","#80cdc1","#35978f","#01665e"))+
guides(color=FALSE) +
guides(fill=FALSE) +
#scale_y_continuous(breaks=c(0,0.1,0.2,0.3, 0.4, 0.5,0.6), limits=c(0,0.5))+
ylab("Sorting by Emotion Categories \nLow<----------------------------------->High")+
xlab("Age Group")+
labs(linetype = "Sorting Phase") +
theme(axis.line.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x= element_blank(),
axis.title.x = element_text(size=20,face="bold"),
axis.text.y = element_text(size=18),
axis.title.y= element_text(size=18,face="bold"),
strip.text.x = element_text(size=18))
# interaction between emotion category and sort
m <- lmer(avg_dist~category_pairC*sortC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC * sortC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, sort != "Practice")
##
## REML criterion at convergence: -1446.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.79141 -0.62199 0.04924 0.60213 3.09237
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.001221 0.03495
## category_pairC 0.006089 0.07803 1.00
## Residual 0.003192 0.05650
## Number of obs: 582, groups: subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.406300 0.003722 153.822826 109.160 <2e-16 ***
## category_pairC -0.086712 0.007979 150.213725 -10.867 <2e-16 ***
## sortC -0.004218 0.004686 433.643650 -0.900 0.369
## category_pairC:sortC -0.010911 0.009373 433.741926 -1.164 0.245
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ctgr_C sortC
## categry_prC 0.629
## sortC 0.003 0.001
## ctgry_prC:C 0.001 0.003 0.001
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m, method="Wald")[5:8,]
## 2.5 % 97.5 %
## (Intercept) 0.39900508 0.413595358
## category_pairC -0.10235136 -0.071072797
## sortC -0.01340225 0.004966063
## category_pairC:sortC -0.02928166 0.007459340
anova(m) #Satterthwaite's method
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.37691 0.37691 1 150.21 118.0923 <2e-16 ***
## sortC 0.00259 0.00259 1 433.64 0.8103 0.3685
## category_pairC:sortC 0.00433 0.00433 1 433.74 1.3552 0.2450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC*sortC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC * sortC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, sort != "Practice")
##
## REML criterion at convergence: -1352.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.52037 -0.49961 -0.00169 0.52365 3.01513
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.001004 0.03169
## subject.1 category_pairC 0.005315 0.07290
## Residual 0.003604 0.06004
## Number of obs: 582, groups: subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.406261 0.003617 145.314064 112.331 <2e-16 ***
## category_pairC -0.086713 0.007824 145.125569 -11.083 <2e-16 ***
## sortC -0.004297 0.004980 289.687512 -0.863 0.389
## category_pairC:sortC -0.010912 0.009961 289.636861 -1.095 0.274
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ctgr_C sortC
## categry_prC 0.000
## sortC 0.003 0.000
## ctgry_prC:C 0.000 0.003 0.000
confint(m, method="Wald")[4:7,]
## 2.5 % 97.5 %
## (Intercept) 0.39917210 0.413349027
## category_pairC -0.10204680 -0.071378275
## sortC -0.01405835 0.005463539
## category_pairC:sortC -0.03043619 0.008612046
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.44274 0.44274 1 145.13 122.8384 <2e-16 ***
## sortC 0.00268 0.00268 1 289.69 0.7446 0.3889
## category_pairC:sortC 0.00432 0.00432 1 289.64 1.2000 0.2742
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Emotion Category: Children versus Adults
m <- lmer(avg_dist~category_pairC*age_groupC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC * age_groupC + (1 + category_pairC |
## subject)
## Data: filter(avg_dist_cat_by_subj, sort != "Practice")
##
## REML criterion at convergence: -1538.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.166 -0.603 0.031 0.603 3.031
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.0008777 0.02963
## category_pairC 0.0021239 0.04609 1.00
## Residual 0.0030475 0.05520
## Number of obs: 582, groups: subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.396445 0.003758 154.604290 105.484 < 2e-16 ***
## category_pairC -0.120435 0.006675 172.856803 -18.042 < 2e-16 ***
## age_groupC -0.043663 0.007517 154.604290 -5.809 3.47e-08 ***
## category_pairC:age_groupC -0.149511 0.013350 172.856803 -11.199 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ctgr_C ag_grC
## categry_prC 0.469
## age_groupC 0.451 0.212
## ctgry_pC:_C 0.212 0.451 0.469
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m, method="Wald")[5:8,]
## 2.5 % 97.5 %
## (Intercept) 0.38907861 0.40381107
## category_pairC -0.13351764 -0.10735149
## age_groupC -0.05839557 -0.02893064
## category_pairC:age_groupC -0.17567685 -0.12334455
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.99201 0.99201 1 172.86 325.521 < 2.2e-16 ***
## age_groupC 0.10283 0.10283 1 154.60 33.742 3.472e-08 ***
## category_pairC:age_groupC 0.38221 0.38221 1 172.86 125.418 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC*age_groupC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC * age_groupC + (1 + category_pairC ||
## subject)
## Data: filter(avg_dist_cat_by_subj, sort != "Practice")
##
## REML criterion at convergence: -1484.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9390 -0.5730 0.0000 0.5658 3.4008
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.0006350 0.02520
## subject.1 category_pairC 0.0008777 0.02963
## Residual 0.0035994 0.06000
## Number of obs: 582, groups: subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.396433 0.003637 144.039244 109.01 < 2e-16 ***
## category_pairC -0.120441 0.006212 143.878660 -19.39 < 2e-16 ***
## age_groupC -0.043640 0.007273 144.039244 -6.00 1.52e-08 ***
## category_pairC:age_groupC -0.149497 0.012423 143.878660 -12.03 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) ctgr_C ag_grC
## categry_prC 0.000
## age_groupC 0.451 0.000
## ctgry_pC:_C 0.000 0.450 0.000
confint(m, method="Wald")[4:7,]
## 2.5 % 97.5 %
## (Intercept) 0.38930543 0.40356113
## category_pairC -0.13261594 -0.10826642
## age_groupC -0.05789568 -0.02938428
## category_pairC:age_groupC -0.17384700 -0.12514794
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 1.35319 1.35319 1 143.88 375.946 < 2.2e-16 ***
## age_groupC 0.12957 0.12957 1 144.04 35.999 1.523e-08 ***
## category_pairC:age_groupC 0.52121 0.52121 1 143.88 144.805 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#### Kids Only, Age continuous ####
m <- lmer(avg_dist~AgeC*category_pairC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,age_group=="kids"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ AgeC * category_pairC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, age_group == "kids" & sort != "Practice")
##
## REML criterion at convergence: -1109.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8609 -0.5620 0.0317 0.6077 3.3269
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.0008363 0.02892
## category_pairC 0.0018810 0.04337 1.00
## Residual 0.0030561 0.05528
## Number of obs: 422, groups: subject, 106
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.418262 0.003891 113.250745 107.487 < 2e-16 ***
## AgeC -0.010256 0.003533 113.064064 -2.903 0.00445 **
## category_pairC -0.045678 0.006836 131.133382 -6.682 6.13e-10 ***
## AgeC:category_pairC -0.032921 0.006207 130.912541 -5.304 4.69e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) AgeC ctgr_C
## AgeC 0.000
## categry_prC 0.445 0.001
## AgC:ctgry_C 0.001 0.446 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[5:8,]
## 2.5 % 97.5 %
## (Intercept) 0.41063558 0.425889162
## AgeC -0.01718093 -0.003331043
## category_pairC -0.05907732 -0.032279259
## AgeC:category_pairC -0.04508561 -0.020756142
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## AgeC 0.025751 0.025751 1 113.06 8.4259 0.004448 **
## category_pairC 0.136441 0.136441 1 131.13 44.6446 6.128e-10 ***
## AgeC:category_pairC 0.085982 0.085982 1 130.91 28.1341 4.686e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~AgeC*category_pairC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,age_group=="kids"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ AgeC * category_pairC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, age_group == "kids" & sort != "Practice")
##
## REML criterion at convergence: -1072.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0005 -0.5370 0.0117 0.5494 3.7110
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.0005445 0.02333
## subject.1 category_pairC 0.0003269 0.01808
## Residual 0.0037324 0.06109
## Number of obs: 422, groups: subject, 106
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.418258 0.003740 104.303815 111.823 < 2e-16 ***
## AgeC -0.010253 0.003396 104.106336 -3.019 0.00319 **
## category_pairC -0.045668 0.006202 104.252793 -7.363 4.35e-11 ***
## AgeC:category_pairC -0.032928 0.005630 104.044897 -5.849 5.77e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) AgeC ctgr_C
## AgeC 0.000
## categry_prC 0.000 0.000
## AgC:ctgry_C 0.000 0.000 -0.001
confint(m,method="Wald")[4:7,]
## 2.5 % 97.5 %
## (Intercept) 0.41092679 0.425588656
## AgeC -0.01690805 -0.003597502
## category_pairC -0.05782407 -0.033511331
## AgeC:category_pairC -0.04396213 -0.021894244
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## AgeC 0.034028 0.034028 1 104.11 9.1169 0.003186 **
## category_pairC 0.202344 0.202344 1 104.25 54.2134 4.346e-11 ***
## AgeC:category_pairC 0.127689 0.127689 1 104.05 34.2114 5.767e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#3-4 subgroup
m <- lmer(avg_dist~category_pairC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,age_bin=="3 to 4"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "3 to 4" & sort != "Practice")
##
## REML criterion at convergence: -241.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7093 -0.5911 0.1150 0.5653 1.7269
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.000843 0.02903
## category_pairC 0.001039 0.03223 1.00
## Residual 0.002153 0.04640
## Number of obs: 84, groups: subject, 21
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.427330 0.008110 20.793729 52.689 <2e-16 ***
## category_pairC -0.009543 0.012329 27.677639 -0.774 0.446
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.446
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) 0.4114343 0.44322672
## category_pairC -0.0337081 0.01462266
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.00129 0.00129 1 27.678 0.599 0.4455
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,age_bin=="3 to 4"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "3 to 4" & sort != "Practice")
##
## REML criterion at convergence: -234.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2347 -0.5886 0.1179 0.5805 2.0110
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.0006762 0.02600
## subject.1 category_pairC 0.0000000 0.00000
## Residual 0.0025696 0.05069
## Number of obs: 84, groups: subject, 21
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.427330 0.007924 19.999998 53.929 <2e-16 ***
## category_pairC -0.009543 0.011062 62.000002 -0.863 0.392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[4:5,]
## 2.5 % 97.5 %
## (Intercept) 0.41179979 0.44286119
## category_pairC -0.03122322 0.01213778
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.0019123 0.0019123 1 62 0.7442 0.3916
#4-5
m <- lmer(avg_dist~category_pairC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,age_bin=="4 to 5"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "4 to 5" & sort != "Practice")
##
## REML criterion at convergence: -331.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.53933 -0.57396 0.07195 0.50426 2.27254
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.001044 0.03231
## category_pairC 0.001996 0.04468 1.00
## Residual 0.003449 0.05873
## Number of obs: 134, groups: subject, 34
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.433550 0.007521 35.773818 57.644 <2e-16 ***
## category_pairC -0.009208 0.012724 43.288052 -0.724 0.473
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.445
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) 0.41880870 0.44829115
## category_pairC -0.03414714 0.01573151
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.0018063 0.0018063 1 43.288 0.5236 0.4732
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,age_bin=="4 to 5"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "4 to 5" & sort != "Practice")
##
## REML criterion at convergence: -320.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9409 -0.5212 0.0962 0.5348 2.4285
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.0006941 0.02635
## subject.1 category_pairC 0.0000000 0.00000
## Residual 0.0042940 0.06553
## Number of obs: 134, groups: subject, 34
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.433558 0.007251 33.282377 59.796 <2e-16 ***
## category_pairC -0.009116 0.011322 99.561273 -0.805 0.423
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[4:5,]
## 2.5 % 97.5 %
## (Intercept) 0.41934725 0.44776932
## category_pairC -0.03130643 0.01307363
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.0027841 0.0027841 1 99.561 0.6484 0.4226
#5-6
m <- lmer(avg_dist~category_pairC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,age_bin=="5 to 6"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "5 to 6" & sort != "Practice")
##
## REML criterion at convergence: -289.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.14864 -0.60611 -0.09781 0.73896 2.74479
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.0009797 0.03130
## category_pairC 0.0021707 0.04659 1.00
## Residual 0.0029752 0.05455
## Number of obs: 112, groups: subject, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.408842 0.007846 28.368514 52.111 < 2e-16 ***
## category_pairC -0.063975 0.013557 31.814379 -4.719 4.54e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.490
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) 0.39346496 0.42421928
## category_pairC -0.09054512 -0.03740407
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.066257 0.066257 1 31.814 22.27 4.543e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,age_bin=="5 to 6"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "5 to 6" & sort != "Practice")
##
## REML criterion at convergence: -279.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3818 -0.5216 -0.0616 0.5989 3.4955
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.0007991 0.02827
## subject.1 category_pairC 0.0012006 0.03465
## Residual 0.0033907 0.05823
## Number of obs: 112, groups: subject, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.408842 0.007669 27.000017 53.312 < 2e-16 ***
## category_pairC -0.063975 0.012805 27.000000 -4.996 3.08e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.000
confint(m,method="Wald")[4:5,]
## 2.5 % 97.5 %
## (Intercept) 0.39381141 0.42387283
## category_pairC -0.08907258 -0.03887661
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.08463 0.08463 1 27 24.959 3.079e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#6-7
m <- lmer(avg_dist~category_pairC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,age_bin=="6 to 7"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "6 to 7" & sort != "Practice")
##
## REML criterion at convergence: -240.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3684 -0.5077 0.0157 0.5380 3.5268
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.0002018 0.01421
## category_pairC 0.0008698 0.02949 1.00
## Residual 0.0033671 0.05803
## Number of obs: 92, groups: subject, 23
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.399049 0.006736 35.414174 59.241 < 2e-16 ***
## category_pairC -0.109816 0.013572 33.938926 -8.091 1.99e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.199
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) 0.3858468 0.41225159
## category_pairC -0.1364172 -0.08321401
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.22042 0.22042 1 33.939 65.465 1.987e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,age_bin=="6 to 7"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "6 to 7" & sort != "Practice")
##
## REML criterion at convergence: -239
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5668 -0.5767 -0.0203 0.5336 3.5980
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 2.964e-21 5.444e-11
## subject.1 category_pairC 0.000e+00 0.000e+00
## Residual 3.777e-03 6.146e-02
## Number of obs: 92, groups: subject, 23
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.399049 0.006407 90.000000 62.279 < 2e-16 ***
## category_pairC -0.109816 0.012815 90.000000 -8.569 2.7e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[4:5,]
## 2.5 % 97.5 %
## (Intercept) 0.3864909 0.41160749
## category_pairC -0.1349321 -0.08469907
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 0.27737 0.27737 1 90 73.435 2.704e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#adults
m <- lmer(avg_dist~category_pairC+(1+category_pairC|subject),data=filter(avg_dist_cat_by_subj,age_bin=="adults"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC | subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "adults" & sort != "Practice")
##
## REML criterion at convergence: -445.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2782 -0.6480 0.0514 0.5266 3.1892
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.0007332 0.02708
## category_pairC 0.0003863 0.01966 1.00
## Residual 0.0027182 0.05214
## Number of obs: 160, groups: subject, 40
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.374613 0.005943 40.124364 63.03 <2e-16 ***
## category_pairC -0.195190 0.008810 76.204676 -22.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.254
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[5:6,]
## 2.5 % 97.5 %
## (Intercept) 0.3629654 0.3862611
## category_pairC -0.2124569 -0.1779229
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 1.3343 1.3343 1 76.205 490.88 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The full model above yields a singular fit, due to the high covariance between the random intercept and random slope. However, a model removing the covariance to avoid the singular fit warning yields virtually identical results.
## model removing correlation between random intercept and slope
## to avoid singular fit yields equivalent results
m <- lmer(avg_dist~category_pairC+(1+category_pairC||subject),data=filter(avg_dist_cat_by_subj,age_bin=="adults"&sort!="Practice"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: avg_dist ~ category_pairC + (1 + category_pairC || subject)
## Data: filter(avg_dist_cat_by_subj, age_bin == "adults" & sort != "Practice")
##
## REML criterion at convergence: -441.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3700 -0.6405 -0.0229 0.5731 3.5638
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.0006599 0.02569
## subject.1 category_pairC 0.0000000 0.00000
## Residual 0.0028859 0.05372
## Number of obs: 160, groups: subject, 40
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.374613 0.005877 39.000000 63.75 <2e-16 ***
## category_pairC -0.195190 0.008494 119.000000 -22.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## categry_prC 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
confint(m,method="Wald")[4:5,]
## 2.5 % 97.5 %
## (Intercept) 0.3630954 0.3861311
## category_pairC -0.2118379 -0.1785419
anova(m)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## category_pairC 1.524 1.524 1 119 528.07 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Next, we investigated whether dimensions of bipolar valence, bivariate valence, and arousal predicted participant’s sorting behavior using the image ratings collected from a separate group of adults. To do so, we fit a series of linear mixed-effects model regressing the average distance between item pairs on the similarity of the image pairs on the dimension of interest (bipolar valence, arousal, positivity, and negativity) – measured in terms of the difference in average stimulus rating –, age group (adults: .5; children: -.5), and their interaction, including a by-participant random intercept, a by-participant random slope for the dimension of interest, and a by-item random intercept.
#### Read in and prepare data ####
subj_dist_long <- read.csv(here(root_path,"analysis","paper_2020","processed_data","Grid_subject_distance_item_pairs.csv"))
ratings_pairs <- read.csv(here(root_path,"analysis","paper_2020","processed_data","ratings_item_pairs.csv"))
#select variables of interest
subj_dist_long <- subj_dist_long %>% select(participant,sort,subject,Age,Gender,age_bin,age_group,
dist,items,item1,item2,image_cat_1,image_cat_2,image_tax_cat_1,
image_tax_cat_2,emotion_pair_same,shared_category,category_pair)
subj_dist_long <- subj_dist_long %>%
left_join(ratings_pairs)
# average distance item pair by group
dist_long_byGroup <- subj_dist_long %>%
group_by(age_bin,age_group,items,sort) %>%
summarize(N=n(),average_dist=mean(dist),ci_dist=qt(0.975, N-1)*sd(dist,na.rm=T)/sqrt(N))
dist_long_byGroup <- dist_long_byGroup %>%
left_join(ratings_pairs)
#prepare dimensions of affect data
dimension_data <- subj_dist_long %>% filter(sort!="Practice") %>%
select(subject, sort, Age, age_bin, age_group, Gender, dist, items, dist_valence, dist_arousal, dist_pos, dist_neg) %>%
mutate(subject= as.factor(subject), Gender=as.factor(Gender), age_group = as.factor(age_group), age_bin = as.factor(age_bin), sort=as.factor(sort)) %>%
mutate(sortC=ifelse(sort=="Sort1",-0.5,0.5))
# center dimensions
dimension_data <- dimension_data %>% mutate(dist_valence=dist_valence-mean(dist_valence)) %>%
mutate(dist_arousal=dist_arousal-mean(dist_arousal)) %>%
mutate(dist_pos=dist_pos-mean(dist_pos)) %>%
mutate(dist_neg=dist_neg-mean(dist_neg))
dimension_data <- dimension_data %>% mutate(age_groupC=ifelse(age_group=="kids",-0.5,0.5))
m1 <- lmer(dist~dist_valence*age_groupC+(dist_valence|subject)+(1|items),data=dimension_data)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_valence * age_groupC + (dist_valence | subject) +
## (1 | items)
## Data: dimension_data
##
## REML criterion at convergence: -13290.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7372 -0.7633 -0.1108 0.6580 3.6004
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0011965 0.03459
## subject (Intercept) 0.0008208 0.02865
## dist_valence 0.0009062 0.03010 0.12
## Residual 0.0423148 0.20571
## Number of obs: 44523, groups: items, 306; subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.496e-01 3.488e-03 2.686e+02 128.874 < 2e-16 ***
## dist_valence 5.594e-02 3.308e-03 2.234e+02 16.911 < 2e-16 ***
## age_groupC 2.236e-02 5.748e-03 1.440e+02 3.891 0.000152 ***
## dist_valence:age_groupC 7.107e-02 5.843e-03 1.438e+02 12.164 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_vl ag_grC
## dist_valenc 0.075
## age_groupC 0.372 0.041
## dst_vlnc:_C 0.039 0.399 0.104
confint(m1, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.44273752 0.45641211
## dist_valence 0.04945459 0.06242077
## age_groupC 0.01109639 0.03362744
## dist_valence:age_groupC 0.05961922 0.08252283
anova(m1) #yields F and p-values when lmerTest is loaded (Satterthwaite by default)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_valence 12.1013 12.1013 1 223.41 285.983 < 2.2e-16 ***
## age_groupC 0.6405 0.6405 1 144.04 15.136 0.0001523 ***
## dist_valence:age_groupC 6.2607 6.2607 1 143.84 147.956 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lmer(dist~dist_arousal*age_groupC+(1+dist_arousal|subject)+(1|items),data=dimension_data)
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_arousal * age_groupC + (1 + dist_arousal | subject) +
## (1 | items)
## Data: dimension_data
##
## REML criterion at convergence: -10390.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3418 -0.7822 -0.1127 0.6822 3.3996
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0037779 0.06146
## subject (Intercept) 0.0008123 0.02850
## dist_arousal 0.0004079 0.02020 -0.13
## Residual 0.0451124 0.21240
## Number of obs: 44523, groups: items, 306; subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.495e-01 4.540e-03 4.003e+02 99.02 < 2e-16 ***
## dist_arousal 1.386e-02 5.502e-03 3.639e+02 2.52 0.012168 *
## age_groupC 2.236e-02 5.750e-03 1.440e+02 3.89 0.000153 ***
## dist_arousal:age_groupC 3.226e-02 4.903e-03 1.436e+02 6.58 8.18e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_rs ag_grC
## dist_arousl -0.026
## age_groupC 0.286 -0.018
## dst_rsl:g_C -0.026 0.201 -0.091
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00249479 (tol = 0.002, component 1)
confint(m2, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.440646684 0.45844271
## dist_arousal 0.003080467 0.02464801
## age_groupC 0.011096279 0.03363446
## dist_arousal:age_groupC 0.022653219 0.04187219
anova(m2)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_arousal 0.28645 0.28645 1 363.87 6.3496 0.0121681 *
## age_groupC 0.68260 0.68260 1 144.01 15.1311 0.0001527 ***
## dist_arousal:age_groupC 1.95341 1.95341 1 143.63 43.3010 8.176e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lmer(dist~dist_pos*age_groupC+(1+dist_pos|subject)+(1|items),data=dimension_data, control = lmerControl(optimizer ="Nelder_Mead"))
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_pos * age_groupC + (1 + dist_pos | subject) + (1 |
## items)
## Data: dimension_data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: -12621.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5987 -0.7645 -0.1070 0.6640 3.4452
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.002149 0.04636
## subject (Intercept) 0.000819 0.02862
## dist_pos 0.001677 0.04095 0.07
## Residual 0.042841 0.20698
## Number of obs: 44523, groups: items, 306; subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.496e-01 3.909e-03 3.418e+02 115.01 < 2e-16 ***
## dist_pos 6.947e-02 5.076e-03 3.020e+02 13.69 < 2e-16 ***
## age_groupC 2.235e-02 5.747e-03 1.440e+02 3.89 0.000153 ***
## dist_pos:age_groupC 9.701e-02 8.024e-03 1.439e+02 12.09 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_ps ag_grC
## dist_pos 0.037
## age_groupC 0.332 0.022
## dst_ps:g_gC 0.021 0.357 0.063
confint(m3, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.44192271 0.45724592
## dist_pos 0.05951835 0.07941576
## age_groupC 0.01109053 0.03361999
## dist_pos:age_groupC 0.08128083 0.11273325
anova(m3)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_pos 8.0239 8.0239 1 301.99 187.293 < 2.2e-16 ***
## age_groupC 0.6482 0.6482 1 144.03 15.129 0.0001529 ***
## dist_pos:age_groupC 6.2621 6.2621 1 143.86 146.169 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m4 <- lmer(dist~dist_neg*age_groupC+(1+dist_neg|subject)+(1|items),data=dimension_data)
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_neg * age_groupC + (1 + dist_neg | subject) + (1 |
## items)
## Data: dimension_data
##
## REML criterion at convergence: -13152.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9960 -0.7700 -0.1139 0.6647 3.4913
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0007901 0.02811
## subject (Intercept) 0.0008208 0.02865
## dist_neg 0.0028336 0.05323 0.15
## Residual 0.0425360 0.20624
## Number of obs: 44523, groups: items, 306; subject, 146
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.495e-01 3.293e-03 2.282e+02 136.496 < 2e-16 ***
## dist_neg 9.769e-02 5.583e-03 1.930e+02 17.497 < 2e-16 ***
## age_groupC 2.237e-02 5.750e-03 1.440e+02 3.891 0.000152 ***
## dist_neg:age_groupC 1.088e-01 1.030e-02 1.437e+02 10.560 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_ng ag_grC
## dist_neg 0.110
## age_groupC 0.394 0.057
## dst_ng:g_gC 0.054 0.417 0.137
confint(m4, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.44309307 0.45600327
## dist_neg 0.08675059 0.10863709
## age_groupC 0.01110355 0.03364212
## dist_neg:age_groupC 0.08861838 0.12900986
anova(m4)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_neg 13.0225 13.0225 1 193.03 306.153 < 2.2e-16 ***
## age_groupC 0.6440 0.6440 1 144.00 15.141 0.000152 ***
## dist_neg:age_groupC 4.7436 4.7436 1 143.73 111.519 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Children Only Analyses: Age is continuous
dimension_data <- dimension_data %>%mutate(AgeC=Age-mean(Age,na.rm=T))
dimension_data <- dimension_data %>% filter(age_group == "kids")
m1 <- lmer(dist~dist_valence*AgeC+(1+dist_valence|subject)+(1|items),data=dimension_data)
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_valence * AgeC + (1 + dist_valence | subject) + (1 |
## items)
## Data: dimension_data
##
## REML criterion at convergence: -7663.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6182 -0.7878 -0.1117 0.6670 3.3582
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0007210 0.02685
## subject (Intercept) 0.0007169 0.02677
## dist_valence 0.0006146 0.02479 0.28
## Residual 0.0451389 0.21246
## Number of obs: 32283, groups: items, 306; subject, 106
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.384e-01 3.244e-03 1.613e+02 135.149 < 2e-16 ***
## dist_valence 2.045e-02 2.848e-03 1.481e+02 7.181 3.12e-11 ***
## AgeC 4.300e-03 2.595e-03 1.041e+02 1.657 0.101
## dist_valence:AgeC 1.307e-02 2.344e-03 1.039e+02 5.574 1.97e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_vl AgeC
## dist_valenc 0.188
## AgeC 0.002 0.001
## dst_vlnc:AC 0.001 0.003 0.235
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00504843 (tol = 0.002, component 1)
confint(m1, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.432028966 0.444744180
## dist_valence 0.014869198 0.026032713
## AgeC -0.000787109 0.009386866
## dist_valence:AgeC 0.008473394 0.017662716
anova(m1)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_valence 2.32773 2.32773 1 148.06 51.5681 3.119e-11 ***
## AgeC 0.12389 0.12389 1 104.07 2.7447 0.1006
## dist_valence:AgeC 1.40269 1.40269 1 103.89 31.0750 1.974e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lmer(dist~dist_arousal*AgeC+(1+dist_arousal|subject)+(1|items),data=dimension_data,
control = lmerControl(optimizer ="Nelder_Mead"))
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_arousal * AgeC + (1 + dist_arousal | subject) + (1 |
## items)
## Data: dimension_data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: -6844
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0147 -0.7924 -0.1009 0.6734 3.2010
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0013989 0.03740
## subject (Intercept) 0.0007132 0.02671
## dist_arousal 0.0003177 0.01782 -0.06
## Residual 0.0463018 0.21518
## Number of obs: 32283, groups: items, 306; subject, 106
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.438371 0.003569 213.764568 122.821 <2e-16 ***
## dist_arousal -0.002235 0.003847 279.434492 -0.581 0.562
## AgeC 0.004302 0.002596 104.055349 1.657 0.100
## dist_arousal:AgeC 0.003077 0.002190 103.809697 1.405 0.163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_rs AgeC
## dist_arousl -0.018
## AgeC 0.002 0.000
## dst_rsl:AgC 0.000 0.001 -0.037
confint(m2, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.4313755042 0.445366451
## dist_arousal -0.0097761627 0.005305757
## AgeC -0.0007855494 0.009389535
## dist_arousal:AgeC -0.0012159833 0.007369882
anova(m2)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_arousal 0.015627 0.015627 1 279.43 0.3375 0.5617
## AgeC 0.127179 0.127179 1 104.06 2.7468 0.1005
## dist_arousal:AgeC 0.091375 0.091375 1 103.81 1.9735 0.1631
m3 <- lmer(dist~dist_pos*AgeC+(1+dist_pos|subject)+(1|items),data=dimension_data)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_pos * AgeC + (1 + dist_pos | subject) + (1 | items)
## Data: dimension_data
##
## REML criterion at convergence: -7323.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4577 -0.7868 -0.1039 0.6694 3.1469
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0010817 0.03289
## subject (Intercept) 0.0007154 0.02675
## dist_pos 0.0010329 0.03214 0.25
## Residual 0.0455415 0.21340
## Number of obs: 32283, groups: items, 306; subject, 106
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.384e-01 3.420e-03 1.900e+02 128.170 < 2e-16 ***
## dist_pos 2.101e-02 4.069e-03 1.869e+02 5.164 6.16e-07 ***
## AgeC 4.295e-03 2.595e-03 1.041e+02 1.655 0.101
## dist_pos:AgeC 1.434e-02 3.106e-03 1.039e+02 4.618 1.11e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_ps AgeC
## dist_pos 0.145
## AgeC 0.002 0.001
## dist_ps:AgC 0.001 0.002 0.207
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00206313 (tol = 0.002, component 1)
confint(m3, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.4316891292 0.445096812
## dist_pos 0.0130366478 0.028986968
## AgeC -0.0007910293 0.009381876
## dist_pos:AgeC 0.0082541658 0.020427777
anova(m3)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_pos 1.21437 1.21437 1 186.88 26.6652 6.163e-07 ***
## AgeC 0.12476 0.12476 1 104.08 2.7395 0.1009
## dist_pos:AgeC 0.97114 0.97114 1 103.88 21.3243 1.115e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m4 <- lmer(dist~dist_neg*AgeC+(1+dist_neg|subject)+(1|items),data=dimension_data)
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dist ~ dist_neg * AgeC + (1 + dist_neg | subject) + (1 | items)
## Data: dimension_data
##
## REML criterion at convergence: -7996.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5346 -0.7885 -0.1098 0.6696 3.2553
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## items (Intercept) 0.0003467 0.01862
## subject (Intercept) 0.0007184 0.02680
## dist_neg 0.0021657 0.04654 0.28
## Residual 0.0448182 0.21170
## Number of obs: 32283, groups: items, 306; subject, 106
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.384e-01 3.050e-03 1.307e+02 143.723 < 2e-16 ***
## dist_neg 4.339e-02 4.996e-03 1.215e+02 8.685 2.09e-14 ***
## AgeC 4.308e-03 2.596e-03 1.040e+02 1.659 0.1
## dist_neg:AgeC 2.586e-02 4.350e-03 1.038e+02 5.946 3.73e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dst_ng AgeC
## dist_neg 0.213
## AgeC 0.002 0.001
## dist_ng:AgC 0.001 0.003 0.238
confint(m4, method="Wald")[6:9,]
## 2.5 % 97.5 %
## (Intercept) 0.4323885944 0.444344673
## dist_neg 0.0335994009 0.053184240
## AgeC -0.0007804195 0.009396269
## dist_neg:AgeC 0.0173388461 0.034389546
anova(m4)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## dist_neg 3.3805 3.3805 1 121.48 75.4279 2.094e-14 ***
## AgeC 0.1234 0.1234 1 104.03 2.7535 0.1001
## dist_neg:AgeC 1.5846 1.5846 1 103.81 35.3565 3.730e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We examined how well emotion category predicted participant’s sorting behavior compared to valence and arousal. To do so, we computed the average distance between all stimulus pairs (n = 306 unique pairs) for each age group and predicted these distances from a pair’s similarity on each dimension of interest simultaneously. This general linear model revealed how much each dimension aided in explaining variance in each age group’s sorting behavior. First, we estimated the use of bipolar valence, arousal, and whether image pairs shared the same discrete emotion category (0 = different category pair; 1 = same category pair). Second, we estimated the effects of bivariate valence with positivity and negativity as two orthogonal dimensions.
#function for summarizing the linear model results to an overall table
create_effect_table <- function (m,caption="") {
#create tdiddy summary of model, removing intercept
tidy_m <- m %>%
broom::tidy() %>%
filter(!(term %in% c("(Intercept)"))) #%>%
#select(-std.error)
#extract overall sum squared from model and place in a data frame
overall_sqr <- data.frame(term="Overall", Rsqr=summary(m)$r.squared)
#extract delta-R squared using the lmSupport package
rsqr_table <- modelEffectSizes(m, Print=FALSE)$Effects %>%
as.data.frame() %>%
rownames_to_column(var="term") %>%
filter(!(term %in% c("(Intercept)"))) %>%
select("term","dR-sqr")
#join/ bind everything into one data frame
tidy_m <- tidy_m %>%
left_join(rsqr_table) %>%
bind_rows(overall_sqr)
#create table using kable
opts <- options(knitr.kable.NA = "")
tidy_m %>%
kable(
caption = caption,
col.names=c("Predictor","Estimate","SE","t-value","p","dR^2","Overall R^2"),
digits=c(0,2,2,2,4,2,2)
)
}
#kids, 3
m <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="3 to 4"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_valence + dist_arousal + emotion_pair_same,
## data = filter(dist_long_byGroup, age_bin == "3 to 4"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.118108 -0.031050 -0.003978 0.028958 0.131698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.429241 0.005741 74.772 <2e-16 ***
## dist_valence 0.003742 0.002240 1.671 0.0959 .
## dist_arousal -0.004377 0.003922 -1.116 0.2652
## emotion_pair_same -0.006484 0.011752 -0.552 0.5815
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04676 on 302 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.01305, Adjusted R-squared: 0.003246
## F-statistic: 1.331 on 3 and 302 DF, p-value: 0.2643
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bipolar Valence, Arousal, and Emotion Category: 3-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_valence | 0.00 | 0.00 | 1.67 | 0.0959 | 0.01 | |
| dist_arousal | 0.00 | 0.00 | -1.12 | 0.2652 | 0.00 | |
| emotion_pair_same | -0.01 | 0.01 | -0.55 | 0.5815 | 0.00 | |
| Overall | 0.01 |
#kids, 4
m <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="4 to 5"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_valence + dist_arousal + emotion_pair_same,
## data = filter(dist_long_byGroup, age_bin == "4 to 5"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.119964 -0.021734 -0.000595 0.025800 0.103821
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.432930 0.004916 88.071 < 2e-16 ***
## dist_valence 0.010945 0.001918 5.706 2.76e-08 ***
## dist_arousal -0.015826 0.003358 -4.713 3.74e-06 ***
## emotion_pair_same -0.001493 0.010063 -0.148 0.882
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04004 on 302 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1287, Adjusted R-squared: 0.12
## F-statistic: 14.87 on 3 and 302 DF, p-value: 4.725e-09
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bipolar Valence, Arousal, and Emotion Category: 4-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_valence | 0.01 | 0.00 | 5.71 | 0.0000 | 0.09 | |
| dist_arousal | -0.02 | 0.00 | -4.71 | 0.0000 | 0.06 | |
| emotion_pair_same | 0.00 | 0.01 | -0.15 | 0.8821 | 0.00 | |
| Overall | 0.13 |
#kids, 5
m <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="5 to 6"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_valence + dist_arousal + emotion_pair_same,
## data = filter(dist_long_byGroup, age_bin == "5 to 6"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.170585 -0.038114 0.001383 0.037027 0.139176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.399233 0.006970 57.276 < 2e-16 ***
## dist_valence 0.029263 0.002720 10.759 < 2e-16 ***
## dist_arousal -0.015041 0.004762 -3.159 0.00175 **
## emotion_pair_same -0.031740 0.014269 -2.224 0.02686 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05677 on 302 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.3127, Adjusted R-squared: 0.3059
## F-statistic: 45.8 on 3 and 302 DF, p-value: < 2.2e-16
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bipolar Valence, Arousal, and Emotion Category: 5-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_valence | 0.03 | 0.00 | 10.76 | 0.0000 | 0.26 | |
| dist_arousal | -0.02 | 0.00 | -3.16 | 0.0017 | 0.02 | |
| emotion_pair_same | -0.03 | 0.01 | -2.22 | 0.0269 | 0.01 | |
| Overall | 0.31 |
#kids, 6
m <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="6 to 7"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_valence + dist_arousal + emotion_pair_same,
## data = filter(dist_long_byGroup, age_bin == "6 to 7"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.182787 -0.043893 0.008792 0.045142 0.169198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.387582 0.007960 48.692 < 2e-16 ***
## dist_valence 0.045016 0.003106 14.493 < 2e-16 ***
## dist_arousal -0.020755 0.005438 -3.817 0.000164 ***
## emotion_pair_same -0.059197 0.016295 -3.633 0.000329 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06483 on 302 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.4614, Adjusted R-squared: 0.456
## F-statistic: 86.22 on 3 and 302 DF, p-value: < 2.2e-16
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bipolar Valence, Arousal, and Emotion Category: 6-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_valence | 0.05 | 0.00 | 14.49 | 0e+00 | 0.37 | |
| dist_arousal | -0.02 | 0.01 | -3.82 | 2e-04 | 0.03 | |
| emotion_pair_same | -0.06 | 0.02 | -3.63 | 3e-04 | 0.02 | |
| Overall | 0.46 |
#adults
m <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="adults"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_valence + dist_arousal + emotion_pair_same,
## data = filter(dist_long_byGroup, age_bin == "adults"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.181247 -0.031275 0.008034 0.039218 0.171422
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.316438 0.007758 40.789 < 2e-16 ***
## dist_valence 0.090942 0.003027 30.041 < 2e-16 ***
## dist_arousal -0.020292 0.005300 -3.829 0.000156 ***
## emotion_pair_same -0.083556 0.015881 -5.261 2.72e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06319 on 302 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.7817, Adjusted R-squared: 0.7795
## F-statistic: 360.5 on 3 and 302 DF, p-value: < 2.2e-16
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bipolar Valence, Arousal, and Emotion Category: Adults")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_valence | 0.09 | 0.00 | 30.04 | 0e+00 | 0.65 | |
| dist_arousal | -0.02 | 0.01 | -3.83 | 2e-04 | 0.01 | |
| emotion_pair_same | -0.08 | 0.02 | -5.26 | 0e+00 | 0.02 | |
| Overall | 0.78 |
# previously computed bootstrap CIs and delta R^2 values:
## See 3_Grid_dimension_predictions_distances_clean.R for scripts bor bootstrapping CIs
drsq_v_a_ec <- read.csv(here("analysis","paper_2020","processed_data","drsqr_v_a_ec.csv"))
#clean labels for plot
drsq_v_a_ec <- drsq_v_a_ec %>% mutate(age_bin=case_when(
age_bin == "3 to 4" ~ "3 y/o",
age_bin == "4 to 5" ~ "4 y/o",
age_bin == "5 to 6" ~ "5 y/o",
age_bin == "6 to 7" ~ "6 y/o",
age_bin == "adults" ~ "adults",
))
p_drsq_v_a_ec <- ggplot(drsq_v_a_ec,aes(minus_model_name,delta_rsq, fill=minus_model_name))+
geom_bar(stat="identity",color="black")+
geom_hline(yintercept=0,color="black")+
geom_errorbar(aes(ymin=delta_rsq_lower,ymax=delta_rsq_upper),width=0.1)+
# scale_fill_brewer(palette="Set2",
# name = "Model Predictor",
# limits=c("dist_valence","dist_arousal","emotion_pair_same"),
# breaks=c("dist_valence","dist_arousal","emotion_pair_same"),
# labels=c("Valence","Arousal","Same Emotion Category"))+
scale_fill_viridis_d(
name = " ", #Model Predictor
limits=c("dist_valence","dist_arousal","emotion_pair_same"),
breaks=c("dist_valence","dist_arousal","emotion_pair_same"),
labels=c("Valence","Arousal","Emotion Category"))+
scale_x_discrete(
limits=c("dist_valence","dist_arousal","emotion_pair_same"),
breaks=c("dist_valence","dist_arousal","emotion_pair_same"),
labels=c("Valence","Arousal","Same Emotion Category"))+
theme(axis.text.x = element_text(angle=90, vjust=0.5,size=18),
axis.title.x = element_text(size=20,face="bold"),
axis.text.y = element_text(size=18),
axis.title.y= element_text(size=20,face="bold"),
strip.text.x = element_text(size=18),
legend.text =element_text(size=20),
legend.title =element_text(size=22,face="bold"))+
theme(#axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
ylab("Delta R Squared")+
xlab("Age Group (years)") +
facet_wrap(~age_bin,nrow=1, strip.position = c("bottom"))+
theme(legend.position=c(0.1,0.8))
p_drsq_v_a_ec
#kids, 3
m <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="3 to 4"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_pos + dist_neg + dist_arousal +
## emotion_pair_same, data = filter(dist_long_byGroup, age_bin ==
## "3 to 4"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.115262 -0.031670 -0.004228 0.028143 0.134248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.246e-01 6.391e-03 66.443 <2e-16 ***
## dist_pos -5.009e-03 4.958e-03 -1.010 0.3131
## dist_neg 1.166e-02 5.230e-03 2.229 0.0266 *
## dist_arousal -4.276e-05 4.507e-03 -0.009 0.9924
## emotion_pair_same -3.918e-03 1.182e-02 -0.332 0.7405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04662 on 301 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.02221, Adjusted R-squared: 0.009217
## F-statistic: 1.709 on 4 and 301 DF, p-value: 0.1478
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bivariate Valence, Arousal, and Emotion Category: 3-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_pos | -0.01 | 0.00 | -1.01 | 0.3131 | 0.00 | |
| dist_neg | 0.01 | 0.01 | 2.23 | 0.0266 | 0.02 | |
| dist_arousal | 0.00 | 0.00 | -0.01 | 0.9924 | 0.00 | |
| emotion_pair_same | 0.00 | 0.01 | -0.33 | 0.7405 | 0.00 | |
| Overall | 0.02 |
#kids, 4
m <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="4 to 5"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_pos + dist_neg + dist_arousal +
## emotion_pair_same, data = filter(dist_long_byGroup, age_bin ==
## "4 to 5"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.116690 -0.024719 0.000412 0.026093 0.102877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.422831 0.005333 79.291 < 2e-16 ***
## dist_pos -0.002723 0.004137 -0.658 0.5109
## dist_neg 0.024421 0.004364 5.596 4.95e-08 ***
## dist_arousal -0.009087 0.003761 -2.416 0.0163 *
## emotion_pair_same 0.004157 0.009862 0.421 0.6737
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0389 on 301 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1803, Adjusted R-squared: 0.1695
## F-statistic: 16.56 on 4 and 301 DF, p-value: 2.822e-12
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bivariate Valence, Arousal, and Emotion Category: 4-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_pos | 0.00 | 0.00 | -0.66 | 0.5109 | 0.00 | |
| dist_neg | 0.02 | 0.00 | 5.60 | 0.0000 | 0.09 | |
| dist_arousal | -0.01 | 0.00 | -2.42 | 0.0163 | 0.02 | |
| emotion_pair_same | 0.00 | 0.01 | 0.42 | 0.6737 | 0.00 | |
| Overall | 0.18 |
#kids, 5
m <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="5 to 6"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_pos + dist_neg + dist_arousal +
## emotion_pair_same, data = filter(dist_long_byGroup, age_bin ==
## "5 to 6"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.165601 -0.032707 -0.002381 0.034316 0.149575
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.372339 0.006979 53.348 <2e-16 ***
## dist_pos -0.013422 0.005414 -2.479 0.0137 *
## dist_neg 0.068672 0.005712 12.022 <2e-16 ***
## dist_arousal 0.006008 0.004922 1.220 0.2232
## emotion_pair_same -0.016946 0.012908 -1.313 0.1902
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05091 on 301 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.4492, Adjusted R-squared: 0.4419
## F-statistic: 61.37 on 4 and 301 DF, p-value: < 2.2e-16
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bivariate Valence, Arousal, and Emotion Category: 5-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_pos | -0.01 | 0.01 | -2.48 | 0.0137 | 0.01 | |
| dist_neg | 0.07 | 0.01 | 12.02 | 0.0000 | 0.26 | |
| dist_arousal | 0.01 | 0.00 | 1.22 | 0.2232 | 0.00 | |
| emotion_pair_same | -0.02 | 0.01 | -1.31 | 0.1902 | 0.00 | |
| Overall | 0.45 |
#kids, 6
m <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="6 to 7"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_pos + dist_neg + dist_arousal +
## emotion_pair_same, data = filter(dist_long_byGroup, age_bin ==
## "6 to 7"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.164992 -0.041168 0.005804 0.036657 0.189090
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.355708 0.007939 44.804 <2e-16 ***
## dist_pos -0.007820 0.006159 -1.270 0.2051
## dist_neg 0.090119 0.006498 13.870 <2e-16 ***
## dist_arousal 0.005149 0.005599 0.920 0.3585
## emotion_pair_same -0.042476 0.014683 -2.893 0.0041 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05791 on 301 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.5717, Adjusted R-squared: 0.566
## F-statistic: 100.4 on 4 and 301 DF, p-value: < 2.2e-16
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bivariate Valence, Arousal, and Emotion Category: 6-year-olds")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_pos | -0.01 | 0.01 | -1.27 | 0.2051 | 0.00 | |
| dist_neg | 0.09 | 0.01 | 13.87 | 0.0000 | 0.27 | |
| dist_arousal | 0.01 | 0.01 | 0.92 | 0.3585 | 0.00 | |
| emotion_pair_same | -0.04 | 0.01 | -2.89 | 0.0041 | 0.01 | |
| Overall | 0.57 |
#adults
m <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="adults"))
summary(m)
##
## Call:
## lm(formula = average_dist ~ dist_pos + dist_neg + dist_arousal +
## emotion_pair_same, data = filter(dist_long_byGroup, age_bin ==
## "adults"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.177894 -0.033465 0.002811 0.036301 0.190643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.294450 0.008378 35.145 < 2e-16 ***
## dist_pos 0.054320 0.006499 8.358 2.38e-15 ***
## dist_neg 0.105502 0.006857 15.387 < 2e-16 ***
## dist_arousal -0.003213 0.005909 -0.544 0.587
## emotion_pair_same -0.076227 0.015494 -4.920 1.43e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06111 on 301 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.7965, Adjusted R-squared: 0.7938
## F-statistic: 294.5 on 4 and 301 DF, p-value: < 2.2e-16
#vif(m) #check VIF
create_effect_table(m,caption="Predicting Sorting Distance from Bivariate Valence, Arousal, and Emotion Category: Adults")
| Predictor | Estimate | SE | t-value | p | dR^2 | Overall R^2 |
|---|---|---|---|---|---|---|
| dist_pos | 0.05 | 0.01 | 8.36 | 0.000 | 0.05 | |
| dist_neg | 0.11 | 0.01 | 15.39 | 0.000 | 0.16 | |
| dist_arousal | 0.00 | 0.01 | -0.54 | 0.587 | 0.00 | |
| emotion_pair_same | -0.08 | 0.02 | -4.92 | 0.000 | 0.02 | |
| Overall | 0.8 |
# previously computed bootstrap CIs and delta R^2 values:
## See 3_Grid_dimension_predictions_distances_clean.R for scripts bor bootstrapping CIs
drsq_pn_a_ec <- read.csv(here(root_path,"analysis","paper_2020","processed_data","drsqr_pn_a_ec.csv"))
drsq_pn_a_ec <- drsq_pn_a_ec %>% mutate(age_bin=case_when(
age_bin == "3 to 4" ~ "3 y/o",
age_bin == "4 to 5" ~ "4 y/o",
age_bin == "5 to 6" ~ "5 y/o",
age_bin == "6 to 7" ~ "6 y/o",
age_bin == "adults" ~ "adults",
))
p_drsq_pn_a_ec <- ggplot(drsq_pn_a_ec,aes(minus_model_name,delta_rsq, fill=minus_model_name))+
geom_bar(stat="identity",color="black")+
geom_hline(yintercept=0,color="black")+
geom_errorbar(aes(ymin=delta_rsq_lower,ymax=delta_rsq_upper),width=0.1)+
# scale_fill_brewer(palette="Set1",
# name = "Model Predictor",
# limits=c("dist_pos","dist_neg","dist_arousal","emotion_pair_same"),
# breaks=c("dist_pos","dist_neg","dist_arousal","emotion_pair_same"),
# labels=c("Positive","Negative","Arousal","Same Emotion Category"))+
scale_fill_viridis_d(
name = "Model Predictor",
limits=c("dist_pos","dist_neg","dist_arousal","emotion_pair_same"),
breaks=c("dist_pos","dist_neg","dist_arousal","emotion_pair_same"),
labels=c("Positive","Negative","Arousal","Same Emotion Category"))+
scale_x_discrete(
limits=c("dist_pos","dist_neg","dist_arousal","emotion_pair_same"),
breaks=c("dist_pos","dist_neg","dist_arousal","emotion_pair_same"),
labels=c("Positive","Negative","Arousal","Same Emotion Category"))+
theme(axis.text.x = element_text(angle=90, vjust=0.5,size=18),
axis.title.x = element_text(size=20,face="bold"),
axis.text.y = element_text(size=18),
axis.title.y= element_text(size=20,face="bold"),
strip.text.x = element_text(size=18))+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
ylab("Delta R Squared")+
facet_wrap(~age_bin,nrow=1)+
theme(legend.position=c(0.1,0.8))
p_drsq_pn_a_ec
We also investigated whether bivariate valence – with separate dimensions of positivity and negativity—led to better model performance than bipolar valence. To do so, we compared the models including bipolar valence to the models including positivity and negativity in each age group. In general, replacing the valence predictor with separate predictors of positivity and negativity led to improved model performance (i.e., explained more variance in sorting behavior) in all but the youngest age group, with the most substantial gains in predictor among the 5- and 6-year-olds.
#kids, 3
m_v <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="3 to 4"))
m_pn <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="3 to 4"))
anova(m_v,m_pn)
## Analysis of Variance Table
##
## Model 1: average_dist ~ dist_valence + dist_arousal + emotion_pair_same
## Model 2: average_dist ~ dist_pos + dist_neg + dist_arousal + emotion_pair_same
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 302 0.66024
## 2 301 0.65411 1 0.0061282 2.82 0.09413 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#kids, 4
m_v <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="4 to 5"))
m_pn <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="4 to 5"))
anova(m_v,m_pn)
## Analysis of Variance Table
##
## Model 1: average_dist ~ dist_valence + dist_arousal + emotion_pair_same
## Model 2: average_dist ~ dist_pos + dist_neg + dist_arousal + emotion_pair_same
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 302 0.48412
## 2 301 0.45541 1 0.028711 18.976 1.817e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#kids, 5
m_v <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="5 to 6"))
m_pn <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="5 to 6"))
anova(m_v,m_pn)
## Analysis of Variance Table
##
## Model 1: average_dist ~ dist_valence + dist_arousal + emotion_pair_same
## Model 2: average_dist ~ dist_pos + dist_neg + dist_arousal + emotion_pair_same
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 302 0.97340
## 2 301 0.78011 1 0.19329 74.581 3.46e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#kids, 6
m_v <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="6 to 7"))
m_pn <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="6 to 7"))
anova(m_v,m_pn)
## Analysis of Variance Table
##
## Model 1: average_dist ~ dist_valence + dist_arousal + emotion_pair_same
## Model 2: average_dist ~ dist_pos + dist_neg + dist_arousal + emotion_pair_same
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 302 1.2693
## 2 301 1.0094 1 0.25994 77.514 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#adults
m_v <- lm(average_dist~dist_valence+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="adults"))
m_pn <- lm(average_dist~dist_pos+dist_neg+dist_arousal+emotion_pair_same,data=filter(dist_long_byGroup,age_bin=="adults"))
anova(m_v,m_pn)
## Analysis of Variance Table
##
## Model 1: average_dist ~ dist_valence + dist_arousal + emotion_pair_same
## Model 2: average_dist ~ dist_pos + dist_neg + dist_arousal + emotion_pair_same
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 302 1.2058
## 2 301 1.1241 1 0.081703 21.878 4.395e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Our last sets of analyses took a bottom-up approach, using unsupervised methods to provide a different perspective on how emotions might be represented without reliance on any predetermined dimensions or categories. For this reason, we analyzed the Same and Different Individual Sorts separately because we did not want to assume that a happy face in one sort had anything to do with a happy face in another.
First, we used 2-dimensional multidimensional scaling (MDS) to visually represent participant’s classifications. To better understand the dimensions, we fit vectors of image ratings for bipolar valence, arousal, positivity, and negativity onto our MDS solution over 1,000 permutations to derive the squared correlation coefficient of each vector (envfit in Vegan package; Oksanen, 2019).
subj_dist_long <- read.csv(here(root_path,"analysis","paper_2020","processed_data","Grid_subject_distance_item_pairs.csv"))
ratings <- read.csv(here(root_path,"analysis","paper_2020","processed_data","ratings_data.csv"))
#average across all distances
avg_dist_long <- subj_dist_long %>%
group_by(sort,age_group,item1,item2) %>%
summarize(avg_dist=mean(dist)) %>%
ungroup() %>%
mutate(sort=as.character(sort),age_group=as.character(age_group),item1=as.character(item1),item2=as.character(item2))
#average distance object organized by sorting group
avg_dist <- avg_dist_long %>%
group_by(sort,age_group) %>%
nest() %>%
mutate(dist_obj = purrr::map(data, long_to_dist))
MDS solution fit with each dimension.
set.seed(51314)
#### Plot MDS - Kids: Sorting Same Individual ####
cur_dist <- filter(avg_dist,sort=="Sort1"&age_group=="kids")$dist_obj[[1]]
sort1_kids_cmd <- data.frame(cmdscale(cur_dist, k=2))
ratings1 <- ratings %>% filter(sort=="Sort1") %>% dplyr::select(image, pos, neg, valence, arousal) %>% rename(positivity=pos, negativity=neg)
sort1k_fit <- envfit(sort1_kids_cmd,ratings1, permutations = 1000)
sort1k_fit
##
## ***VECTORS
##
## X1 X2 r2 Pr(>r)
## positivity 0.89029 -0.45539 0.8442 0.000999 ***
## negativity -0.99420 -0.10759 0.9710 0.000999 ***
## valence 0.97611 -0.21727 0.9155 0.000999 ***
## arousal 0.43022 -0.90272 0.4298 0.015984 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
##
## ***FACTORS:
##
## Centroids:
## X1 X2
## imageM07ang_c -0.2063 -0.1067
## imageM07ang_o -0.1604 -0.1368
## imageM07calm_c 0.1128 -0.0134
## imageM07calm_o 0.1553 -0.0287
## imageM07disg_c -0.2072 -0.0782
## imageM07disg_o -0.1299 -0.1467
## imageM07exc_c 0.1630 -0.0962
## imageM07exc_o 0.0790 0.0694
## imageM07fear_c -0.0196 0.1219
## imageM07fear_o -0.0401 0.1455
## imageM07hap_c 0.1792 -0.0519
## imageM07hap_o 0.1507 -0.0701
## imageM07neut_c -0.0101 0.1100
## imageM07neut_o 0.0272 0.1048
## imageM07sad_c -0.1820 0.0864
## imageM07sad_o -0.1453 0.1097
## imageM07surp_c 0.0793 0.0283
## imageM07surp_o 0.1544 -0.0473
##
## Goodness of fit:
## r2 Pr(>r)
## image 1 1
## Permutation: free
## Number of permutations: 1000
# prepare face images
cur_cluster <- hclust(cur_dist)
image_paths <- paste(here("experiment","stimuli_sort1"),"/",labels(cur_cluster),".png",sep="")
cur_images <- data.frame(
label=labels(cur_cluster),
image=image_paths,
x=seq(1,length(image_paths)),
y=rep(c(-0.05,-0.1),length(image_paths)/2))
sort1_images <- cur_images
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd$label <- rownames(cur_cmd)
cur_cmd <- merge(cur_cmd,sort1_images)
#include correlations at correct angles
data.scores = as.data.frame(scores(sort1_kids_cmd))
en_coord_cont = as.data.frame(scores(sort1k_fit, "vectors")) * ordiArrowMul(sort1k_fit)
#Plot MDS with rating vectors
p1 <- ggplot(data=cur_cmd,aes(X1,X2))+
geom_image(data=cur_cmd, aes(image=image), size=0.08)+
ylab("Dimension 2")+
xlab("Dimension 1")+
geom_segment(data = en_coord_cont,aes(x = 0, y = 0, xend = X1/7, yend = X2/7, color=row.names(en_coord_cont)),
size =1, alpha = 0.8, arrow = arrow(length = unit(0.03, "npc"))) + #ends="both" for arrow
geom_text(data = en_coord_cont, aes(x = X1/7, y = (X2/7) - .01), colour = "black",
fontface = "bold", label = row.names(en_coord_cont))+
theme(axis.text.x = element_text(angle=90, vjust=0.5,size=18),
axis.title.x = element_text(size=18,face="bold"),
axis.text.y = element_text(size=16),
axis.title.y= element_text(size=18,face="bold"),
strip.text.x = element_text(size=16))+
ggtitle("Children")+
theme(legend.position="none", plot.title = element_text(hjust = 0.5,size=18))
p1
MDS solution fit with each dimension.
#### Plot MDS - Adults: Sorting Same Individual ####
cur_dist <- filter(avg_dist,sort=="Sort1"&age_group=="adults")$dist_obj[[1]]
sort1_adults_cmd <- data.frame(cmdscale(cur_dist, k=2))
sort1a_fit <- envfit(sort1_adults_cmd,ratings1, permutations=1000)
sort1a_fit
##
## ***VECTORS
##
## X1 X2 r2 Pr(>r)
## positivity 0.94471 0.32790 0.9249 0.000999 ***
## negativity -0.86387 0.50371 0.9787 0.000999 ***
## valence 0.99915 -0.04119 0.9625 0.000999 ***
## arousal 0.52883 0.84873 0.3954 0.020979 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
##
## ***FACTORS:
##
## Centroids:
## X1 X2
## imageM07ang_c -0.2803 0.1552
## imageM07ang_o -0.3055 0.1319
## imageM07calm_c 0.2056 -0.0312
## imageM07calm_o 0.2549 0.0581
## imageM07disg_c -0.2783 0.1761
## imageM07disg_o -0.2043 0.1306
## imageM07exc_c 0.3223 0.0412
## imageM07exc_o 0.0140 -0.1904
## imageM07fear_c -0.1088 -0.1706
## imageM07fear_o -0.1567 -0.1736
## imageM07hap_c 0.3228 0.0467
## imageM07hap_o 0.3513 0.1044
## imageM07neut_c -0.0459 -0.0983
## imageM07neut_o -0.0606 -0.1090
## imageM07sad_c -0.2503 0.0068
## imageM07sad_o -0.2514 -0.0380
## imageM07surp_c 0.1023 -0.1451
## imageM07surp_o 0.3690 0.1051
##
## Goodness of fit:
## r2 Pr(>r)
## image 1 1
## Permutation: free
## Number of permutations: 1000
#include correlations at correct angles
data.scores = as.data.frame(scores(sort1_adults_cmd))
en_coord_cont = as.data.frame(scores(sort1a_fit, "vectors")) * ordiArrowMul(sort1a_fit)
#Plot MDS with rating vectors
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd$label <- rownames(cur_cmd)
cur_cmd <- merge(cur_cmd,sort1_images)
p2 <- ggplot(data=cur_cmd,aes(X1,X2))+
geom_image(data=cur_cmd, aes(image=image), size=0.08)+
ylab("Dimension 2")+
xlab("Dimension 1")+
geom_segment(data = en_coord_cont,aes(x = 0, y = 0, xend = X1/4, yend = X2/4, color=row.names(en_coord_cont)),
size =1, alpha = 0.8, arrow = arrow(length = unit(0.03, "npc"))) + #ends="both" for arrow
geom_text(data = en_coord_cont, aes(x = X1/4, y = (X2/4) - .01), colour = "black",
fontface = "bold", label = row.names(en_coord_cont))+
theme(axis.text.x = element_text(angle=90, vjust=0.5,size=18),
axis.title.x = element_text(size=18,face="bold"),
axis.text.y = element_text(size=16),
axis.title.y= element_text(size=18,face="bold"),
strip.text.x = element_text(size=16))+
ggtitle("Adults")+
theme(legend.position="none", plot.title = element_text(hjust = 0.5,size=18))
p2
MDS solution fit with each dimension.
#### Plot MDS - Kids: Sorting Different Individuals ####
cur_dist <- filter(avg_dist,sort=="Sort2"&age_group=="kids")$dist_obj[[1]]
sort2_kids_cmd <- data.frame(cmdscale(cur_dist, k=2))
ratings2 <- ratings %>% filter(sort=="Sort2") %>% select(image, pos, neg, valence, arousal) %>% rename(positivity=pos, negativity=neg)
sort2k_fit <- envfit(sort2_kids_cmd,ratings2, permutations=1000)
#show fit
sort2k_fit
##
## ***VECTORS
##
## X1 X2 r2 Pr(>r)
## positivity 0.80844 -0.58858 0.8494 0.000999 ***
## negativity -0.98756 0.15727 0.9095 0.000999 ***
## valence 0.92326 -0.38418 0.8585 0.000999 ***
## arousal 0.14836 -0.98893 0.2185 0.161838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
##
## ***FACTORS:
##
## Centroids:
## X1 X2
## imageF01fear_c 0.0713 0.0339
## imageF04ang_o -0.2218 -0.1318
## imageF07neut_c 0.0987 0.0117
## imageF10disg_o -0.2309 -0.0931
## imageF13exc_o 0.1206 -0.1064
## imageF14surp_c 0.0201 0.1306
## imageF15hap_c 0.1519 -0.0893
## imageF17calm_o 0.1394 -0.1522
## imageF22sad_c -0.0218 0.1425
## imageM02ang_c -0.1828 0.0053
## imageM03calm_c 0.1220 0.0097
## imageM04disg_c -0.2117 -0.0936
## imageM05exc_c 0.1605 -0.0644
## imageM08fear_o -0.0106 0.1773
## imageM12hap_o 0.0866 -0.0847
## imageM14neut_o -0.0153 0.1501
## imageM15surp_o 0.0710 0.0840
## imageM17sad_o -0.1474 0.0703
##
## Goodness of fit:
## r2 Pr(>r)
## image 1 1
## Permutation: free
## Number of permutations: 1000
#include correlations at correct angles
data.scores = as.data.frame(scores(sort2_kids_cmd))
en_coord_cont = as.data.frame(scores(sort2k_fit, "vectors")) * ordiArrowMul(sort2k_fit)
#create image paths
cur_cluster <- hclust(cur_dist)
image_paths <- paste(here("experiment","stimuli_sort2"),"/",labels(cur_cluster),".png",sep="")
cur_images <- data.frame(
label=labels(cur_cluster),
image=image_paths,
x=seq(1,length(image_paths)),
y=rep(c(-0.05,-0.1),length(image_paths)/2))
sort2_images <- cur_images
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd$label <- rownames(cur_cmd)
cur_cmd <- merge(cur_cmd,sort2_images)
#Plot MDS with rating vectors
p3 <- ggplot(data=cur_cmd,aes(X1,X2))+
geom_image(data=cur_cmd, aes(image=image), size=0.08)+
ylab("Dimension 2")+
xlab("Dimension 1")+
geom_segment(data = en_coord_cont,aes(x = 0, y = 0, xend = X1/7, yend = X2/7, color=row.names(en_coord_cont)),
size =1, alpha = 0.8, arrow = arrow(length = unit(0.03, "npc"))) + #ends="both" for arrow
geom_text(data = en_coord_cont, aes(x = X1/7, y = (X2/7) - .01), colour = "black",
fontface = "bold", label = row.names(en_coord_cont))+
theme(axis.text.x = element_text(angle=90, vjust=0.5,size=18),
axis.title.x = element_text(size=18,face="bold"),
axis.text.y = element_text(size=16),
axis.title.y= element_text(size=18,face="bold"),
strip.text.x = element_text(size=16))+
ggtitle("Children")+
theme(legend.position="none", plot.title = element_text(hjust = 0.5,size=18))
p3
#### Plot MDS - Adults: Sorting Different Individuals ####
cur_dist <- filter(avg_dist,sort=="Sort2"&age_group=="adults")$dist_obj[[1]]
sort2_adults_cmd <- data.frame(cmdscale(cur_dist, k=2))
sort2a_fit <- envfit(sort2_adults_cmd,ratings2, permutations=1000)
sort2a_fit
##
## ***VECTORS
##
## X1 X2 r2 Pr(>r)
## positivity -0.99702 -0.07717 0.9225 0.000999 ***
## negativity 0.93410 -0.35700 0.9596 0.000999 ***
## valence -0.99097 0.13408 0.9778 0.000999 ***
## arousal -0.81869 -0.57424 0.1015 0.456543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
##
## ***FACTORS:
##
## Centroids:
## X1 X2
## imageF01fear_c 0.0125 0.1046
## imageF04ang_o 0.3042 -0.1683
## imageF07neut_c -0.0320 0.0137
## imageF10disg_o 0.3144 -0.1477
## imageF13exc_o -0.3654 -0.0526
## imageF14surp_c 0.1288 0.2532
## imageF15hap_c -0.3221 -0.0552
## imageF17calm_o -0.3218 -0.0281
## imageF22sad_c 0.1323 -0.0748
## imageM02ang_c 0.2239 -0.1905
## imageM03calm_c -0.1172 0.0268
## imageM04disg_c 0.2723 -0.1567
## imageM05exc_c -0.3494 -0.0688
## imageM08fear_o 0.1505 0.2091
## imageM12hap_o -0.3304 -0.0777
## imageM14neut_o 0.0543 0.1657
## imageM15surp_o 0.1092 0.2740
## imageM17sad_o 0.1359 -0.0267
##
## Goodness of fit:
## r2 Pr(>r)
## image 1 1
## Permutation: free
## Number of permutations: 1000
#include correlations at correct angles
data.scores = as.data.frame(scores(sort2_adults_cmd))
en_coord_cont = as.data.frame(scores(sort2a_fit, "vectors")) * ordiArrowMul(sort2a_fit)
# prepare images
cur_cmd <- data.frame(cmdscale(cur_dist))
cur_cmd$label <- rownames(cur_cmd)
cur_cmd <- merge(cur_cmd,sort2_images)
#Plot MDS with rating vectors
p4 <- ggplot(data=cur_cmd,aes(X1,X2))+
geom_image(data=cur_cmd, aes(image=image), size=0.08)+
ylab("Dimension 2")+
xlab("Dimension 1")+
geom_segment(data = en_coord_cont,aes(x = 0, y = 0, xend = X1/4, yend = X2/4, color=row.names(en_coord_cont)),
size =1, alpha = 0.8, arrow = arrow(length = unit(0.03, "npc"))) + #ends="both" for arrow
geom_text(data = en_coord_cont, aes(x = X1/4, y = (X2/4) - .02), colour = "black",
fontface = "bold", label = row.names(en_coord_cont))+
theme(axis.text.x = element_text(angle=90, vjust=0.5,size=18),
axis.title.x = element_text(size=18,face="bold"),
axis.text.y = element_text(size=16),
axis.title.y= element_text(size=18,face="bold"),
strip.text.x = element_text(size=16))+
ggtitle("Adults")+
theme(legend.position="none", plot.title = element_text(hjust = 0.5,size=18))
p4
Second, we used hierarchical clustering to examine age-related changes in how participants organized emotion cues (Ward’s method; Ward, 1963). Clustering was performed on distance matrices calculated for each age group in each sorting condition using the pairwise distances between all sorted images.
First, we define some helpful functions.
clean_labels_sort <- function(cluster_object) {
#clean up labels
cluster_object[["old_label"]] <- cluster_object[["labels"]]
cluster_object[["labels"]] <- cluster_object[["labels"]] %>%
str_replace_all(c("M07"="","F01"="","F04"="","F07"="","F10"="", "F13"="",
"F14"="","F15"="", "F17"="","F22"="","M02"="","M04"="",
"M03"="","M05"="","M08"="","M12"="","M14"="","M15"="",
"M17"="","_o"=" 1", "_c"=" 2", "ang"="angry","calm"="calm","disg"="disgust",
"exc"="excited","fear"="fear", "hap"="happy",
"neut"="neutral","sad"="sad", "surp"="surprise"))
cluster_object
}
clean_cluster <- function(d,cur_method="ward.D2",clean_labels=TRUE) {
cur_cluster <- d %>%
hclust(method = cur_method)
if (clean_labels) {
cur_cluster <- clean_labels_sort(cur_cluster)
}
cur_cluster
}
cut_cluster <- function(clst,cluster_num=3,add_ratings=TRUE) {
#create labels data frame to retain old label names (useful for joining)
labels=data.frame(old_label=pluck(clst,"old_label"),label=pluck(clst,"labels"))
clst %>%
cutree(cluster_num) %>%
as.data.frame() %>%
tibble::rownames_to_column("label") %>%
left_join(labels) %>%
rename(cluster3 = ".") %>%
dplyr::left_join(ratings_clean, by = c("old_label")) %>%
mutate(cluster3 = as.factor(cluster3))
}
Then we create the main clustering objects based on the average distances between items within each sorting and age group.
#read in data (root_path)
subj_dist_long <- read.csv(here(root_path,"analysis","paper_2020","processed_data","Grid_subject_distance_item_pairs.csv"))
ratings <- read.csv(here(root_path,"analysis","paper_2020","processed_data","ratings_data.csv")) %>%
select(image, sort, pos, neg, valence, arousal) %>%
rename(label = image)
#clean ratings names
ratings_clean <- ratings %>%
mutate(clean_label = str_replace_all(label,c("F01"="","F04"="","F07"="","F10"="", "F13"="", "F14"="","F15"="", "F17"="","F22"="","M02"="","M04"="",
"M03"="","M07"="", "M05"="","M08"="","M12"="","M14"="","M15"="",
"M17"="","_o"=" 1", "_c"=" 2", "ang"="angry", "calm"="calm",
"disg"="disgust", "exc"="excited", "fear"="fear",
"hap"="happy", "neut"="neutral", "sad"="sad",
"surp"="surprise"))) %>%
rename(old_label=label)
### Group data by age groups and age bins
#average across all distances
avg_dist_long_byGroup <- subj_dist_long %>%
group_by(sort,age_group,item1,item2) %>%
summarize(avg_dist=mean(dist)) %>%
ungroup() %>%
mutate(sort=as.character(sort),age_group=as.character(age_group),item1=as.character(item1),item2=as.character(item2))
#average distance object organized by sorting group
avg_dist_byGroup <- avg_dist_long_byGroup %>%
group_by(sort,age_group) %>%
nest() %>%
mutate(dist_obj = purrr::map(data, long_to_dist))
#average across all distances
avg_dist_long_byBin <- subj_dist_long %>%
group_by(sort,age_bin,item1,item2) %>%
summarize(avg_dist=mean(dist)) %>%
ungroup() %>%
mutate(sort=as.character(sort),age_bin=as.character(age_bin),item1=as.character(item1),item2=as.character(item2))
#average distance object organized by sorting group
avg_dist_byBin <- avg_dist_long_byBin %>%
group_by(sort,age_bin) %>%
nest() %>%
mutate(dist_obj = purrr::map(data, long_to_dist))
#### create overall grouped cluster objects ####
clusters_by_group <- avg_dist_byGroup %>%
mutate(cluster=lapply(dist_obj, function(d) clean_cluster(d))) %>%
mutate(dend = lapply(cluster, function(clst) clst %>% as.dendrogram())) %>%
#cut cluster into 3 groups
mutate(cut_cluster=lapply(cluster, function(clst) cut_cluster(clst,cluster_num=3))) %>%
#fit model predicting valence from clusters
mutate(model_valence = lapply(cut_cluster, function(df) if (sort=="Practice") {NA} else {lm(valence~cluster3, data=df)}))
clusters_by_bin <- avg_dist_byBin %>%
mutate(cluster=lapply(dist_obj, function(d) clean_cluster(d))) %>%
mutate(dend = lapply(cluster, function(clst) clst %>% as.dendrogram())) %>%
#cut cluster into 3 groups
mutate(cut_cluster=lapply(cluster, function(clst) cut_cluster(clst,cluster_num=3))) %>%
#fit model predicting valence from clusters
mutate(model_valence = lapply(cut_cluster, function(df) if (sort=="Practice") {NA} else {lm(valence~cluster3, data=df)}))
Hierarchical clustering results and similarity across age groups is visualized using dendograms and correlation matrices between dendograms.
s1_3_dend <- clusters_by_bin %>% filter(age_bin=="3 to 4",sort=="Sort1") %>% pull(dend) %>% pluck(1)
s1_3_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(1,1,1), k=3) %>% plot(main = "3-year-olds")
s1_4_dend <- clusters_by_bin %>% filter(age_bin=="4 to 5",sort=="Sort1") %>% pull(dend) %>% pluck(1)
s1_4_dend %>% set("branches_lwd", 2) %>% set("branches_k_color", value=c(2,3,4), k=3) %>% plot(main = "4-year-olds")
s1_5_dend <- clusters_by_bin %>% filter(age_bin=="5 to 6",sort=="Sort1") %>% pull(dend) %>% pluck(1)
s1_5_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(2,3,4), k=3) %>% plot(main = "5-year-olds")
s1_6_dend <- clusters_by_bin %>% filter(age_bin=="6 to 7",sort=="Sort1") %>% pull(dend) %>% pluck(1)
s1_6_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(2,4,3), k=3) %>% plot(main = "6-year-olds")
s1_a_dend <- clusters_by_bin %>% filter(age_bin=="adults",sort=="Sort1") %>% pull(dend) %>% pluck(1)
s1_a_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(4,3,2), k=3) %>% plot(main = "Adults")
s2_3_dend <- clusters_by_bin %>% filter(age_bin=="3 to 4",sort=="Sort2") %>% pull(dend) %>% pluck(1)
s2_3_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(1,1,1), k=3) %>% plot(main = "3-year-olds")
s2_4_dend <- clusters_by_bin %>% filter(age_bin=="4 to 5",sort=="Sort2") %>% pull(dend) %>% pluck(1)
s2_4_dend %>% set("branches_lwd", 2) %>% set("branches_k_color", value=c(2,3,4), k=3) %>% plot(main = "4-year-olds")
s2_5_dend <- clusters_by_bin %>% filter(age_bin=="5 to 6",sort=="Sort2") %>% pull(dend) %>% pluck(1)
s2_5_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(2,3,4), k=3) %>% plot(main = "5-year-olds")
s2_6_dend <- clusters_by_bin %>% filter(age_bin=="6 to 7",sort=="Sort2") %>% pull(dend) %>% pluck(1)
s2_6_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(2,4,3), k=3) %>% plot(main = "6-year-olds")
s2_a_dend <- clusters_by_bin %>% filter(age_bin=="adults",sort=="Sort2") %>% pull(dend) %>% pluck(1)
s2_a_dend %>% set("branches_lwd", 2) %>% set("branches_k_color",value=c(4,2,3), k=3) %>% plot(main = "Adults")
#extract practice dendrograms
p3_dend <- clusters_by_bin %>% filter(age_bin=="3 to 4",sort=="Practice") %>% pull(dend) %>% pluck(1)
p4_dend <- clusters_by_bin %>% filter(age_bin=="4 to 5",sort=="Practice") %>% pull(dend) %>% pluck(1)
p5_dend <- clusters_by_bin %>% filter(age_bin=="5 to 6",sort=="Practice") %>% pull(dend) %>% pluck(1)
p6_dend <- clusters_by_bin %>% filter(age_bin=="6 to 7",sort=="Practice") %>% pull(dend) %>% pluck(1)
pa_dend <- clusters_by_bin %>% filter(age_bin=="adults",sort=="Practice") %>% pull(dend) %>% pluck(1)
prac_compare <- dendlist("3-year-olds" = p3_dend, "4-year-olds" = p4_dend, "5-year-olds" = p5_dend, "6-year-olds" = p6_dend, "Adults" = pa_dend)
#Correlation
ggcorrplot(cor.dendlist(prac_compare), type = "lower", lab=TRUE, outline.color="black", ggtheme = ggplot2::theme_void())
similarity_to_adults_p <- data.frame(
sort=rep("Practice",4),
age_bin = c("3 to 4","4 to 5","5 to 6","6 to 7"),
fm_to_adults=c(
Bk(pa_dend, p3_dend, k = 3)$`3`[1],
Bk(pa_dend, p4_dend, k = 3)$`3`[1],
Bk(pa_dend, p5_dend, k = 3)$`3`[1],
Bk(pa_dend, p6_dend, k = 3)$`3`[1]
),
ar_to_adults = c(
adj.rand.index(cutree(pa_dend, k=3),cutree(p3_dend, k=3)),
adj.rand.index(cutree(pa_dend, k=3),cutree(p4_dend, k=3)),
adj.rand.index(cutree(pa_dend, k=3),cutree(p5_dend, k=3)),
adj.rand.index(cutree(pa_dend, k=3),cutree(p6_dend, k=3))
)
)
similarity_to_adults_p %>%
select(age_bin,fm_to_adults,ar_to_adults)%>%
kable(
caption = "Comparing children's hierarchical clustering solutions to adults' during Practice (k=3)",
col.names=c("Age Group","Fowlkes-Mallows Index","Adjusted Rand Index"),
digits=c(0,2,2)
)
| Age Group | Fowlkes-Mallows Index | Adjusted Rand Index |
|---|---|---|
| 3 to 4 | 0.41 | 0.21 |
| 4 to 5 | 1.00 | 1.00 |
| 5 to 6 | 1.00 | 1.00 |
| 6 to 7 | 1.00 | 1.00 |
#Correlation
sort1_compare <- dendlist("3-year-olds" = s1_3_dend, "4-year-olds" = s1_4_dend, "5-year-olds" = s1_5_dend, "6-year-olds" = s1_6_dend, "Adults" = s1_a_dend)
ggcorrplot(cor.dendlist(sort1_compare), type = "lower", lab=TRUE, outline.color="black", ggtheme = ggplot2::theme_void())
similarity_to_adults_s1 <- data.frame(
sort=rep("Sort1",4),
age_bin = c("3 to 4","4 to 5","5 to 6","6 to 7"),
fm_to_adults=c(
Bk(s1_a_dend, s1_3_dend, k = 3)$`3`[1],
Bk(s1_a_dend, s1_4_dend, k = 3)$`3`[1],
Bk(s1_a_dend, s1_5_dend, k = 3)$`3`[1],
Bk(s1_a_dend, s1_6_dend, k = 3)$`3`[1]
),
ar_to_adults = c(
adj.rand.index(cutree(s1_a_dend, k=3),cutree(s1_3_dend, k=3)),
adj.rand.index(cutree(s1_a_dend, k=3),cutree(s1_4_dend, k=3)),
adj.rand.index(cutree(s1_a_dend, k=3),cutree(s1_5_dend, k=3)),
adj.rand.index(cutree(s1_a_dend, k=3),cutree(s1_6_dend, k=3))
)
)
similarity_to_adults_s1 %>%
select(age_bin,fm_to_adults,ar_to_adults)%>%
kable(
caption = "Comparing children's hierarchical clustering solutions to adults' during the Same Individual Sort (k=3)",
col.names=c("Age Group","Fowlkes-Mallows Index","Adjusted Rand Index"),
digits=c(0,2,2)
)
| Age Group | Fowlkes-Mallows Index | Adjusted Rand Index |
|---|---|---|
| 3 to 4 | 0.32 | 0.02 |
| 4 to 5 | 0.40 | 0.14 |
| 5 to 6 | 0.67 | 0.49 |
| 6 to 7 | 0.88 | 0.83 |
#Correlation
sort2_compare <- dendlist("3-year-olds" = s2_3_dend, "4-year-olds" = s2_4_dend, "5-year-olds" = s2_5_dend, "6-year-olds" = s2_6_dend, "Adults" = s2_a_dend)
ggcorrplot(cor.dendlist(sort2_compare), type = "lower", lab=TRUE, outline.color="black", ggtheme = ggplot2::theme_void())
similarity_to_adults_s2 <- data.frame(
sort=rep("Sort2",4),
age_bin = c("3 to 4","4 to 5","5 to 6","6 to 7"),
fm_to_adults=c(
Bk(s2_a_dend, s2_3_dend, k = 3)$`3`[1],
Bk(s2_a_dend, s2_4_dend, k = 3)$`3`[1],
Bk(s2_a_dend, s2_5_dend, k = 3)$`3`[1],
Bk(s2_a_dend, s2_6_dend, k = 3)$`3`[1]
),
ar_to_adults = c(
adj.rand.index(cutree(s2_a_dend, k=3),cutree(s2_3_dend, k=3)),
adj.rand.index(cutree(s2_a_dend, k=3),cutree(s2_4_dend, k=3)),
adj.rand.index(cutree(s2_a_dend, k=3),cutree(s2_5_dend, k=3)),
adj.rand.index(cutree(s2_a_dend, k=3),cutree(s2_6_dend, k=3))
)
)
similarity_to_adults_s2 %>%
select(age_bin,fm_to_adults,ar_to_adults)%>%
kable(
caption = "Comparing children's hierarchical clustering solutions to adults' during the Different Individuals Sort (k=3)",
col.names=c("Age Group","Fowlkes-Mallows Index","Adjusted Rand Index"),
digits=c(0,2,2)
)
| Age Group | Fowlkes-Mallows Index | Adjusted Rand Index |
|---|---|---|
| 3 to 4 | 0.44 | 0.16 |
| 4 to 5 | 0.42 | 0.14 |
| 5 to 6 | 0.65 | 0.49 |
| 6 to 7 | 0.58 | 0.38 |
The clusters were strong predictors of valence ratings across age group (providing convergent evidence that valence was main dimension organizing participants’ sorting across ages).
Children
#children
clusters_by_group %>% filter(sort=="Sort1"&age_group=="kids") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 34.452 1 118.579 1.611e-08 ***
## cluster3 42.917 2 73.858 1.718e-08 ***
## Residuals 4.358 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Adults
#adults
clusters_by_group %>% filter(sort=="Sort1"&age_group=="adults") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 26.658 1 66.950 6.517e-07 ***
## cluster3 41.303 2 51.865 1.826e-07 ***
## Residuals 5.973 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Children
#children
clusters_by_group %>% filter(sort=="Sort2"&age_group=="kids") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 207.321 1 355.89 7.344e-12 ***
## cluster3 30.328 2 26.03 1.325e-05 ***
## Residuals 8.738 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Adults
#adults
clusters_by_group %>% filter(sort=="Sort2"&age_group=="adults") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 104.360 1 369.785 5.569e-12 ***
## cluster3 34.833 2 61.712 5.776e-08 ***
## Residuals 4.233 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3-year-olds
#children
## 3 to 4
clusters_by_bin %>% filter(sort=="Sort1"&age_bin=="3 to 4") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 142.507 1 54.1133 2.382e-06 ***
## cluster3 7.773 2 1.4758 0.26
## Residuals 39.502 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4-year-olds
## 4 to 5
clusters_by_bin %>% filter(sort=="Sort1"&age_bin=="4 to 5") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 16.874 1 8.5527 0.01046 *
## cluster3 17.680 2 4.4806 0.02981 *
## Residuals 29.595 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
5-year-olds
## 5 to 6
clusters_by_bin %>% filter(sort=="Sort1"&age_bin=="5 to 6") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 34.452 1 44.143 7.821e-06 ***
## cluster3 35.568 2 22.787 2.842e-05 ***
## Residuals 11.707 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6-year-olds
## 6 to 7
clusters_by_bin %>% filter(sort=="Sort1"&age_bin=="6 to 7") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 26.658 1 82.810 1.702e-07 ***
## cluster3 42.447 2 65.928 3.707e-08 ***
## Residuals 4.829 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adults
# adults
clusters_by_bin %>% filter(sort=="Sort1"&age_bin=="adults") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 26.658 1 66.950 6.517e-07 ***
## cluster3 41.303 2 51.865 1.826e-07 ***
## Residuals 5.973 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3-year-olds
#children
## 3-year-olds
clusters_by_bin %>% filter(sort=="Sort2"&age_bin=="3 to 4") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 151.926 1 78.8265 2.333e-07 ***
## cluster3 10.156 2 2.6346 0.1046
## Residuals 28.910 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4-year-olds
## 4-year-olds
clusters_by_bin %>% filter(sort=="Sort2"&age_bin=="4 to 5") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 67.337 1 51.6210 3.151e-06 ***
## cluster3 19.499 2 7.4742 0.005596 **
## Residuals 19.567 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
5-year-olds
## 5-year-olds
clusters_by_bin %>% filter(sort=="Sort2"&age_bin=="5 to 6") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 71.727 1 98.745 5.430e-08 ***
## cluster3 28.170 2 19.391 6.933e-05 ***
## Residuals 10.896 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6-year-olds
## 6-year-olds
clusters_by_bin %>% filter(sort=="Sort2"&age_bin=="6 to 7") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 207.321 1 355.89 7.344e-12 ***
## cluster3 30.328 2 26.03 1.325e-05 ***
## Residuals 8.738 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adults
# adults
clusters_by_bin %>% filter(sort=="Sort2"&age_bin=="adults") %>% pull(model_valence) %>% pluck(1) %>% Anova(type="III")
## Anova Table (Type III tests)
##
## Response: valence
## Sum Sq Df F value Pr(>F)
## (Intercept) 104.360 1 369.785 5.569e-12 ***
## cluster3 34.833 2 61.712 5.776e-08 ***
## Residuals 4.233 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] fossil_0.4.0 shapefiles_0.7 foreign_0.8-81 maps_3.4.0
## [5] sp_1.4-5 vegan_2.5-7 lattice_0.20-44 permute_0.9-5
## [9] factoextra_1.0.7 cluster_2.1.2 ggcorrplot_0.1.3 corrplot_0.90
## [13] dendextend_1.15.1 dendroextras_0.2.3 ggimage_0.2.9 ggdendro_0.1.22
## [17] harrietr_0.2.3 lmSupport_2.9.13 lmerTest_3.1-3 car_3.0-11
## [21] carData_3.0-4 lme4_1.1-27.1 Matrix_1.3-4 cowplot_1.1.1
## [25] here_1.0.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [29] purrr_0.3.4 readr_2.0.1 tidyr_1.1.3 tibble_3.1.4
## [33] ggplot2_3.3.5 tidyverse_1.3.1 knitr_1.34
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 VGAM_1.1-5
## [4] plyr_1.8.6 lazyeval_0.2.2 splines_4.1.1
## [7] unmarked_1.1.1 digest_0.6.28 yulab.utils_0.0.4
## [10] htmltools_0.5.2 viridis_0.6.1 magick_2.7.3
## [13] fansi_0.5.0 magrittr_2.0.1 AICcmodavg_2.3-1
## [16] tzdb_0.1.2 openxlsx_4.2.4 modelr_0.1.8
## [19] colorspace_2.0-2 rvest_1.0.1 ggrepel_0.9.1
## [22] haven_2.4.3 xfun_0.26 crayon_1.4.1
## [25] jsonlite_1.7.2 survival_3.2-11 ape_5.5
## [28] glue_1.4.2 gtable_0.3.0 abind_1.4-5
## [31] scales_1.1.1 DBI_1.1.1 Rcpp_1.0.7
## [34] viridisLite_0.4.0 xtable_1.8-4 tmvnsim_1.0-2
## [37] gridGraphics_0.5-1 tidytree_0.3.6 stats4_4.1.1
## [40] httr_1.4.2 gplots_3.1.1 ellipsis_0.3.2
## [43] farver_2.1.0 pkgconfig_2.0.3 sass_0.4.0
## [46] dbplyr_2.1.1 utf8_1.2.2 reshape2_1.4.4
## [49] labeling_0.4.2 ggplotify_0.1.0 tidyselect_1.1.1
## [52] rlang_0.4.11 munsell_0.5.0 cellranger_1.1.0
## [55] tools_4.1.1 cli_3.0.1 generics_0.1.0
## [58] broom_0.7.9 evaluate_0.14 fastmap_1.1.0
## [61] yaml_2.2.1 ggtree_3.2.0 fs_1.5.0
## [64] zip_2.2.0 caTools_1.18.2 nlme_3.1-152
## [67] aplot_0.1.1 xml2_1.3.2 compiler_4.1.1
## [70] pbkrtest_0.5.1 rstudioapi_0.13 curl_4.3.2
## [73] reprex_2.0.1 treeio_1.18.1 bslib_0.3.0
## [76] stringi_1.7.4 highr_0.9 psych_2.1.9
## [79] nloptr_1.2.2.2 vctrs_0.3.8 pillar_1.6.2
## [82] lifecycle_1.0.0 pwr_1.3-0 jquerylib_0.1.4
## [85] data.table_1.14.0 bitops_1.0-7 raster_3.4-13
## [88] patchwork_1.1.1 R6_2.5.1 KernSmooth_2.23-20
## [91] gridExtra_2.3 rio_0.5.27 codetools_0.2-18
## [94] boot_1.3-28 MASS_7.3-54 gtools_3.9.2
## [97] assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2
## [100] mnormt_2.0.2 mgcv_1.8-36 parallel_4.1.1
## [103] hms_1.1.0 grid_4.1.1 ggfun_0.0.4
## [106] minqa_1.2.4 rmarkdown_2.11 numDeriv_2016.8-1.1
## [109] gvlma_1.0.0.3 lubridate_1.7.10