Main analysis questions
Are children’s OWN drawings more similar to other drawings they’ve made than to drawings made by other kids (of the same category)?
Calculate the average JSD within each category for each participant paired with every other participant (including themselves). Then average across categories. Do this first for the unshuffled drawings and then for the shuffled drawings.
# Per-child mean JSD, split by whether the two compared drawings were made by
# the same child (same_sub == TRUE) or by different children (same_sub == FALSE).
jsd_by_sub <- jsd_by_sketch_long %>%
  # drop self-comparisons and keep only within-category comparisons
  filter(sketch_1 != sketch_2, category_sketch_1 == category_sketch_2) %>%
  mutate(same_sub = sub_id_sketch_1 == sub_id_sketch_2) %>%
  # average over every (child 1, child 2, category) combination
  group_by(sub_id_sketch_1, sub_id_sketch_2, category_sketch_1, same_sub) %>%
  summarise(
    mean_jsd = mean(jsd),
    count_drawings_sub_1 = n_distinct(sketch_1),
    count_drawings_sub_2 = n_distinct(sketch_2)
  ) %>%
  ungroup() %>%
  # keep a pairing only when both sides have more than 2 distinct drawings in
  # the category (i.e. at least 3)
  # NOTE(review): confirm that 3 is the intended minimum
  filter(count_drawings_sub_1 > 2, count_drawings_sub_2 > 2) %>%
  # collapse across partners and categories: one row per child x same_sub
  group_by(sub_id_sketch_1, same_sub) %>%
  summarise(mean_jsd = mean(mean_jsd))
## `summarise()` has grouped output by 'sub_id_sketch_1', 'sub_id_sketch_2',
## 'category_sketch_1'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'sub_id_sketch_1'. You can override using
## the `.groups` argument.
# Compare per-child mean JSD for different-child pairings (first argument)
# against same-child pairings (second argument) using t.test()'s default
# two-sample test.
# NOTE(review): jsd_by_sub has one row per child x same_sub, so the two samples
# may come from the same children; consider whether a paired test is more
# appropriate — confirm.
t.test(jsd_by_sub$mean_jsd[jsd_by_sub$same_sub==FALSE], jsd_by_sub$mean_jsd[jsd_by_sub$same_sub==TRUE])
##
## Welch Two Sample t-test
##
## data: jsd_by_sub$mean_jsd[jsd_by_sub$same_sub == FALSE] and jsd_by_sub$mean_jsd[jsd_by_sub$same_sub == TRUE]
## t = 3.945, df = 27.039, p-value = 0.0005108
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0591551 0.1873656
## sample estimates:
## mean of x mean of y
## 0.5713785 0.4481182
Now do the same thing with shuffled drawings
# Same per-child summary as for the unshuffled data, computed on the shuffled
# drawings: mean JSD per child, split by same-child vs. different-child pairings.
jsd_by_sub_shuffled <- jsd_by_sketch_long_shuffled %>%
  # only compare non-identical sketches, and only within a category
  filter(sketch_1 != sketch_2, category_sketch_1 == category_sketch_2) %>%
  mutate(same_sub = sub_id_sketch_1 == sub_id_sketch_2) %>%
  # average over every (child 1, child 2, category) combination
  group_by(sub_id_sketch_1, sub_id_sketch_2, category_sketch_1, same_sub) %>%
  summarise(
    mean_jsd = mean(jsd),
    count_drawings_sub_1 = n_distinct(sketch_1),
    count_drawings_sub_2 = n_distinct(sketch_2)
  ) %>%
  ungroup() %>%
  # keep a pairing only when both sides have more than 2 distinct drawings in
  # the category (i.e. at least 3)
  # NOTE(review): confirm that 3 is the intended minimum
  filter(count_drawings_sub_1 > 2, count_drawings_sub_2 > 2) %>%
  # collapse across partners and categories: one row per child x same_sub
  group_by(sub_id_sketch_1, same_sub) %>%
  summarise(mean_jsd = mean(mean_jsd))
## `summarise()` has grouped output by 'sub_id_sketch_1', 'sub_id_sketch_2',
## 'category_sketch_1'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'sub_id_sketch_1'. You can override using
## the `.groups` argument.
# Shuffled-data comparison: per-child mean JSD for different-child pairings
# (first argument) against same-child pairings (second argument), using
# t.test()'s default two-sample test.
# NOTE(review): jsd_by_sub_shuffled has one row per child x same_sub, so a
# paired test may be more appropriate — confirm.
t.test(jsd_by_sub_shuffled$mean_jsd[jsd_by_sub_shuffled$same_sub==FALSE], jsd_by_sub_shuffled$mean_jsd[jsd_by_sub_shuffled$same_sub==TRUE])
##
## Welch Two Sample t-test
##
## data: jsd_by_sub_shuffled$mean_jsd[jsd_by_sub_shuffled$same_sub == FALSE] and jsd_by_sub_shuffled$mean_jsd[jsd_by_sub_shuffled$same_sub == TRUE]
## t = 0.92884, df = 33.94, p-value = 0.3595
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01848466 0.04960184
## sample estimates:
## mean of x mean of y
## 0.5610821 0.5455235
Build a combined data structure (unshuffled + shuffled) so the same-child vs. different-child difference can be tested across conditions in a linear regression.
# Stack the unshuffled and shuffled per-child summaries into one table, tagging
# each row with the condition it came from. The join has no explicit `by`, so
# it matches on all shared columns (including `condition`).
full_data_jsd <- full_join(
  mutate(jsd_by_sub, condition = "unshuffled"),
  mutate(jsd_by_sub_shuffled, condition = "shuffled")
)
## Joining with `by = join_by(sub_id_sketch_1, same_sub, mean_jsd, condition)`
The effect pans out as a significant condition × same_sub interaction term.
# Linear model of per-child mean JSD on condition, same_sub and their
# interaction; the interaction term is the effect of interest.
summary(lm(mean_jsd ~ condition * same_sub, data = full_data_jsd))
##
## Call:
## lm(formula = mean_jsd ~ condition * same_sub, data = full_data_jsd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44812 -0.02724 0.00704 0.03580 0.23069
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.56108 0.01773 31.653 < 2e-16 ***
## conditionunshuffled 0.01030 0.02507 0.411 0.68214
## same_subTRUE -0.01556 0.02507 -0.621 0.53624
## conditionunshuffled:same_subTRUE -0.10770 0.03545 -3.038 0.00304 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09038 on 100 degrees of freedom
## Multiple R-squared: 0.2343, Adjusted R-squared: 0.2113
## F-statistic: 10.2 on 3 and 100 DF, p-value: 6.402e-06
# One point per child x same_sub, faceted by condition; grey lines connect the
# two values belonging to the same child.
ggplot(full_data_jsd, aes(same_sub, mean_jsd, colour = same_sub)) +
  facet_wrap(~condition) +
  geom_point() +
  # geom_smooth(method='lm') +
  geom_line(aes(group = sub_id_sketch_1), colour = "grey")
Examine JSD by category and age
Here, each dot is an individual sketch of a category, and the y-axis is its dissimilarity (JSD) to drawings of that same category made by other children. Each drawing is tagged with the age of the drawer at the time.
# For every child x category x age, the mean JSD between that child's drawings
# and the drawings of the same category made by other children.
xd <- jsd_by_sketch_long %>%
  # drop self-comparisons and keep only within-category comparisons
  filter(sketch_1 != sketch_2) %>%
  mutate(same_sub = (sub_id_sketch_1 == sub_id_sketch_2)) %>%
  filter(category_sketch_1 == category_sketch_2) %>%
  # average within each pair of children, category and age of the first drawer;
  # years_old_sketch_1 is a grouping key, so it is carried through without
  # being recomputed
  group_by(sub_id_sketch_1, sub_id_sketch_2, same_sub, category_sketch_1, years_old_sketch_1) %>%
  summarize(mean_jsd = mean(jsd)) %>%
  ungroup() %>%
  # between-child comparisons only
  # NOTE(review): the original comment states there is one drawing per age per
  # child, which is presumably why within-child pairs are excluded — confirm
  filter(!same_sub) %>%
  # collapse across the other children: one row per child x category x age
  group_by(sub_id_sketch_1, category_sketch_1, years_old_sketch_1) %>%
  summarize(mean_jsd = mean(mean_jsd))
## `summarise()` has grouped output by 'sub_id_sketch_1', 'sub_id_sketch_2',
## 'same_sub', 'category_sketch_1'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'sub_id_sketch_1', 'category_sketch_1'. You
## can override using the `.groups` argument.
# Mean JSD to other children's drawings against the drawer's age, one panel
# (and colour) per category, with a linear fit per panel.
# NOTE(review): the y-axis label below contains typos ("Dissimialarity",
# "childrens"); it is left unchanged here because it is rendered in the figure.
ggplot(xd, aes(years_old_sketch_1, mean_jsd, colour = category_sketch_1)) +
  facet_wrap(~category_sketch_1) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm") +
  ylab('Dissimialarity to other childrens drawings(JSD)')
## `geom_smooth()` using formula = 'y ~ x'