Load packages

library(png)
library(grid)
library(ggplot2)
library(xtable)
require(here)
## Loading required package: here
## here() starts at /Users/gkacherg/Documents/GitHub/curiobaby_drop
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
require(tidyboot)
## Loading required package: tidyboot
library(apa) # for generating APA-style text to report statistical tests
library(ggpubr)
require(gridExtra)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
require(papaja)
## Loading required package: papaja
require(kableExtra)
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
#require(qualtRics)
require(lmerTest)
## Loading required package: lmerTest
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
require(DT)
## Loading required package: DT

Load Data

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   SC0 = col_double(),
##   `Random ID` = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   SC0 = col_double(),
##   `Random ID` = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
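The chunk that reads the two rating files is not echoed here; the column specifications above are its read_csv() output. A minimal sketch of what it presumably does, with hypothetical file names under data/:

# read the interest and likelihood Qualtrics exports (file names are assumed, not from the source)
int_dat <- read_csv(here("data", "interest_ratings.csv"))
lik_dat <- read_csv(here("data", "likelihood_ratings.csv"))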

Clean data

# count exclusions before filtering them out
n_excl_int <- sum(!is.na(int_dat$Exclusion))
n_excl_lik <- sum(!is.na(lik_dat$Exclusion))

int_dat <- int_dat %>% filter(is.na(Exclusion))

lik_dat <- lik_dat %>% filter(is.na(Exclusion))

Pivot to long format

int_long <- int_dat %>% relocate(subjID) %>% # SC0 = score of catch trials
  pivot_longer(cols=4:ncol(int_dat), names_to="trial", values_to = "rating") %>%
  separate(col=trial, sep='-', into=c("drop","choice")) %>% select(-Exclusion) %>%
  rename(catch_trials_correct=`SC0`)

lik_long <- lik_dat %>% relocate(subjID) %>%
  pivot_longer(cols=4:ncol(lik_dat), names_to="trial", values_to = "rating") %>%
  separate(col=trial, sep='-', into=c("drop","choice")) %>% select(-Exclusion) %>%
  filter(!is.na(rating))

Data inspection

table(int_long$subjID)
## 
## 10535 14210 14916 16357 18172 18653 19068 21976 23362 28686 29068 38392 39396 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 40378 41047 41873 42169 42613 46062 49414 51547 57404 57532 58741 66869 67369 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 69110 78787 80426 80848 82004 82157 85052 85841 89419 89470 89891 90003 92691 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 93644 94684 95478 97625 98736 98907 
##    39    39    39    39    39    39
table(lik_long$subjID)
## 
## 10257 17530 18126 18686 21633 21825 28819 29951 33335 33980 35093 36148 36912 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 37316 40587 42009 43570 43937 45321 48185 51140 52067 55451 58257 60280 63615 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 64055 66351 66893 68015 68530 71180 72312 73067 75615 77937 79509 79546 79990 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 80550 80913 82938 83291 89350 92635 93635 94704 98753 99765 
##    39    39    39    39    39    39    39    39    39    39
sum(is.na(int_long$rating))
## [1] 0
sum(is.na(lik_long$rating))
## [1] 0

Summarize Choice Data

#combos = adult_long %>% distinct(drop, choice, target)
distractors = c("trig prism", "trig prism", "octahedron", "sphere", "octahedron", "pyramid", "pyramid", "sphere", "sphere", "pyramid", "octahedron", "dumbbell", "dumbbell", "sphere", "pentagon", "cone", "octahedron", "dumbbell", "dumbbell", "cone")

adult_agg <- adult_long %>% group_by(drop, target, relation) %>%
  summarize(chose_target = mean(chose_target))
## `summarise()` has grouped output by 'drop', 'target'. You can override using the `.groups` argument.
adult_agg$distractor = distractors
# do we want raw counts for chose target vs. foil? (a sketch follows below)
# for trial 1, 70% chose target (30% chose distractor)

child_agg <- child_long %>% group_by(drop, target, relation) %>%
  summarize(chose_target = mean(chose_target, na.rm=T)) 
## `summarise()` has grouped output by 'drop', 'target'. You can override using the `.groups` argument.
child_agg$distractor = distractors
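Raw counts of target vs. distractor choices (per the note above) could be tallied as follows; a sketch assuming chose_target is coded 0/1 in the long data:

# per-trial counts of target vs. distractor choices (sketch)
adult_counts <- adult_long %>% group_by(drop, target, relation) %>%
  summarize(n_target = sum(chose_target, na.rm=T),
            n_distractor = sum(1 - chose_target, na.rm=T),
            .groups = "drop")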

Summarize Ratings Data

lik_agg <- lik_long %>% mutate(rating = as.numeric(rating)) %>%
  group_by(drop, choice) %>%
  summarise(likelihood = mean(rating) - 1) # shift from 1-6 to 0-5
## `summarise()` has grouped output by 'drop'. You can override using the `.groups` argument.
int_agg <- int_long %>% mutate(rating = as.numeric(rating)) %>%
  group_by(drop, choice) %>%
  summarise(interest = mean(rating) - 1) 
## `summarise()` has grouped output by 'drop'. You can override using the `.groups` argument.
# also want ratings scaled 0-1? (a sketch follows below)

results_table <- lik_agg %>% left_join(int_agg) %>%
  arrange(desc(likelihood))
## Joining, by = c("drop", "choice")
datatable(results_table) %>% formatRound(columns=c("likelihood","interest"), digits=3)
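If 0-1 scaling is wanted (per the note above), dividing the 0-5 means by the scale maximum suffices; a sketch:

# rescale mean ratings from 0-5 to 0-1 (sketch)
lik_agg_01 <- lik_agg %>% mutate(likelihood = likelihood / 5)
int_agg_01 <- int_agg %>% mutate(interest = interest / 5)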

Merge ratings with adults’ drop choices

adult_agg <- adult_agg %>% left_join(lik_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_lik = likelihood) %>%
  left_join(lik_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_lik = likelihood)

adult_agg <- adult_agg %>% left_join(int_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_int = interest) %>%
  left_join(int_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_int = interest)

adult_agg <- adult_agg %>% 
  mutate(prop_targ_lik = targ_lik / (targ_lik + dist_lik),
         prop_targ_int = targ_int / (targ_int + dist_int))

Merge ratings with children’s drop choices

child_agg <- child_agg %>% left_join(lik_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_lik = likelihood) %>%
  left_join(lik_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_lik = likelihood)

child_agg <- child_agg %>% left_join(int_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_int = interest) %>%
  left_join(int_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_int = interest)

child_agg <- child_agg %>% 
  mutate(prop_targ_lik = targ_lik / (targ_lik + dist_lik),
         prop_targ_int = targ_int / (targ_int + dist_int))
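The adult and child merges are identical except for their input, so a helper could replace both; a sketch using the same joins and Luce-choice shares as above (add_ratings is not in the original pipeline):

# attach mean ratings for target and distractor, then compute the
# Luce-choice share of the target's rating (sketch)
add_ratings <- function(agg) {
  agg %>%
    left_join(lik_agg, by=c("drop"="drop", "target"="choice")) %>% rename(targ_lik = likelihood) %>%
    left_join(lik_agg, by=c("drop"="drop", "distractor"="choice")) %>% rename(dist_lik = likelihood) %>%
    left_join(int_agg, by=c("drop"="drop", "target"="choice")) %>% rename(targ_int = interest) %>%
    left_join(int_agg, by=c("drop"="drop", "distractor"="choice")) %>% rename(dist_int = interest) %>%
    mutate(prop_targ_lik = targ_lik / (targ_lik + dist_lik),
           prop_targ_int = targ_int / (targ_int + dist_int))
}
# e.g., adult_agg <- add_ratings(adult_agg); child_agg <- add_ratings(child_agg)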

Adult Choices vs. Ratings Table

adult_agg %>% relocate(relation) %>% 
  relocate(distractor, .after=target) %>% 
  relocate(prop_targ_lik, .after=dist_lik) %>%
  DT::datatable() %>% 
  formatRound(columns=c("chose_target","targ_lik","dist_lik",
                        "targ_int","dist_int","prop_targ_lik",
                        "prop_targ_int"), digits=3)

Compare adult choices to ratings-derived proportions

Regression:

m_lik <- lm(chose_target ~ relation * prop_targ_lik, data=adult_agg)
m_int <- lm(chose_target ~ relation * prop_targ_int, data=adult_agg)
summary(m_lik) # Rsq = .63
## 
## Call:
## lm(formula = chose_target ~ relation * prop_targ_lik, data = adult_agg)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.166929 -0.045984  0.000186  0.060379  0.237002 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     1.0769     0.6936   1.553    0.140
## relationsupport                -1.0416     0.8532  -1.221    0.240
## prop_targ_lik                  -0.2232     0.7653  -0.292    0.774
## relationsupport:prop_targ_lik   0.9138     0.9574   0.954    0.354
## 
## Residual standard error: 0.1072 on 16 degrees of freedom
## Multiple R-squared:  0.6324, Adjusted R-squared:  0.5635 
## F-statistic: 9.177 on 3 and 16 DF,  p-value: 0.0009123
summary(m_int) # Rsq = .84
## 
## Call:
## lm(formula = chose_target ~ relation * prop_targ_int, data = adult_agg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15408 -0.02446  0.01719  0.04478  0.10658 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     0.5899     0.3035   1.944   0.0697 .
## relationsupport                -0.8069     0.3535  -2.283   0.0365 *
## prop_targ_int                   0.3701     0.3932   0.941   0.3605  
## relationsupport:prop_targ_int   0.9202     0.4791   1.921   0.0728 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07179 on 16 degrees of freedom
## Multiple R-squared:  0.8352, Adjusted R-squared:  0.8043 
## F-statistic: 27.03 on 3 and 16 DF,  p-value: 1.677e-06
anova(m_lik, m_int) # note: the models are non-nested with equal df, so no F test is produced (see AIC sketch below)
## Analysis of Variance Table
## 
## Model 1: chose_target ~ relation * prop_targ_lik
## Model 2: chose_target ~ relation * prop_targ_int
##   Res.Df      RSS Df Sum of Sq F Pr(>F)
## 1     16 0.183934                      
## 2     16 0.082461  0   0.10147
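Because the two models are non-nested with equal residual df, the anova above yields no F test; an information criterion comparison is the usual alternative, e.g.:

# compare the non-nested models by AIC (lower is better)
AIC(m_lik, m_int)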
m3 <- lm(chose_target ~ relation * prop_targ_lik + relation * prop_targ_int, data=adult_agg)
summary(m3)
## 
## Call:
## lm(formula = chose_target ~ relation * prop_targ_lik + relation * 
##     prop_targ_int, data = adult_agg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15457 -0.02199  0.01202  0.04563  0.09992 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     0.8815     0.5184   1.700    0.111
## relationsupport                -0.9836     0.6250  -1.574    0.138
## prop_targ_lik                  -0.3978     0.5597  -0.711    0.489
## prop_targ_int                   0.4592     0.4295   1.069    0.303
## relationsupport:prop_targ_lik   0.2206     0.7198   0.306    0.764
## relationsupport:prop_targ_int   0.8888     0.5366   1.656    0.120
## 
## Residual standard error: 0.075 on 14 degrees of freedom
## Multiple R-squared:  0.8426, Adjusted R-squared:  0.7864 
## F-statistic: 14.99 on 5 and 14 DF,  p-value: 3.41e-05
p1 <- adult_agg %>% ggplot(aes(x=chose_target, y=prop_targ_lik, color=relation)) + geom_point() +
  xlab("Proportion of Adults Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_lik, label = ..rr.label..))

p2 <- adult_agg %>% ggplot(aes(x=chose_target, y=prop_targ_int, color=relation)) + geom_point() +
  xlab("Proportion of Adults Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Interestingness") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_int, label = ..rr.label..))

ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)

Compare children’s choices to ratings-derived proportions

p1 <- child_agg %>% ggplot(aes(x=chose_target, y=prop_targ_lik, color=relation)) + geom_point() +
  xlab("Proportion of Children Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_lik, label = ..rr.label..))

p2 <- child_agg %>% ggplot(aes(x=chose_target, y=prop_targ_int, color=relation)) + geom_point() +
  xlab("Proportion of Children Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Interestingness") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_int, label = ..rr.label..))

ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)
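The four scatterplots above differ only in data, y variable, and labels; a plotting helper is one way to cut the repetition (a sketch; plot_choice_vs is not in the original code):

# scatter a choice proportion against a Luce-choice rating share (sketch)
plot_choice_vs <- function(dat, yvar, xlab, ylab) {
  ggplot(dat, aes(x=chose_target, y={{yvar}}, color=relation)) + geom_point() +
    xlab(xlab) + ylab(ylab) +
    xlim(0,1) + ylim(0,1) + theme_classic() +
    geom_abline(slope=1, intercept=0, linetype="dashed") +
    stat_cor(inherit.aes = F, aes(x=chose_target, y={{yvar}}, label = ..rr.label..))
}
# e.g., plot_choice_vs(child_agg, prop_targ_lik,
#   "Proportion of Children Choosing Target", "Luce choice of Target vs. Distractor Likelihood")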

Compare Likelihood Ratings to Physics Models

load(here("models/model_data.RData")) # mdat, all_trials_mse, all_trials_mse_r, etc.
# load(here("models/support_probability_fit_df.RData")) 

# compare to support probability
supp_prob <- mdat %>% select(drop, target, relation, support_probability) 

trials <- adult_agg %>% select(drop, target, distractor, relation) %>%
  left_join(supp_prob, by=c("drop","target","relation")) %>%
  rename(targ_supp_prob = support_probability) %>%
  left_join(supp_prob, by=c("drop"="drop", "distractor"="target", "relation"="relation")) %>%
  rename(dist_supp_prob = support_probability) %>%
  mutate(prop_target_supp = targ_supp_prob / (targ_supp_prob + dist_supp_prob + 1e-5)) # 1e-5 guards against division by zero

p1 <- adult_agg %>% left_join(trials %>% select(drop, target, distractor, relation, prop_target_supp)) %>%
  ggplot(aes(x=prop_target_supp, y=prop_targ_lik, color=relation)) + geom_point(alpha=.8) +
  xlab("Proportion Target Support in Model") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=prop_target_supp, y=prop_targ_lik, label = ..rr.label..))
## Joining, by = c("drop", "target", "relation", "distractor")
# compare to support sharpness
supp_tmp <- mdat %>% select(drop, target, relation, support_std) 

trials <- adult_agg %>% select(drop, target, distractor, relation) %>%
  left_join(supp_tmp, by=c("drop","target","relation")) %>%
  rename(targ_supp_std = support_std) %>%
  left_join(supp_tmp, by=c("drop"="drop", "distractor"="target", "relation"="relation")) %>%
  rename(dist_supp_std = support_std) %>%
  mutate(prop_target_supp = targ_supp_std / (targ_supp_std + dist_supp_std + 1e-5)) # 1e-5 guards against division by zero

p2 <- adult_agg %>% left_join(trials %>% select(drop, target, distractor, relation, prop_target_supp)) %>%
  ggplot(aes(x=prop_target_supp, y=prop_targ_lik, color=relation)) + geom_point(alpha=.8) +
  xlab("Prop. Target Support SD in Model") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=prop_target_supp, y=prop_targ_lik, label = ..rr.label..))
## Joining, by = c("drop", "target", "relation", "distractor")
ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)
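The target / (target + distractor) pattern recurs for the ratings and for both model quantities; a small helper would make the smoothing constant explicit (a sketch; luce_prop is not in the original code):

# Luce-choice share of the target, with a small epsilon guarding against 0/0 (sketch)
luce_prop <- function(targ, dist, eps = 1e-5) targ / (targ + dist + eps)
# e.g., mutate(prop_target_supp = luce_prop(targ_supp_prob, dist_supp_prob))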

Directly compare ratings to model probabilities

mdat <- mdat %>% left_join(lik_agg, by=c("drop"="drop", "target"="choice"))

p1 <- mdat %>% ggplot(aes(x=likelihood, y=support_probability, color=relation)) +
  geom_point(alpha=.7) + theme_classic() + xlab("Adults' Mean Likelihood Rating") +
  stat_cor(inherit.aes = F, aes(x=likelihood, y=support_probability, label = ..rr.label..))

p2 <- mdat %>% ggplot(aes(x=likelihood, y=support_std, color=relation)) +
  geom_point(alpha=.7) + theme_classic() + xlab("Adults' Mean Likelihood Rating") +
  stat_cor(inherit.aes = F, aes(x=likelihood, y=support_std, label = ..rr.label..))

ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)

Other feature correlations

with(mdat, cor(likelihood, support_probability)) # r=.75
## [1] 0.7492594
with(mdat, cor(likelihood, support_std)) # .77
## [1] 0.7727355
with(mdat, cor(likelihood, support_sharpness_accuracy_mean)) # .496
## [1] 0.4958176
with(mdat, cor(likelihood, support_response_linearity_r)) # .48
## [1] 0.4777165
with(mdat, cor(likelihood, support_response_linearity_pv)) # .53
## [1] 0.5327534
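The piecewise correlations above could also be collected into a single table; a sketch with summarise(across()):

# correlations of mean likelihood ratings with each model feature (sketch)
mdat %>% summarise(across(c(support_probability, support_std,
                            support_sharpness_accuracy_mean,
                            support_response_linearity_r,
                            support_response_linearity_pv),
                          ~ cor(likelihood, .x)))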