Load packages

library(png)
library(grid)
library(ggplot2)
library(xtable)
require(here)
## Loading required package: here
## here() starts at /Users/gkacherg/Documents/GitHub/curiobaby_drop
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
require(tidyboot)
## Loading required package: tidyboot
library(apa) # for generating APA-style text to report statistical tests
library(ggpubr)
require(gridExtra)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
require(papaja)
## Loading required package: papaja
require(kableExtra)
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
#require(qualtRics)
require(lmerTest)
## Loading required package: lmerTest
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
require(DT)
## Loading required package: DT

Load Data

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   SC0 = col_double(),
##   `Random ID` = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   SC0 = col_double(),
##   `Random ID` = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
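The chunk that reads the two rating files is not echoed here; the column specifications above are its read_csv() output. A minimal sketch of what it presumably does, with hypothetical file names under data/:

# read the interest and likelihood Qualtrics exports (file names are assumed, not from the source)
int_dat <- read_csv(here("data", "interest_ratings.csv"))
lik_dat <- read_csv(here("data", "likelihood_ratings.csv"))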

Clean data

# count exclusions before filtering them out
n_excl_int <- sum(!is.na(int_dat$Exclusion))
n_excl_lik <- sum(!is.na(lik_dat$Exclusion))

int_dat <- int_dat %>% filter(is.na(Exclusion))

lik_dat <- lik_dat %>% filter(is.na(Exclusion))

Pivot to long format

int_long <- int_dat %>% relocate(subjID) %>% # SC0 = score of catch trials
  pivot_longer(cols=4:ncol(int_dat), names_to="trial", values_to = "rating") %>%
  separate(col=trial, sep='-', into=c("drop","choice")) %>% select(-Exclusion) %>%
  rename(catch_trials_correct=`SC0`)

lik_long <- lik_dat %>% relocate(subjID) %>%
  pivot_longer(cols=4:ncol(lik_dat), names_to="trial", values_to = "rating") %>%
  separate(col=trial, sep='-', into=c("drop","choice")) %>% select(-Exclusion) %>%
  filter(!is.na(rating))

Data inspection

table(int_long$subjID)
## 
## 10535 14210 14916 16357 18172 18653 19068 21976 23362 28686 29068 38392 39396 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 40378 41047 41873 42169 42613 46062 49414 51547 57404 57532 58741 66869 67369 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 69110 78787 80426 80848 82004 82157 85052 85841 89419 89470 89891 90003 92691 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 93644 94684 95478 97625 98736 98907 
##    39    39    39    39    39    39
table(lik_long$subjID)
## 
## 10257 17530 18126 18686 21633 21825 28819 29951 33335 33980 35093 36148 36912 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 37316 40587 42009 43570 43937 45321 48185 51140 52067 55451 58257 60280 63615 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 64055 66351 66893 68015 68530 71180 72312 73067 75615 77937 79509 79546 79990 
##    39    39    39    39    39    39    39    39    39    39    39    39    39 
## 80550 80913 82938 83291 89350 92635 93635 94704 98753 99765 
##    39    39    39    39    39    39    39    39    39    39
sum(is.na(int_long$rating))
## [1] 0
sum(is.na(lik_long$rating))
## [1] 0

Summarize Choice Data

#combos = adult_long %>% distinct(drop, choice, target)
distractors = c("trig prism", "trig prism", "octahedron", "sphere", "octahedron", "pyramid", "pyramid", "sphere", "sphere", "pyramid", "octahedron", "dumbbell", "dumbbell", "sphere", "pentagon", "cone", "octahedron", "dumbbell", "dumbbell", "cone")

adult_agg <- adult_long %>% group_by(drop, target, relation) %>%
  summarize(chose_target = mean(chose_target))
## `summarise()` has grouped output by 'drop', 'target'. You can override using the `.groups` argument.
adult_agg$distractor = distractors
# do we want raw counts for chose target vs. foil? (a sketch follows below)
# for trial 1, 70% chose target (30% chose distractor)

child_agg <- child_long %>% group_by(drop, target, relation) %>%
  summarize(chose_target = mean(chose_target, na.rm=T)) 
## `summarise()` has grouped output by 'drop', 'target'. You can override using the `.groups` argument.
child_agg$distractor = distractors
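Raw counts of target vs. distractor choices (per the note above) could be tallied as follows; a sketch assuming chose_target is coded 0/1 in the long data:

# per-trial counts of target vs. distractor choices (sketch)
adult_counts <- adult_long %>% group_by(drop, target, relation) %>%
  summarize(n_target = sum(chose_target, na.rm=T),
            n_distractor = sum(1 - chose_target, na.rm=T),
            .groups = "drop")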

Summarize Ratings Data

lik_agg <- lik_long %>% mutate(rating = as.numeric(rating)) %>%
  group_by(drop, choice) %>%
  summarise(likelihood = mean(rating) - 1) # shift from 1-6 to 0-5
## `summarise()` has grouped output by 'drop'. You can override using the `.groups` argument.
int_agg <- int_long %>% mutate(rating = as.numeric(rating)) %>%
  group_by(drop, choice) %>%
  summarise(interest = mean(rating) - 1) 
## `summarise()` has grouped output by 'drop'. You can override using the `.groups` argument.
# also want ratings scaled 0-1? (a sketch follows below)

results_table <- lik_agg %>% left_join(int_agg) %>%
  arrange(desc(likelihood))
## Joining, by = c("drop", "choice")
datatable(results_table) %>% formatRound(columns=c("likelihood","interest"), digits=3)
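If 0-1 scaling is wanted (per the note above), dividing the 0-5 means by the scale maximum suffices; a sketch:

# rescale mean ratings from 0-5 to 0-1 (sketch)
lik_agg_01 <- lik_agg %>% mutate(likelihood = likelihood / 5)
int_agg_01 <- int_agg %>% mutate(interest = interest / 5)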

Merge ratings with adults’ drop choices

adult_agg <- adult_agg %>% left_join(lik_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_lik = likelihood) %>%
  left_join(lik_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_lik = likelihood)

adult_agg <- adult_agg %>% left_join(int_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_int = interest) %>%
  left_join(int_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_int = interest)

adult_agg <- adult_agg %>% 
  mutate(prop_targ_lik = targ_lik / (targ_lik + dist_lik),
         prop_targ_int = targ_int / (targ_int + dist_int))

Merge ratings with children’s drop choices

child_agg <- child_agg %>% left_join(lik_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_lik = likelihood) %>%
  left_join(lik_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_lik = likelihood)

child_agg <- child_agg %>% left_join(int_agg, by=c("drop"="drop", "target"="choice")) %>%
  rename(targ_int = interest) %>%
  left_join(int_agg, by=c("drop"="drop", "distractor"="choice")) %>%
  rename(dist_int = interest)

child_agg <- child_agg %>% 
  mutate(prop_targ_lik = targ_lik / (targ_lik + dist_lik),
         prop_targ_int = targ_int / (targ_int + dist_int))
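The adult and child merges are identical except for their input, so a helper could replace both; a sketch using the same joins and Luce-choice shares as above (add_ratings is not in the original pipeline):

# attach mean ratings for target and distractor, then compute the
# Luce-choice share of the target's rating (sketch)
add_ratings <- function(agg) {
  agg %>%
    left_join(lik_agg, by=c("drop"="drop", "target"="choice")) %>% rename(targ_lik = likelihood) %>%
    left_join(lik_agg, by=c("drop"="drop", "distractor"="choice")) %>% rename(dist_lik = likelihood) %>%
    left_join(int_agg, by=c("drop"="drop", "target"="choice")) %>% rename(targ_int = interest) %>%
    left_join(int_agg, by=c("drop"="drop", "distractor"="choice")) %>% rename(dist_int = interest) %>%
    mutate(prop_targ_lik = targ_lik / (targ_lik + dist_lik),
           prop_targ_int = targ_int / (targ_int + dist_int))
}
# e.g., adult_agg <- add_ratings(adult_agg); child_agg <- add_ratings(child_agg)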

Adult Choices vs. Ratings Table

adult_agg %>% relocate(relation) %>% 
  relocate(distractor, .after=target) %>% 
  relocate(prop_targ_lik, .after=dist_lik) %>%
  DT::datatable() %>% 
  formatRound(columns=c("chose_target","targ_lik","dist_lik",
                        "targ_int","dist_int","prop_targ_lik",
                        "prop_targ_int"), digits=3)

Compare adult choices to ratings-derived proportions

Regression:

m_lik <- lm(chose_target ~ relation * prop_targ_lik, data=adult_agg)
m_int <- lm(chose_target ~ relation * prop_targ_int, data=adult_agg)
summary(m_lik) # Rsq = .63
## 
## Call:
## lm(formula = chose_target ~ relation * prop_targ_lik, data = adult_agg)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.166929 -0.045984  0.000186  0.060379  0.237002 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     1.0769     0.6936   1.553    0.140
## relationsupport                -1.0416     0.8532  -1.221    0.240
## prop_targ_lik                  -0.2232     0.7653  -0.292    0.774
## relationsupport:prop_targ_lik   0.9138     0.9574   0.954    0.354
## 
## Residual standard error: 0.1072 on 16 degrees of freedom
## Multiple R-squared:  0.6324, Adjusted R-squared:  0.5635 
## F-statistic: 9.177 on 3 and 16 DF,  p-value: 0.0009123
summary(m_int) # Rsq = .84
## 
## Call:
## lm(formula = chose_target ~ relation * prop_targ_int, data = adult_agg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15408 -0.02446  0.01719  0.04478  0.10658 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     0.5899     0.3035   1.944   0.0697 .
## relationsupport                -0.8069     0.3535  -2.283   0.0365 *
## prop_targ_int                   0.3701     0.3932   0.941   0.3605  
## relationsupport:prop_targ_int   0.9202     0.4791   1.921   0.0728 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07179 on 16 degrees of freedom
## Multiple R-squared:  0.8352, Adjusted R-squared:  0.8043 
## F-statistic: 27.03 on 3 and 16 DF,  p-value: 1.677e-06
anova(m_lik, m_int) # note: the models are non-nested with equal df, so no F test is produced (see AIC sketch below)
## Analysis of Variance Table
## 
## Model 1: chose_target ~ relation * prop_targ_lik
## Model 2: chose_target ~ relation * prop_targ_int
##   Res.Df      RSS Df Sum of Sq F Pr(>F)
## 1     16 0.183934                      
## 2     16 0.082461  0   0.10147
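Because the two models are non-nested with equal residual df, the anova above yields no F test; an information criterion comparison is the usual alternative, e.g.:

# compare the non-nested models by AIC (lower is better)
AIC(m_lik, m_int)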
m3 <- lm(chose_target ~ relation * prop_targ_lik + relation * prop_targ_int, data=adult_agg)
summary(m3)
## 
## Call:
## lm(formula = chose_target ~ relation * prop_targ_lik + relation * 
##     prop_targ_int, data = adult_agg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15457 -0.02199  0.01202  0.04563  0.09992 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     0.8815     0.5184   1.700    0.111
## relationsupport                -0.9836     0.6250  -1.574    0.138
## prop_targ_lik                  -0.3978     0.5597  -0.711    0.489
## prop_targ_int                   0.4592     0.4295   1.069    0.303
## relationsupport:prop_targ_lik   0.2206     0.7198   0.306    0.764
## relationsupport:prop_targ_int   0.8888     0.5366   1.656    0.120
## 
## Residual standard error: 0.075 on 14 degrees of freedom
## Multiple R-squared:  0.8426, Adjusted R-squared:  0.7864 
## F-statistic: 14.99 on 5 and 14 DF,  p-value: 3.41e-05
p1 <- adult_agg %>% ggplot(aes(x=chose_target, y=prop_targ_lik, color=relation)) + geom_point() +
  xlab("Proportion of Adults Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_lik, label = ..rr.label..))

p2 <- adult_agg %>% ggplot(aes(x=chose_target, y=prop_targ_int, color=relation)) + geom_point() +
  xlab("Proportion of Adults Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Interestingness") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_int, label = ..rr.label..))

ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)

Compare children’s choices to ratings-derived proportions

p1 <- child_agg %>% ggplot(aes(x=chose_target, y=prop_targ_lik, color=relation)) + geom_point() +
  xlab("Proportion of Children Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_lik, label = ..rr.label..))

p2 <- child_agg %>% ggplot(aes(x=chose_target, y=prop_targ_int, color=relation)) + geom_point() +
  xlab("Proportion of Children Choosing Target") +
  ylab("Luce choice of Target vs. Distractor Interestingness") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=chose_target, y=prop_targ_int, label = ..rr.label..))

ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)
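The four scatterplots above differ only in data, y variable, and labels; a plotting helper is one way to cut the repetition (a sketch; plot_choice_vs is not in the original code):

# scatter a choice proportion against a Luce-choice rating share (sketch)
plot_choice_vs <- function(dat, yvar, xlab, ylab) {
  ggplot(dat, aes(x=chose_target, y={{yvar}}, color=relation)) + geom_point() +
    xlab(xlab) + ylab(ylab) +
    xlim(0,1) + ylim(0,1) + theme_classic() +
    geom_abline(slope=1, intercept=0, linetype="dashed") +
    stat_cor(inherit.aes = F, aes(x=chose_target, y={{yvar}}, label = ..rr.label..))
}
# e.g., plot_choice_vs(child_agg, prop_targ_lik,
#   "Proportion of Children Choosing Target", "Luce choice of Target vs. Distractor Likelihood")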

Compare Likelihood Ratings to Physics Models

load(here("models/model_data.RData")) # mdat, all_trials_mse, all_trials_mse_r, etc.
# load(here("models/support_probability_fit_df.RData")) 

# compare to support probability
supp_prob <- mdat %>% select(drop, target, relation, support_probability) 

trials <- adult_agg %>% select(drop, target, distractor, relation) %>%
  left_join(supp_prob, by=c("drop","target","relation")) %>%
  rename(targ_supp_prob = support_probability) %>%
  left_join(supp_prob, by=c("drop"="drop", "distractor"="target", "relation"="relation")) %>%
  rename(dist_supp_prob = support_probability) %>%
  mutate(prop_target_supp = targ_supp_prob / (targ_supp_prob + dist_supp_prob + 1e-5)) # 1e-5 guards against division by zero

p1 <- adult_agg %>% left_join(trials %>% select(drop, target, distractor, relation, prop_target_supp)) %>%
  ggplot(aes(x=prop_target_supp, y=prop_targ_lik, color=relation)) + geom_point(alpha=.8) +
  xlab("Proportion Target Support in Model") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=prop_target_supp, y=prop_targ_lik, label = ..rr.label..))
## Joining, by = c("drop", "target", "relation", "distractor")
# compare to support sharpness
supp_tmp <- mdat %>% select(drop, target, relation, support_std) 

trials <- adult_agg %>% select(drop, target, distractor, relation) %>%
  left_join(supp_tmp, by=c("drop","target","relation")) %>%
  rename(targ_supp_std = support_std) %>%
  left_join(supp_tmp, by=c("drop"="drop", "distractor"="target", "relation"="relation")) %>%
  rename(dist_supp_std = support_std) %>%
  mutate(prop_target_supp = targ_supp_std / (targ_supp_std + dist_supp_std + 1e-5)) # 1e-5 guards against division by zero

p2 <- adult_agg %>% left_join(trials %>% select(drop, target, distractor, relation, prop_target_supp)) %>%
  ggplot(aes(x=prop_target_supp, y=prop_targ_lik, color=relation)) + geom_point(alpha=.8) +
  xlab("Prop. Target Support SD in Model") +
  ylab("Luce choice of Target vs. Distractor Likelihood") +
  xlim(0,1) + ylim(0,1) + theme_classic() +
  geom_abline(slope=1, intercept=0, linetype="dashed") +
  stat_cor(inherit.aes = F, aes(x=prop_target_supp, y=prop_targ_lik, label = ..rr.label..))
## Joining, by = c("drop", "target", "relation", "distractor")
ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)
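The target / (target + distractor) pattern recurs for the ratings and for both model quantities; a small helper would make the smoothing constant explicit (a sketch; luce_prop is not in the original code):

# Luce-choice share of the target, with a small epsilon guarding against 0/0 (sketch)
luce_prop <- function(targ, dist, eps = 1e-5) targ / (targ + dist + eps)
# e.g., mutate(prop_target_supp = luce_prop(targ_supp_prob, dist_supp_prob))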

Directly compare ratings to model probabilities

mdat <- mdat %>% left_join(lik_agg, by=c("drop"="drop", "target"="choice"))

p1 <- mdat %>% ggplot(aes(x=likelihood, y=support_probability, color=relation)) +
  geom_point(alpha=.7) + theme_classic() + xlab("Adults' Mean Likelihood Rating") +
  stat_cor(inherit.aes = F, aes(x=likelihood, y=support_probability, label = ..rr.label..))

p2 <- mdat %>% ggplot(aes(x=likelihood, y=support_std, color=relation)) +
  geom_point(alpha=.7) + theme_classic() + xlab("Adults' Mean Likelihood Rating") +
  stat_cor(inherit.aes = F, aes(x=likelihood, y=support_std, label = ..rr.label..))

ggpubr::ggarrange(p1, p2, nrow=1, common.legend = T)

Other feature correlations

with(mdat, cor(likelihood, support_probability)) # r=.75
## [1] 0.7492594
with(mdat, cor(likelihood, support_std)) # .77
## [1] 0.7727355
with(mdat, cor(likelihood, support_sharpness_accuracy_mean)) # .496
## [1] 0.4958176
with(mdat, cor(likelihood, support_response_linearity_r)) # .48
## [1] 0.4777165
with(mdat, cor(likelihood, support_response_linearity_pv)) # .53
## [1] 0.5327534
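The piecewise correlations above could also be collected into a single table; a sketch with summarise(across()):

# correlations of mean likelihood ratings with each model feature (sketch)
mdat %>% summarise(across(c(support_probability, support_std,
                            support_sharpness_accuracy_mean,
                            support_response_linearity_r,
                            support_response_linearity_pv),
                          ~ cor(likelihood, .x)))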