getting started

getwd()

## [1] "/Users/lornaschluter/Documents"

setwd("/Users/lornaschluter/Documents")

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.37     R6_2.6.1          fastmap_1.2.0     xfun_0.52        
##  [5] cachem_1.1.0      knitr_1.50        htmltools_0.5.8.1 rmarkdown_2.29   
##  [9] lifecycle_1.0.4   cli_3.6.5         sass_0.4.10       jquerylib_0.1.4  
## [13] compiler_4.5.1    rstudioapi_0.17.1 tools_4.5.1       evaluate_1.0.4   
## [17] bslib_0.9.0       yaml_2.3.10       rlang_1.1.6       jsonlite_2.0.0

#install.packages("lme4")
#install.packages("readxl")
#install.packages("tidyverse")

library(lme4)

## Loading required package: Matrix

library(readxl)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ tidyr::pack()   masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

source("/Users/lornaschluter/Documents/boot_glmm.r")
#load(file = "Unsi_ex BA Analyses.RData)

#Data ###Raw

# Unsi_Ex
xdata <- read_excel(path = "Unsi-Ex_Data.xlsx")
str(xdata)

## tibble [453 × 31] (S3: tbl_df/tbl/data.frame)
##  $ code                      : chr [1:453] "p05_AB37_vn" "p05_AB37_vn" "p05_AB37_vn" "p05_AB37_vn" ...
##  $ age.m                     : num [1:453] 37 37 37 37 37 37 37 37 37 37 ...
##  $ age.d                     : num [1:453] 1147 1147 1147 1147 1147 ...
##  $ sex                       : chr [1:453] "m" "m" "m" "m" ...
##  $ where                     : chr [1:453] "Kita am Nordcampus" "Kita am Nordcampus" "Kita am Nordcampus" "Kita am Nordcampus" ...
##  $ E1                        : chr [1:453] "Lorna" "Lorna" "Lorna" "Lorna" ...
##  $ bal_nr                    : num [1:453] 4 4 4 4 4 4 4 4 4 4 ...
##  $ trial                     : chr [1:453] "fam1" "fam2" "fam3" "fam4" ...
##  $ trial_type                : chr [1:453] "fam_postbote" "fam_postbote" "fam_postbote" "fam_postbote" ...
##  $ order_fam                 : chr [1:453] "postbote_first" "postbote_first" "postbote_first" "postbote_first" ...
##  $ position_target           : chr [1:453] NA NA NA NA ...
##  $ position_baited_distractor: chr [1:453] NA NA NA NA ...
##  $ hide_first                : chr [1:453] NA NA NA NA ...
##  $ order_qtrial6             : chr [1:453] NA NA NA NA ...
##  $ fam_explore               : chr [1:453] NA NA NA NA ...
##  $ fam_explore_quantity      : chr [1:453] NA NA NA NA ...
##  $ fam_explore_t3            : chr [1:453] NA NA NA NA ...
##  $ fam_explore_correction    : chr [1:453] NA NA NA NA ...
##  $ fam_exploit               : chr [1:453] "correct" "incorrect" "correct" "incorrect" ...
##  $ fam_exploit_incorrect     : chr [1:453] NA "exploit.distractor.additionally" NA "exploit.distractor.additionally" ...
##  $ fam_exploit_correction    : chr [1:453] NA "no" NA "no" ...
##  $ sticker_split             : chr [1:453] NA NA NA NA ...
##  $ sticker_first             : chr [1:453] NA NA NA NA ...
##  $ sticker_exploit           : chr [1:453] NA NA NA NA ...
##  $ sticker_explore           : chr [1:453] NA NA NA NA ...
##  $ strategy                  : chr [1:453] NA NA NA NA ...
##  $ quantity_explore          : chr [1:453] NA NA NA NA ...
##  $ forced_choice             : chr [1:453] NA NA NA NA ...
##  $ meta_comments             : chr [1:453] NA NA NA NA ...
##  $ modal_comments            : chr [1:453] NA NA NA NA ...
##  $ comments                  : chr [1:453] NA NA NA NA ...

###Transformation

# factorize
##bei levels: immer wichtig, was mein Referenzwert!! => das ist die Referenz für Modell (default: alphabetisch)
#Reihenfolge bei levels und labels muss matchen!

data <- xdata %>% 
  mutate(code = factor(code), 
         sex = factor(sex, 
                      levels = c("f", "w", "m"),
                      labels = c("f", "f", "m")), #Reihenfolge muss matchen!
         where = factor(case_when(
           where == "WW" ~ "WW",
           where != "WW" ~ "Kita")),
         trial_type = factor(trial_type),
         order_fam = factor(order_fam),
         position_target = factor(position_target),
         position_baited_distractor = factor(position_baited_distractor),
         hide_first = factor(hide_first),
         order_q = factor(order_qtrial6), #auge_first/laster_first
         sticker_split = factor(sticker_split,
                                labels = c("0", "1")), #no/yes
         sticker_first = factor(sticker_first,
                                levels = c("explore_first","exploit_first","exploit_first_revised","simultaneously"),
                                labels = c("explore_first","exploit_first","exploit_first","simultaneously")),
         sticker_exploit = factor(sticker_exploit),
         exploit.comp = factor(sticker_exploit,
                               levels = c("distractor_left", "distractor_right", "target"),
                               labels = c("0", "0", "1")),
         sticker_explore = factor(sticker_explore),
         explore.comp = factor(case_when(
           sticker_explore == "target" ~ "0",
           sticker_explore != "target" ~ "1")),
         strategy = factor(strategy),
         quantity_explore = factor(quantity_explore,
                                   levels = c("single", "multiple")),
         forced_choice = factor(forced_choice),
         exploit.forced = factor(case_when(
           forced_choice == "exploit_certain" ~ "1",
           forced_choice == "exploit_uncertain" ~ "0",
           forced_choice == "explore_certain" ~ NA,
           forced_choice == "explore_uncertain" ~ NA))
         
)
  
  str(data)

## tibble [453 × 35] (S3: tbl_df/tbl/data.frame)
##  $ code                      : Factor w/ 37 levels "01_MS46_vj","02_GL37_vn",..: 23 23 23 23 23 23 23 23 23 23 ...
##  $ age.m                     : num [1:453] 37 37 37 37 37 37 37 37 37 37 ...
##  $ age.d                     : num [1:453] 1147 1147 1147 1147 1147 ...
##  $ sex                       : Factor w/ 2 levels "f","m": 2 2 2 2 2 2 2 2 2 2 ...
##  $ where                     : Factor w/ 2 levels "Kita","WW": 1 1 1 1 1 1 1 1 1 1 ...
##  $ E1                        : chr [1:453] "Lorna" "Lorna" "Lorna" "Lorna" ...
##  $ bal_nr                    : num [1:453] 4 4 4 4 4 4 4 4 4 4 ...
##  $ trial                     : chr [1:453] "fam1" "fam2" "fam3" "fam4" ...
##  $ trial_type                : Factor w/ 5 levels "fam_bilder","fam_postbote",..: 2 2 2 2 1 1 1 3 3 3 ...
##  $ order_fam                 : Factor w/ 2 levels "bilder_first",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ position_target           : Factor w/ 2 levels "target_left",..: NA NA NA NA NA NA NA 2 2 1 ...
##  $ position_baited_distractor: Factor w/ 2 levels "baited_distractor_left",..: NA NA NA NA NA NA NA 2 2 1 ...
##  $ hide_first                : Factor w/ 2 levels "distractors_first",..: NA NA NA NA NA NA NA 2 1 1 ...
##  $ order_qtrial6             : chr [1:453] NA NA NA NA ...
##  $ fam_explore               : chr [1:453] NA NA NA NA ...
##  $ fam_explore_quantity      : chr [1:453] NA NA NA NA ...
##  $ fam_explore_t3            : chr [1:453] NA NA NA NA ...
##  $ fam_explore_correction    : chr [1:453] NA NA NA NA ...
##  $ fam_exploit               : chr [1:453] "correct" "incorrect" "correct" "incorrect" ...
##  $ fam_exploit_incorrect     : chr [1:453] NA "exploit.distractor.additionally" NA "exploit.distractor.additionally" ...
##  $ fam_exploit_correction    : chr [1:453] NA "no" NA "no" ...
##  $ sticker_split             : Factor w/ 2 levels "0","1": NA NA NA NA NA NA NA 2 2 2 ...
##  $ sticker_first             : Factor w/ 3 levels "explore_first",..: NA NA NA NA NA NA NA 1 2 1 ...
##  $ sticker_exploit           : Factor w/ 3 levels "distractor_left",..: NA NA NA NA NA NA NA 1 1 1 ...
##  $ sticker_explore           : Factor w/ 7 levels "distractor_left",..: NA NA NA NA NA NA NA 3 3 4 ...
##  $ strategy                  : Factor w/ 5 levels "avoid.certain",..: NA NA NA NA NA NA NA 1 1 4 ...
##  $ quantity_explore          : Factor w/ 2 levels "single","multiple": NA NA NA NA NA NA NA NA NA NA ...
##  $ forced_choice             : Factor w/ 4 levels "exploit_certain",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ meta_comments             : chr [1:453] NA NA NA NA ...
##  $ modal_comments            : chr [1:453] NA NA NA NA ...
##  $ comments                  : chr [1:453] NA NA NA NA ...
##  $ order_q                   : Factor w/ 2 levels "auge_first","laster_first": NA NA NA NA NA NA NA NA NA NA ...
##  $ exploit.comp              : Factor w/ 2 levels "0","1": NA NA NA NA NA NA NA 1 1 1 ...
##  $ explore.comp              : Factor w/ 2 levels "0","1": NA NA NA NA NA NA NA 2 2 1 ...
##  $ exploit.forced            : Factor w/ 2 levels "0","1": NA NA NA NA NA NA NA NA NA NA ...

dataset for later use

# select

#data familiarization -> not used in BA!

#data_fam <- data %>% 
 # dplyr::filter(grepl("fam", trial)) %>% 
  #select(code:order_fam, fam_explore:fam_exploit_correction)


# data without fam (and test_explores)
data.test <- data %>% 
  dplyr::filter(!grepl("fam", trial)) %>% 
  dplyr::filter(trial_type !="test_explores")

# data without fam and text_explores main sample
data.sample <- data.test %>% 
  dplyr::filter(!grepl("^p", code))

# data main test 
data.main <- data.test %>% 
  dplyr::filter(trial_type =="test_and") %>% 
  select(code:where, trial:hide_first, sticker_split:strategy, order_q:explore.comp) %>% #select interesting variables
  dplyr::filter(!is.na(exploit.comp)) %>% #drop all rows w/o exploitation data (trial-based exclusion criterion) -> wichtig na zuerst rausfiltern!
  mutate(trial.f = factor(trial),
         z.trial = scale(as.numeric(trial)), #characters kann man nicht z-standardisieren
         z.age = scale(age.d),
         age_group = factor(case_when(
           age.m < 48 ~ "3yo",
           age.m > 47 ~ "4yo")),
       )
# data main test main sample only
data.main.s <- data.main %>% 
  dplyr::filter(!grepl("^p", code))

# data forced choice
data.choice <- data.test %>% 
  dplyr::filter(trial_type =="test_or") %>% 
  select(code:where, trial:order_qtrial6, sticker_exploit:sticker_explore, forced_choice, order_q:exploit.forced) %>% 
  mutate(trial.f = factor(trial),
         z.trial = scale(as.numeric(trial)), 
         z.age = scale(age.d),
         age_group = factor(case_when(
           age.m < 48 ~ "3yo",
           age.m > 47 ~ "4yo")),
       )

#descriptive statistics ## Methods

# demographics
# install.packages("Rmisc")
library(Rmisc)

## Loading required package: lattice

## Loading required package: plyr

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following object is masked from 'package:purrr':
## 
##     compact

# sample size
n_distinct(data.main$code)

## [1] 37

n_distinct(data.main.s$code)

## [1] 20

## gender
data.main %>% 
  dplyr::group_by(sex) %>% 
  dplyr::summarise(
    n_rows = dplyr::n(), 
    n_codes = dplyr::n_distinct(code)) %>% 
   ungroup()

## # A tibble: 2 × 3
##   sex   n_rows n_codes
##   <fct>  <int>   <int>
## 1 f         70      18
## 2 m         76      19

## where
data.main %>% 
  dplyr::group_by(where) %>% 
  dplyr::summarise(
    n_rows = dplyr::n(), 
    n_codes = dplyr::n_distinct(code)) %>% 
   ungroup()

## # A tibble: 2 × 3
##   where n_rows n_codes
##   <fct>  <int>   <int>
## 1 Kita      66      17
## 2 WW        80      20

## age 
data.main %>% 
  dplyr::group_by(age_group) %>% 
  dplyr::summarise(
    n_rows = dplyr::n(), 
    n_codes = dplyr::n_distinct(code)) %>% 
   ungroup()

## # A tibble: 2 × 3
##   age_group n_rows n_codes
##   <fct>      <int>   <int>
## 1 3yo           62      16
## 2 4yo           84      21

Rmisc::summarySE(data.main, "age.m")

##    .id   N    age.m       sd        se       ci
## 1 <NA> 146 48.89041 6.783472 0.5614041 1.109593

min(data.main$age.m)

## [1] 37

max(data.main$age.m)

## [1] 59

## grouped by gender
Rmisc::summarySE(data.main, "age.m", groupvars = "sex")

##   sex  N    age.m       sd        se       ci
## 1   f 70 49.17143 6.392866 0.7640936 1.524325
## 2   m 76 48.63158 7.156987 0.8209627 1.635441

Rmisc::summarySE(data.main, "age.d", groupvars = "sex")

##   sex  N    age.d       sd       se       ci
## 1   f 70 1512.529 194.0742 23.19631 46.27536
## 2   m 76 1497.211 213.1365 24.44843 48.70377

Results

exploit performance

# exploit.comp
exploit.perf.raw <- data.main %>% 
  dplyr::count(exploit.comp) %>% #automatisch als n
  mutate(prop = n/sum(n)) #Anteil von kompetenten über n
exploit.perf.raw

## # A tibble: 2 × 3
##   exploit.comp     n  prop
##   <fct>        <int> <dbl>
## 1 0               94 0.644
## 2 1               52 0.356

# exploit.comp main sample only
exploit.raw.s <- data.main.s %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.raw.s # slightly better performance than w/ pilot

## # A tibble: 2 × 3
##   exploit.comp     n  prop
##   <fct>        <int> <dbl>
## 1 0               47 0.603
## 2 1               31 0.397

## grouped by demographic factors (age, gender)
exploit.perf.demo <- data.main %>% 
  group_by(age_group, sex) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.demo

## # A tibble: 8 × 5
## # Groups:   age_group, sex [4]
##   age_group sex   exploit.comp     n   prop
##   <fct>     <fct> <fct>        <int>  <dbl>
## 1 3yo       f     0               19 0.130 
## 2 3yo       f     1                7 0.0479
## 3 3yo       m     0               25 0.171 
## 4 3yo       m     1               11 0.0753
## 5 4yo       f     0               24 0.164 
## 6 4yo       f     1               20 0.137 
## 7 4yo       m     0               26 0.178 
## 8 4yo       m     1               14 0.0959

## grouped by trial (learning effect?)
exploit.perf.t <- data.main %>% 
  group_by(trial.f) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.t

## # A tibble: 8 × 4
## # Groups:   trial.f [4]
##   trial.f exploit.comp     n   prop
##   <fct>   <fct>        <int>  <dbl>
## 1 1       0               23 0.158 
## 2 1       1               13 0.0890
## 3 2       0               28 0.192 
## 4 2       1                9 0.0616
## 5 3       0               21 0.144 
## 6 3       1               16 0.110 
## 7 4       0               22 0.151 
## 8 4       1               14 0.0959

## grouped by predictors (sticker_split, sticker_first)
exploit.perf <- data.main %>% 
  group_by(sticker_split, sticker_first) %>% 
  dplyr::count(exploit.comp) %>%
  mutate(prop = n/sum(n)) 
exploit.perf

## # A tibble: 12 × 5
## # Groups:   sticker_split, sticker_first [6]
##    sticker_split sticker_first  exploit.comp     n    prop
##    <fct>         <fct>          <fct>        <int>   <dbl>
##  1 0             explore_first  0                9 0.0616 
##  2 0             explore_first  1                8 0.0548 
##  3 0             exploit_first  0                9 0.0616 
##  4 0             exploit_first  1                1 0.00685
##  5 0             simultaneously 0                2 0.0137 
##  6 0             simultaneously 1                1 0.00685
##  7 1             explore_first  0               38 0.260  
##  8 1             explore_first  1               17 0.116  
##  9 1             exploit_first  0               30 0.205  
## 10 1             exploit_first  1               24 0.164  
## 11 1             simultaneously 0                6 0.0411 
## 12 1             simultaneously 1                1 0.00685

## sticker exploit
exploit.loc <- data.main %>% 
  group_by(sticker_exploit) %>% 
  dplyr::count(exploit.comp) %>%
  mutate(prop = n/sum(n)) 
exploit.loc # no bias descriptively

## # A tibble: 3 × 4
## # Groups:   sticker_exploit [3]
##   sticker_exploit  exploit.comp     n  prop
##   <fct>            <fct>        <int> <dbl>
## 1 distractor_left  0               46 0.315
## 2 distractor_right 0               48 0.329
## 3 target           1               52 0.356

explore performance

# explore.comp
explore.perf.raw <- data.main %>% 
  dplyr::count(explore.comp) %>%
  mutate(prop = n/sum(n)) 
explore.perf.raw

## # A tibble: 2 × 3
##   explore.comp     n  prop
##   <fct>        <int> <dbl>
## 1 0               48 0.329
## 2 1               98 0.671

## grouped by demographic factors (age, gender)
explore.perf.demo <- data.main %>% 
  group_by(age_group, sex) %>% 
  dplyr::count(explore.comp) %>% 
  mutate(prop = n/sum(n)) 
explore.perf.demo

## # A tibble: 8 × 5
## # Groups:   age_group, sex [4]
##   age_group sex   explore.comp     n   prop
##   <fct>     <fct> <fct>        <int>  <dbl>
## 1 3yo       f     0               13 0.0890
## 2 3yo       f     1               13 0.0890
## 3 3yo       m     0               10 0.0685
## 4 3yo       m     1               26 0.178 
## 5 4yo       f     0               12 0.0822
## 6 4yo       f     1               32 0.219 
## 7 4yo       m     0               13 0.0890
## 8 4yo       m     1               27 0.185

## grouped by trial (learning effect?)
explore.perf.t <- data.main %>% 
  group_by(trial.f) %>% 
  dplyr::count(explore.comp) %>% 
  mutate(prop = n/sum(n)) 
explore.perf.t

## # A tibble: 8 × 4
## # Groups:   trial.f [4]
##   trial.f explore.comp     n   prop
##   <fct>   <fct>        <int>  <dbl>
## 1 1       0               12 0.0822
## 2 1       1               24 0.164 
## 3 2       0               11 0.0753
## 4 2       1               26 0.178 
## 5 3       0               12 0.0822
## 6 3       1               25 0.171 
## 7 4       0               13 0.0890
## 8 4       1               23 0.158

## grouped by predictors (sticker_split, sticker_first)
explore.perf <- data.main %>% 
  group_by(sticker_split, sticker_first) %>% 
  dplyr::count(explore.comp) %>%
  mutate(prop = n/sum(n)) 
explore.perf

## # A tibble: 12 × 5
## # Groups:   sticker_split, sticker_first [6]
##    sticker_split sticker_first  explore.comp     n    prop
##    <fct>         <fct>          <fct>        <int>   <dbl>
##  1 0             explore_first  0                8 0.0548 
##  2 0             explore_first  1                9 0.0616 
##  3 0             exploit_first  0                1 0.00685
##  4 0             exploit_first  1                9 0.0616 
##  5 0             simultaneously 0                1 0.00685
##  6 0             simultaneously 1                2 0.0137 
##  7 1             explore_first  0               25 0.171  
##  8 1             explore_first  1               30 0.205  
##  9 1             exploit_first  0               11 0.0753 
## 10 1             exploit_first  1               43 0.295  
## 11 1             simultaneously 0                2 0.0137 
## 12 1             simultaneously 1                5 0.0342

## sticker exploit
explore.loc <- data.main %>% 
  group_by(sticker_explore) %>% 
  dplyr::count(explore.comp) %>%
  mutate(prop = n/sum(n)) 
explore.loc # no bias descriptively

## # A tibble: 3 × 4
## # Groups:   sticker_explore [3]
##   sticker_explore  explore.comp     n  prop
##   <fct>            <fct>        <int> <dbl>
## 1 distractor_left  1               57 0.390
## 2 distractor_right 1               41 0.281
## 3 target           0               48 0.329

performance exploit.forced

# exploit_forced
forced.perf.raw <- data.choice %>% 
  dplyr::count(exploit.forced) %>%
  mutate(prop = n/sum(n)) 
forced.perf.raw

## # A tibble: 3 × 3
##   exploit.forced     n   prop
##   <fct>          <int>  <dbl>
## 1 0                 15 0.405 
## 2 1                 19 0.514 
## 3 <NA>               3 0.0811

## grouped by demographic factors (age, gender)
forced.perf.demo <- data.choice %>% 
  group_by(age_group, sex) %>% 
  dplyr::count(exploit.forced) %>% 
  mutate(prop = n/sum(n)) 
forced.perf.demo # none of the 4yo explored in last trial!

## # A tibble: 10 × 5
## # Groups:   age_group, sex [4]
##    age_group sex   exploit.forced     n   prop
##    <fct>     <fct> <fct>          <int>  <dbl>
##  1 3yo       f     0                  3 0.0811
##  2 3yo       f     1                  2 0.0541
##  3 3yo       f     <NA>               2 0.0541
##  4 3yo       m     0                  5 0.135 
##  5 3yo       m     1                  3 0.0811
##  6 3yo       m     <NA>               1 0.0270
##  7 4yo       f     0                  5 0.135 
##  8 4yo       f     1                  6 0.162 
##  9 4yo       m     0                  2 0.0541
## 10 4yo       m     1                  8 0.216

## sticker exploit
exploit.forced.loc <- data.choice %>% 
  group_by(sticker_exploit) %>% 
  dplyr::count(exploit.forced) %>%
  mutate(prop = n/sum(n)) 
exploit.forced.loc # no bias descriptively

## # A tibble: 4 × 4
## # Groups:   sticker_exploit [4]
##   sticker_exploit  exploit.forced     n   prop
##   <fct>            <fct>          <int>  <dbl>
## 1 distractor_left  0                  7 0.189 
## 2 distractor_right 0                  8 0.216 
## 3 target           1                 19 0.514 
## 4 <NA>             <NA>               3 0.0811

sticker_split performance

## proportions sticker split
sticker.split.raw <- data.main %>% 
  dplyr::count(sticker_split) %>%
  mutate(prop = n/sum(n)) 
sticker.split.raw

## # A tibble: 2 × 3
##   sticker_split     n  prop
##   <fct>         <int> <dbl>
## 1 0                30 0.205
## 2 1               116 0.795

## grouped by exploit.comp
sticker.split.perf <- data.main %>% 
  group_by(exploit.comp) %>% 
  dplyr::count(sticker_split) %>%
  mutate(prop = n/sum(n)) 
sticker.split.perf

## # A tibble: 4 × 4
## # Groups:   exploit.comp [2]
##   exploit.comp sticker_split     n   prop
##   <fct>        <fct>         <int>  <dbl>
## 1 0            0                20 0.137 
## 2 0            1                74 0.507 
## 3 1            0                10 0.0685
## 4 1            1                42 0.288

sticker_first performance

## proportions sticker first
sticker.first.raw <- data.main %>% 
  dplyr::count(sticker_first) %>%
  mutate(prop = n/sum(n)) 
sticker.first.raw

## # A tibble: 3 × 3
##   sticker_first      n   prop
##   <fct>          <int>  <dbl>
## 1 explore_first     72 0.493 
## 2 exploit_first     64 0.438 
## 3 simultaneously    10 0.0685

## grouped by exploit.comp
sticker.first.perf <- data.main %>% 
  group_by(exploit.comp) %>% 
  dplyr::count(sticker_first) %>%
  mutate(prop = n/sum(n)) 
sticker.first.perf

## # A tibble: 6 × 4
## # Groups:   exploit.comp [2]
##   exploit.comp sticker_first      n   prop
##   <fct>        <fct>          <int>  <dbl>
## 1 0            explore_first     47 0.322 
## 2 0            exploit_first     39 0.267 
## 3 0            simultaneously     8 0.0548
## 4 1            explore_first     25 0.171 
## 5 1            exploit_first     25 0.171 
## 6 1            simultaneously     2 0.0137

strategy performance

## proportions strategies
strategies.raw <- data.main %>% 
  group_by(age_group) %>% 
  dplyr::count(strategy) %>%
  mutate(prop = n/sum(n)) 
strategies.raw # less coupled; irrational/rational/avoid.certain seem equally often

## # A tibble: 10 × 4
## # Groups:   age_group [2]
##    age_group strategy            n   prop
##    <fct>     <fct>           <int>  <dbl>
##  1 3yo       avoid.certain      15 0.103 
##  2 3yo       coupled.confirm     6 0.0411
##  3 3yo       coupled.explore    12 0.0822
##  4 3yo       irrational         17 0.116 
##  5 3yo       rational           12 0.0822
##  6 4yo       avoid.certain      21 0.144 
##  7 4yo       coupled.confirm     4 0.0274
##  8 4yo       coupled.explore     8 0.0548
##  9 4yo       irrational         21 0.144 
## 10 4yo       rational           30 0.205

# gruoped by age

# controls
## familiarization, stuff
table(data.main$order_fam)

## 
##   bilder_first postbote_first 
##             84             62

Main analyses

##GLMMS

Rationale:

Pre-registered analysis: ### GLMM01 - exploit vs. chance 1. We will conduct binomial Generalized Linear Mixed Models (GLMM). The main analysis pertains to the main test phase (trials 1 – 4) and tests the key prediction that children will exploit the certain option significantly above chance (conservative chance level .5) while controlling for participants’ age (z-transformed), trial number (z-transformed), and accounting for individual differences in slope and intercept. To examine whether children’s performance differs significantly from 50% chance, we will fit this binomial mixed effect model (including z-transformed age and trial number as main predictors, random intercepts for participants and random slope for trial number) and suppress the intercept. - Main model: exploit.certain ~ z.age + z.trial + (1 + z.trial| id) - Main model BA: exploit.certain ~ (1 + z.trial| id)

# GLMM01
m1a <- lme4::glmer(exploit.comp ~ 1 + (1 + z.trial|code),
            family = "binomial",
            data = data.main,
            control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e4)))

## boundary (singular) fit: see help('isSingular')

summary(m1a)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.comp ~ 1 + (1 + z.trial | code)
##    Data: data.main
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     193.9     205.8     -93.0     185.9       142 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0309 -0.6790 -0.5273  0.9656  1.5882 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  code   (Intercept) 0.8313   0.9118       
##         z.trial     0.0808   0.2843   1.00
## Number of obs: 146, groups:  code, 37
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -0.7240     0.2465  -2.937  0.00332 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

#GLMM01b w/o pilot data
m1b <- lme4::glmer(exploit.comp ~ 1 + (1 + z.trial|code),
            family = "binomial",
            data = data.main.s,
            control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e4)))

## boundary (singular) fit: see help('isSingular')

summary(m1b) # intercept not significant any more

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.comp ~ 1 + (1 + z.trial | code)
##    Data: data.main.s
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     107.8     117.2     -49.9      99.8        74 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2108 -0.6634 -0.4354  0.7599  1.5690 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  code   (Intercept) 1.50324  1.2261       
##         z.trial     0.06326  0.2515   1.00
## Number of obs: 78, groups:  code, 20
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.5593     0.3815  -1.466    0.143
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

# transforming estimate from logit space
plogis(fixef(m1a)["(Intercept)"])

## (Intercept) 
##   0.3265232

plogis(fixef(m1b)["(Intercept)"])

## (Intercept) 
##   0.3636997

# test against chance included!!

#install.packages("multcomp")
library(multcomp)

## Loading required package: mvtnorm

## Loading required package: survival

## Loading required package: TH.data

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

# against 33% chance level
#install.packages("gtools")
library(gtools)
summary(glht(m1a,
             alternative = "two.sided",
             rhs = gtools::logit(0.33), # gegen was wird getestet (33 % im logit space)
             test = adjusted("none")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lme4::glmer(formula = exploit.comp ~ 1 + (1 + z.trial | code), 
##     data = data.main, family = "binomial", control = glmerControl(optimizer = "bobyqa", 
##         optCtrl = list(maxfun = 20000)))
## 
## Linear Hypotheses:
##                                   Estimate Std. Error z value Pr(>|z|)
## (Intercept) == -0.708185057924486  -0.7240     0.2465  -0.064    0.949
## (Adjusted p values reported -- single-step method)

# intercept = angelegtes chance level
# nicht signifikant von 0.33 verschieden

# against 33% chance w/o pilot data
summary(glht(m1b,
             alternative = "two.sided",
             rhs = gtools::logit(0.33), # gegen was wird getestet (33 % im logit space)
             test = adjusted("none")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lme4::glmer(formula = exploit.comp ~ 1 + (1 + z.trial | code), 
##     data = data.main.s, family = "binomial", control = glmerControl(optimizer = "bobyqa", 
##         optCtrl = list(maxfun = 20000)))
## 
## Linear Hypotheses:
##                                   Estimate Std. Error z value Pr(>|z|)
## (Intercept) == -0.708185057924486  -0.5593     0.3815    0.39    0.696
## (Adjusted p values reported -- single-step method)

GLMM02 - exploit predictors vs. null model

2a. Descriptively, we will analyze how many children spontaneously split their decisions (see above) and which decision they act on first. These factors are further included as predictors to our main model to test for their respective effects on children’s exploitation performance.

2b. Hence, we will extend our main model (specified as below) and run a model comparison with a respective null model lacking only the main and interaction effects of decision.split and decision.order (R function anova with argument ‘test’ set to “Chisq” with p < .05). - Full model: exploit.certain ~ decision.split * decision.order + z.age + z.trial + (1 + z.trial | id) - Null model: exploit.certain ~ 1 + z.age + z.trial + (1 + z.trial | id) - BA Null model: exploit.certain ~ 1 + (1 + z.trial | id)

# specify full model
m2 <- lme4::glmer(exploit.comp ~ 1 + sticker_split*sticker_first + z.age + z.trial + (1 + z.trial|code),
             family = "binomial",
            data = data.main)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00731414 (tol = 0.002, component 1)

## w/o pilot data
m2b <- lme4::glmer(exploit.comp ~ 1 + sticker_split*sticker_first + z.age + z.trial + (1 + z.trial|code),
             family = "binomial",
            data = data.main.s)

## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

## boundary (singular) fit: see help('isSingular')

# specify null model
m2.null <- lme4::glmer(exploit.comp ~ 1 + (1 + z.trial|code),
             family = "binomial",
            data = data.main)

## boundary (singular) fit: see help('isSingular')

## w/o pilot data
m2b.null <- lme4::glmer(exploit.comp ~ 1 + (1 + z.trial|code),
             family = "binomial",
            data = data.main.s)

# model comparison
anova(m2, m2.null, test = "Chisq") # not significant

## Data: data.main
## Models:
## m2.null: exploit.comp ~ 1 + (1 + z.trial | code)
## m2: exploit.comp ~ 1 + sticker_split * sticker_first + z.age + z.trial + (1 + z.trial | code)
##         npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
## m2.null    4 193.90 205.84 -92.951    185.90                     
## m2        11 198.54 231.36 -88.268    176.54 9.3651  7     0.2275

## w/o pilot data
anova(m2b, m2b.null, test = "Chisq") # marginally significant

## Data: data.main.s
## Models:
## m2b.null: exploit.comp ~ 1 + (1 + z.trial | code)
## m2b: exploit.comp ~ 1 + sticker_split * sticker_first + z.age + z.trial + (1 + z.trial | code)
##          npar   AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)  
## m2b.null    4 107.8 117.22 -49.899    99.797                       
## m2b        10 108.3 131.86 -44.148    88.296 11.501  6    0.07406 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# full model nicht signifikant mehr Var. aufgeklärt (berichten in Klammern: df, Chisq, p-Wert)

# drop1 full model
drop1(m2, test = "Chisq") # jeweiligen Faktor kicken und gucken, ob signifikant zum Effekt beitragen => brauch ich jetzt nicht!

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00774251 (tol = 0.002, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00353045 (tol = 0.002, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00259368 (tol = 0.002, component 1)

## Single term deletions
## 
## Model:
## exploit.comp ~ 1 + sticker_split * sticker_first + z.age + z.trial + 
##     (1 + z.trial | code)
##                             npar    AIC    LRT Pr(Chi)  
## <none>                           198.54                 
## z.age                          1 198.65 2.1098 0.14636  
## z.trial                        1 196.75 0.2125 0.64484  
## sticker_split:sticker_first    2 199.62 5.0840 0.07871 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

drop1(m2b, test = "Chisq") # significant interaction... -> Abbildung (nicht für BA nötig -- fehlender Schritt: getrennte Analysen je nach Split ja oder nein, Modell nur mit sticker_frist (subsample gesplittet vs. nicht))

## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
## boundary (singular) fit: see help('isSingular')

## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

## boundary (singular) fit: see help('isSingular')

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

## Single term deletions
## 
## Model:
## exploit.comp ~ 1 + sticker_split * sticker_first + z.age + z.trial + 
##     (1 + z.trial | code)
##                             npar    AIC    LRT Pr(Chi)  
## <none>                           108.30                 
## z.age                          1 106.84 0.5489 0.45875  
## z.trial                        1 106.30 0.0003 0.98640  
## sticker_split:sticker_first    1 111.21 4.9184 0.02657 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Zusatz: Altersgruppen statt z.age
m2.1 <- lme4::glmer(exploit.comp ~ 1 + age_group + z.trial + (1 + z.trial|code),
             family = "binomial",
            data = data.main)

summary(m2.1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.comp ~ 1 + age_group + z.trial + (1 + z.trial | code)
##    Data: data.main
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     196.1     214.0     -92.1     184.1       140 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0994 -0.6357 -0.5521  0.9695  1.9214 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  code   (Intercept) 0.69891  0.8360       
##         z.trial     0.04085  0.2021   1.00
## Number of obs: 146, groups:  code, 37
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -1.0236     0.3734  -2.741  0.00612 **
## age_group4yo   0.5755     0.4733   1.216  0.22395   
## z.trial        0.1187     0.2020   0.588  0.55680   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ag_gr4
## age_group4y -0.766       
## z.trial      0.062  0.021

# w/o pilot data
m2.1b <- lme4::glmer(exploit.comp ~ 1 + age_group + z.trial + (1 + z.trial|code),
             family = "binomial",
            data = data.main.s)

## boundary (singular) fit: see help('isSingular')

summary(m2.1b) # 4yo still not significantly better than 3yo

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.comp ~ 1 + age_group + z.trial + (1 + z.trial | code)
##    Data: data.main.s
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     109.5     123.6     -48.7      97.5        72 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2887 -0.6029 -0.4524  0.7409  2.0301 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  code   (Intercept) 1.18585  1.089        
##         z.trial     0.06051  0.246    1.00
## Number of obs: 78, groups:  code, 20
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -1.23556    0.61850  -1.998   0.0458 *
## age_group4yo  1.13572    0.75896   1.496   0.1345  
## z.trial       0.06278    0.28216   0.223   0.8239  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ag_gr4
## age_group4y -0.801       
## z.trial      0.092  0.028
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

GLMM03 - forced choice trial

3a. Regarding the forced-choice in the fifth trial, we will analyze children’s preferences for either explore or exploit choices descriptively. -> descriptive analyses!

3b. Then, we will analyze children’s exploit choices in the final trial in more detail. We will conduct another GLMM to test whether children’s choices to exploit the certain option differ significantly from chance (see 1. main analysis, chance level set at .5) and whether this performance in the final trial is affected by their previous performance (proportion to exploit rationally in the main test trials). - Full model: exploit.certain ~ exploit.prev.performance + z.age + (1 | id) - Null model: exploit.certain ~ 1 + z.age + (1 | id) - BA null model: exploit.certain ~ 1 + (1 | id)

# compute previous performance on subject level
library(dplyr)
subject_prev_perf <- data.main %>%
  filter(trial %in% 1:4) %>% # filter previous trials
  group_by(code) %>% # group by subject
  dplyr::summarise(
    prev_perf_1_4 = mean(as.numeric(as.character(exploit.comp)), na.rm = TRUE)
  ) %>% # calculating mean of numeric versions of binary DV, because it's essentially the proportion!; as.numeric and as.character bc converting directly to numeric would give me levels 1,2 instead of 0 and 1 => character first!
  ungroup()

head(subject_prev_perf) # check if it worked

## # A tibble: 6 × 2
##   code       prev_perf_1_4
##   <fct>              <dbl>
## 1 01_MS46_vj         0.333
## 2 02_GL37_vn         0.75 
## 3 03_HG41_vj         0.25 
## 4 04_AB56_vj         0.75 
## 5 05_IT40_vj         0    
## 6 06_FT40_vj         0.333

# add previous performance to dataset
data.choice.prev <- data.test %>% 
  filter(trial_type == "test_or") %>% 
  filter(!is.na(exploit.comp)) %>% 
  left_join(subject_prev_perf, by = "code") %>% 
  mutate(z.age = scale(as.numeric(age.d)), 
         z.prev.perf = scale(as.numeric(as.character(prev_perf_1_4))))

# specify full model
m3 <- lme4::glmer(exploit.forced ~ z.prev.perf + z.age + (1 |code),  # column name pre_perf_1_4 for whatever reason??
             family = "binomial",
            data = data.choice.prev)

# specify null model
m3.null <- lme4::glmer(exploit.forced ~ 1 + (1 |code),
             family = "binomial",
            data = data.choice.prev)

## boundary (singular) fit: see help('isSingular')

summary(m3.null)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.forced ~ 1 + (1 | code)
##    Data: data.choice.prev
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##      50.7      53.7     -23.3      46.7        32 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1255 -1.1255  0.8885  0.8885  0.8885 
## 
## Random effects:
##  Groups Name        Variance  Std.Dev. 
##  code   (Intercept) 7.179e-09 8.473e-05
## Number of obs: 34, groups:  code, 34
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.2364     0.3454   0.684    0.494
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

# transforming estimate from logit space
plogis(fixef(m3.null)["(Intercept)"])

## (Intercept) 
##   0.5588235

# against 33% chance level
library(gtools)
summary(glht(m3.null,
             alternative = "two.sided",
             rhs = gtools::logit(0.33), 
             test = adjusted("none"))) # significantly better than lenient chance crit

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lme4::glmer(formula = exploit.forced ~ 1 + (1 | code), data = data.choice.prev, 
##     family = "binomial")
## 
## Linear Hypotheses:
##                                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept) == -0.708185057924486   0.2364     0.3454   2.734  0.00625 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

# model comparison
anova(m3, m3.null, test = "Chisq") #m3 klärt signifikant mehr Varianz auf (könnte power sein)

## Data: data.choice.prev
## Models:
## m3.null: exploit.forced ~ 1 + (1 | code)
## m3: exploit.forced ~ z.prev.perf + z.age + (1 | code)
##         npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)  
## m3.null    2 50.662 53.715 -23.331    46.662                       
## m3         4 48.275 54.381 -20.138    40.275 6.3869  2    0.04103 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(m3) #aber keiner der Effekte signifikant in summary... jedenfalls beide in positive Richtung

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.forced ~ z.prev.perf + z.age + (1 | code)
##    Data: data.choice.prev
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##      48.3      54.4     -20.1      40.3        30 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6345 -0.8039  0.3368  0.7243  1.7265 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  code   (Intercept) 0.09797  0.313   
## Number of obs: 34, groups:  code, 34
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.3271     0.4137   0.791    0.429
## z.prev.perf   0.7813     0.5394   1.449    0.147
## z.age         0.4594     0.4389   1.047    0.295
## 
## Correlation of Fixed Effects:
##             (Intr) z.prv.
## z.prev.perf  0.288       
## z.age        0.152 -0.003

## w/o pilot data: new data set
data.choice.prev.s <- data.choice.prev %>% 
  dplyr::filter(!grepl("^p", code)) %>% 
  mutate(z.age = scale(as.numeric(age.d)), 
         z.prev.perf = scale(as.numeric(as.character(prev_perf_1_4))))
  

### specify model w/o pilot data
m3b <- lme4::glmer(exploit.forced ~ z.prev.perf + z.age + (1 |code),  
             family = "binomial",
            data = data.choice.prev.s)

# specify null model w/o pilot data
m3b.null <- lme4::glmer(exploit.forced ~ 1 + (1 |code),
             family = "binomial",
            data = data.choice.prev.s)

## boundary (singular) fit: see help('isSingular')

# transforming estimate from logit space
plogis(fixef(m3b.null)["(Intercept)"]) # performance exactly at 50% chance level

## (Intercept) 
##         0.5

# model comparison w/o pilot data
anova(m3b, m3b.null, test = "Chisq") # klärt signifikant mehr Varianz auf

## Data: data.choice.prev.s
## Models:
## m3b.null: exploit.forced ~ 1 + (1 | code)
## m3b: exploit.forced ~ z.prev.perf + z.age + (1 | code)
##          npar    AIC    BIC   logLik -2*log(L)  Chisq Df Pr(>Chisq)    
## m3b.null    2 28.953 30.734 -12.4766   24.9533                         
## m3b         4 17.090 20.652  -4.5451    9.0901 15.863  2  0.0003592 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(m3b) # previous performance marginally significant --> ganz knapp an 0.05

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.forced ~ z.prev.perf + z.age + (1 | code)
##    Data: data.choice.prev.s
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##      17.1      20.7      -4.5       9.1        14 
## 
## Scaled residuals: 
##       Min        1Q    Median        3Q       Max 
## -0.075482 -0.000395  0.000000  0.001897  0.109060 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  code   (Intercept) 2007     44.8    
## Number of obs: 18, groups:  code, 18
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.3928     3.6910  -0.106   0.9152  
## z.prev.perf  16.5648     8.6427   1.917   0.0553 .
## z.age         7.6868     5.2253   1.471   0.1413  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) z.prv.
## z.prev.perf  0.061       
## z.age       -0.282  0.503

control analyses

We will run additional analyses on the (converging) 2b GLMM with the control variables sex, order of familiarization, position of boxes, order of hiding, and experimental test setting (lab or kindergarten).

# descriptively (looking for hints at biases)
## grouped by sex
exploit.perf.sex <- data.main %>% 
  group_by(sex) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.sex # no hint at difference

## # A tibble: 4 × 4
## # Groups:   sex [2]
##   sex   exploit.comp     n  prop
##   <fct> <fct>        <int> <dbl>
## 1 f     0               43 0.295
## 2 f     1               27 0.185
## 3 m     0               51 0.349
## 4 m     1               25 0.171

## grouped by order of familiarization
exploit.perf.ofam <- data.main %>% 
  group_by(order_fam) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.ofam # no hint at difference

## # A tibble: 4 × 4
## # Groups:   order_fam [2]
##   order_fam      exploit.comp     n  prop
##   <fct>          <fct>        <int> <dbl>
## 1 bilder_first   0               55 0.377
## 2 bilder_first   1               29 0.199
## 3 postbote_first 0               39 0.267
## 4 postbote_first 1               23 0.158

## grouped by position of target
exploit.perf.tpos <- data.main %>% 
  group_by(position_target) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.tpos # maybe bias?? slightly better performance when target_left (allerdings auch viel öfter links gewesen... - also eher SP Größe?)

## # A tibble: 4 × 4
## # Groups:   position_target [2]
##   position_target exploit.comp     n   prop
##   <fct>           <fct>        <int>  <dbl>
## 1 target_left     0               47 0.322 
## 2 target_left     1               39 0.267 
## 3 target_right    0               47 0.322 
## 4 target_right    1               13 0.0890

### control analyses position of target
mcon1 <- lme4::glmer(exploit.comp ~ 1 + position_target + (1 + z.trial|code),
            family = "binomial",
            data = data.main,
            control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e4)))

## boundary (singular) fit: see help('isSingular')

anova(mcon1, m2.null, test = "Chisq") # highly significant

## Data: data.main
## Models:
## m2.null: exploit.comp ~ 1 + (1 + z.trial | code)
## mcon1: exploit.comp ~ 1 + position_target + (1 + z.trial | code)
##         npar   AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)    
## m2.null    4 193.9 205.84 -92.951     185.9                         
## mcon1      5 184.1 199.02 -87.050     174.1 11.802  1  0.0005917 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(mcon1) #scheint bias zu geben

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: exploit.comp ~ 1 + position_target + (1 + z.trial | code)
##    Data: data.main
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     184.1     199.0     -87.0     174.1       141 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2277 -0.5649 -0.4046  0.7565  2.2159 
## 
## Random effects:
##  Groups Name        Variance Std.Dev. Corr
##  code   (Intercept) 1.3256   1.1514       
##         z.trial     0.2156   0.4643   1.00
## Number of obs: 146, groups:  code, 37
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                  -0.1621     0.3168  -0.512  0.60878   
## position_targettarget_right  -1.5068     0.4821  -3.125  0.00178 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## pstn_trgtt_ -0.496
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

## grouped by order of hiding
exploit.perf.ohid <- data.main %>% 
  group_by(hide_first) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.ohid #not hint at bias

## # A tibble: 4 × 4
## # Groups:   hide_first [2]
##   hide_first        exploit.comp     n  prop
##   <fct>             <fct>        <int> <dbl>
## 1 distractors_first 0               49 0.336
## 2 distractors_first 1               27 0.185
## 3 target_first      0               45 0.308
## 4 target_first      1               25 0.171

## grouped by experimental setting
exploit.perf.set <- data.main %>% 
  group_by(where) %>% 
  dplyr::count(exploit.comp) %>% 
  mutate(prop = n/sum(n)) 
exploit.perf.set #hint at better in WW (but again, more tested in WW)

## # A tibble: 4 × 4
## # Groups:   where [2]
##   where exploit.comp     n  prop
##   <fct> <fct>        <int> <dbl>
## 1 Kita  0               47 0.322
## 2 Kita  1               19 0.130
## 3 WW    0               47 0.322
## 4 WW    1               33 0.226

### control analyses setting
mcon2 <- lme4::glmer(exploit.comp ~ 1 + where + (1 + z.trial|code),
            family = "binomial",
            data = data.main,
            control = glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e4)))

## boundary (singular) fit: see help('isSingular')

anova(mcon2, m2.null, test = "Chisq") # marginally significant, possibly artefact more WW

## Data: data.main
## Models:
## m2.null: exploit.comp ~ 1 + (1 + z.trial | code)
## mcon2: exploit.comp ~ 1 + where + (1 + z.trial | code)
##         npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)  
## m2.null    4 193.90 205.84 -92.951    185.90                       
## mcon2      5 193.15 208.07 -91.575    183.15 2.7508  1    0.09721 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plotting

# histogram strategies
#install.packages("ggplot2")
library(ggplot2)

data.main$strategy <- factor(data.main$strategy, levels = c("rational", "irrational", "avoid.certain", "coupled.confirm", "coupled.explore"))

ggplot(data.main, aes(x = strategy)) +
  geom_bar(aes(y = ..prop.., group = 1), fill = "#275D97") +
  scale_y_continuous(limits = c(0, 1)) +
  theme(
    panel.background = element_blank(),
    axis.title = element_text(size = 10),  # Font size for axis titles
    axis.text = element_text(size = 8)    # Font size for tick labels
  ) +
  labs(
    x = "Strategy",
    y = "Proportion"
  )

## Warning: The dot-dot notation (`..prop..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(prop)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# trials 1-4 proportion correct responses w/ wilson ci

#install.packages("binom")
library(binom)

# compute wilson CIs
data.plot.wilson <- data.main %>%
  filter(!is.na(exploit.comp)) %>%  # Exclude NAs
  group_by(age_group, trial) %>%
  dplyr::summarise(
    n = n(),
    successes = sum(as.numeric(as.character(exploit.comp)), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  rowwise() %>%                                                      # each row independent
  dplyr::mutate(
    prop = successes / n,                                            # proportion correct for this group
    lower = binom.confint(successes, n, method = "wilson")$lower,       
    upper = binom.confint(successes, n, method = "wilson")$upper     # calculate the Wilson confidence interval for a binomial proportion
  ) %>%
  ungroup()

# set dodge so that data points etc. don't overlap
dodge_width <- 0.2

# plotting
p1 <- ggplot(data.plot.wilson, aes(x = as.numeric(trial), y = prop, color = age_group, group = age_group)) +
  geom_line(position = position_dodge(width = dodge_width), size = 1) + # include dodge
  geom_point(position = position_dodge(width = dodge_width), size = 3) + # include dodge
  geom_errorbar(
    aes(ymin = lower, ymax = upper),
    position = position_dodge(width = dodge_width), # include dodge
    width = 0.1
  ) +
  geom_hline(yintercept = 0.50, linetype = "dashed") +  # 50% chance level
  geom_hline(yintercept = 0.33, linetype = "dotted") +  # 33% chance level
  scale_x_continuous(breaks = 1:4, name = "Trial") +
  scale_y_continuous(name = "Proportion of Correct Choices", limits = c(0, 1)) +
  scale_color_manual(values = c("#275D97", "gold")) +
  labs(color = "Age Group") +
  theme_bw(base_size = 10) +
  theme(
    panel.border = element_blank(), # remove panel border
    panel.background = element_blank(), # blank background inside plotting area
    panel.grid.major = element_blank(), # remove grid lines
    panel.grid.minor = element_blank(), # remove grid lines
    axis.line = element_line(color = "black"), # show axis lines
    axis.ticks = element_line(color = "black"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8)
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# trial 5 proportions correct choices w/ wilson ci

library(binom)

# compute wilson CI
data.plot.trial5.wilson <- data.choice %>%
  filter(!is.na(exploit.comp)) %>%  # Exclude NAs
  group_by(age_group, trial_type) %>%
  dplyr::summarise(
    n = n(),
    successes = sum(as.numeric(as.character(exploit.comp)), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  rowwise() %>%                                                      # each row independent
  dplyr::mutate(
    prop = successes / n,                                            # proportion correct for this group
    lower = binom.confint(successes, n, method = "wilson")$lower,       
    upper = binom.confint(successes, n, method = "wilson")$upper     # calculate the Wilson confidence interval for a binomial proportion
  ) %>%
  ungroup()

# Define dodge
dodge_width <- 0.2

# plotting
p2 <- ggplot(data.plot.trial5.wilson, aes(x = trial_type, y = prop, color = age_group, group = age_group)) +
  geom_point(position = position_dodge(width = dodge_width), size = 3, show.legend = FALSE) +
  geom_errorbar(
    aes(ymin = lower, ymax = upper),
    position = position_dodge(width = dodge_width),
    show.legend = FALSE,
    width = 0.1
  ) +
  geom_hline(yintercept = 0.50, linetype = "dashed") +  # 50% chance level
  geom_hline(yintercept = 0.33, linetype = "dotted") +  # 33% chance level
  scale_x_discrete(name = "Forced Choice", labels = c("test_or" = "5")) +
  scale_y_continuous(name = NULL, limits = c(0, 1)) +
  scale_color_manual(values = c("#275D97", "gold")) +
  labs(color = "Age Group") +
  theme_bw(base_size = 10) +
  theme(
    panel.border = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black"),
    axis.ticks = element_line(color = "black"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8)
  )

# combine two plots into single figure
# install.packages("patchwork")
library(patchwork)

## 
## Attaching package: 'patchwork'

## The following object is masked from 'package:MASS':
## 
##     area

(p1 + p2) + 
  plot_layout(widths = c(2, 1), guides = "collect") & 
  theme(legend.position = "right")

Save workspace

save.image(file = "Unsi-Ex BA Analyses.RData")

Unsi_ex BA Analyses

2025-06-20