library(tidyverse)
library(here)

## here() starts at /Users/mcfrank/Projects/eals

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

Read data.

# Load the concatenated Study 2 data (all waves); here() resolves the path
# from the project root so the script does not depend on the working directory.
d <- here("study2", "EALS_study2_concatenated_allwaves.csv") |>
  read_csv()

## Rows: 123 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): participant_video_id, gender_male1, condition, datawave
## dbl (5): age_m, age_yr, ok_video_present, bid_help_toy, first_bid_toy_s
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Descriptives.

# Descriptives for each data wave x condition cell: mean of age_m / 12
# (presumably months converted to years), proportion of rows coded "Male",
# and the number of rows. Rendered as a table rounded to two decimals.
d |>
  group_by(datawave, condition) |>
  summarise(
    mean_age = mean(age_m / 12),
    prop_male = mean(gender_male1 == "Male"),
    n = n()
  ) |>
  knitr::kable(digits = 2)

## `summarise()` has grouped output by 'datawave'. You can override using the
## `.groups` argument.

datawave	condition	mean_age	prop_male	n
spring 23 pilot	Reliable	5.25	0.41	17
spring 23 pilot	Unreliable	5.13	0.28	18
summer fall 23 data	Reliable	5.43	0.47	30
summer fall 23 data	Unreliable	5.46	0.53	32
winter 24 pilot	Unreliable	5.46	0.42	26

Plot condition effects

Let’s not do cool survival curves and just look at the simple first bid time.

# Time of first bid by condition, one panel per data wave, on a log10 y-axis.
# Points are jittered horizontally only; the red overlay is the bootstrapped
# mean and confidence interval. NOTE: scale transformations are applied before
# stats, so the summary is computed on the log10-transformed values.
d |>
  ggplot(aes(x = condition, y = first_bid_toy_s)) +
  geom_jitter(width = 0.2, height = 0) +
  stat_summary(fun.data = "mean_cl_boot", colour = "red") +
  facet_wrap(vars(datawave)) +
  scale_y_log10() +
  labs(y = "First bid (s)")

# Bids for help by condition, one panel per data wave. A small vertical jitter
# separates overlapping points; the red overlay is the bootstrapped mean and
# confidence interval.
d |>
  ggplot(aes(x = condition, y = bid_help_toy)) +
  geom_jitter(width = 0.2, height = 0.05) +
  stat_summary(fun.data = "mean_cl_boot", colour = "red") +
  facet_wrap(vars(datawave)) +
  labs(y = "Bids for help")

Exploration

Relations between exploration time and demographics.

# First-bid time against age (age_m), coloured by data wave, on a log10 y-axis.
# The dashed black line is a single linear fit pooled over all waves
# (group = 1); the coloured lines are per-wave linear fits without ribbons.
d |>
  ggplot(aes(x = age_m, y = first_bid_toy_s, colour = datawave)) +
  geom_point() +
  geom_smooth(aes(group = 1), method = "lm", colour = "black", linetype = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10() +
  labs(y = "First bid (s)")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Add gender.

# First-bid time against age (age_m), coloured by gender, on a log10 y-axis,
# with a separate linear fit and confidence ribbon per gender.
d |>
  ggplot(aes(x = age_m, y = first_bid_toy_s, colour = gender_male1)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  scale_y_log10() +
  labs(y = "First bid (s)")

## `geom_smooth()` using formula = 'y ~ x'

Models

Trying some kitchen-sink models to understand the data.

# Baseline linear model: natural-log first-bid time predicted by condition only.
lm(log(first_bid_toy_s) ~ condition, data = d) |>
  summary()

## 
## Call:
## lm(formula = log(first_bid_toy_s) ~ condition, data = d)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.90227 -0.67253 -0.02091  1.03931  1.59754 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.5954     0.1524  23.591  < 2e-16 ***
## conditionUnreliable   0.5582     0.1939   2.879  0.00472 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.045 on 121 degrees of freedom
## Multiple R-squared:  0.06411,    Adjusted R-squared:  0.05638 
## F-statistic: 8.289 on 1 and 121 DF,  p-value: 0.004718

Add age.

# Linear model with condition, age (age_m), and their interaction as predictors
# of natural-log first-bid time.
lm(log(first_bid_toy_s) ~ condition * age_m, data = d) |>
  summary()

## 
## Call:
## lm(formula = log(first_bid_toy_s) ~ condition * age_m, data = d)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.71289 -0.66538  0.05969  0.86352  1.94545 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                2.137738   1.006453   2.124   0.0357 *
## conditionUnreliable        0.810913   1.312850   0.618   0.5380  
## age_m                      0.022648   0.015460   1.465   0.1456  
## conditionUnreliable:age_m -0.003989   0.020150  -0.198   0.8434  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.035 on 119 degrees of freedom
## Multiple R-squared:  0.09624,    Adjusted R-squared:  0.07346 
## F-statistic: 4.224 on 3 and 119 DF,  p-value: 0.007074

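Try a mixed effects model. The optimizer converges, but the fit is singular (the by-wave random-intercept variance is estimated as zero), plausibly because there are only three waves and not all waves have all conditions. It does still show some condition effect.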

# Mixed model: condition as a fixed effect on natural-log first-bid time, with
# random intercepts and random condition slopes grouped by data wave.
lmer(
  log(first_bid_toy_s) ~ condition + (condition | datawave),
  data = d
) |>
  summary()

## boundary (singular) fit: see help('isSingular')

## Linear mixed model fit by REML ['lmerMod']
## Formula: log(first_bid_toy_s) ~ condition + (condition | datawave)
##    Data: d
## 
## REML criterion at convergence: 361.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.79336 -0.70904 -0.00316  0.85360  1.53759 
## 
## Random effects:
##  Groups   Name                Variance Std.Dev. Corr
##  datawave (Intercept)         0.00000  0.0000       
##           conditionUnreliable 0.03208  0.1791    NaN
##  Residual                     1.07950  1.0390       
## Number of obs: 123, groups:  datawave, 3
## 
## Fixed effects:
##                     Estimate Std. Error t value
## (Intercept)           3.5954     0.1516  23.724
## conditionUnreliable   0.5790     0.2195   2.638
## 
## Correlation of Fixed Effects:
##             (Intr)
## cndtnUnrlbl -0.690
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

EALS Study 2 analysis

Mike Frank

2024-02-26

Plot condition effects

Exploration

Models