library(tidyverse)
library(here)
## here() starts at /Users/mcfrank/Projects/eals
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
Read data.
# Read the study 2 CSV into `d`; here() resolves the path relative to the
# project root, so this works regardless of the working directory.
d <- here("study2","EALS_study2_concatenated_allwaves.csv") |>
  read_csv()
## Rows: 123 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): participant_video_id, gender_male1, condition, datawave
## dbl (5): age_m, age_yr, ok_video_present, bid_help_toy, first_bid_toy_s
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Descriptives.
# Descriptives for each datawave x condition cell: mean of age_m / 12
# (presumably months converted to years), proportion of rows whose
# gender_male1 is "Male", and the row count. Rendered as a table rounded
# to 2 digits.
d |>
  group_by(datawave, condition) |>
  summarise(
    mean_age = mean(age_m / 12),
    prop_male = mean(gender_male1 == "Male"),
    n = n()
  ) |>
  knitr::kable(digits = 2)
## `summarise()` has grouped output by 'datawave'. You can override using the
## `.groups` argument.
datawave | condition | mean_age | prop_male | n |
---|---|---|---|---|
spring 23 pilot | Reliable | 5.25 | 0.41 | 17 |
spring 23 pilot | Unreliable | 5.13 | 0.28 | 18 |
summer fall 23 data | Reliable | 5.43 | 0.47 | 30 |
summer fall 23 data | Unreliable | 5.46 | 0.53 | 32 |
winter 24 pilot | Unreliable | 5.46 | 0.42 | 26 |
Let’s skip the cool survival curves for now and just look at the simple time to first bid.
# First-bid time by condition, one panel per data wave.
# Points are jittered horizontally only (height = 0), so y values are exact.
# The red summary is the mean_cl_boot mean and interval; because the y scale
# is log10, that summary is computed on the log10-transformed values.
ggplot(data = d, mapping = aes(x = condition, y = first_bid_toy_s)) +
  geom_jitter(width = 0.2, height = 0) +
  stat_summary(fun.data = "mean_cl_boot", colour = "red") +
  facet_wrap(vars(datawave)) +
  scale_y_log10() +
  labs(y = "First bid (s)")
# bid_help_toy by condition, one panel per data wave.
# A small vertical jitter (0.05) is added on top of the horizontal jitter so
# overlapping points can be told apart; the red summary is the mean_cl_boot
# mean and interval.
ggplot(data = d, mapping = aes(x = condition, y = bid_help_toy)) +
  geom_jitter(width = 0.2, height = 0.05) +
  stat_summary(fun.data = "mean_cl_boot", colour = "red") +
  facet_wrap(vars(datawave)) +
  labs(y = "Bids for help")
Relations between exploration time and demographics.
# First-bid time (log10 y axis) against age_m, coloured by data wave.
# Two linear fits are overlaid:
#   - a dashed black line pooled across all waves (group = 1 overrides the
#     per-wave grouping implied by the colour aesthetic), with its SE band;
#   - one line per wave, without SE bands.
ggplot(
  data = d,
  mapping = aes(x = age_m, y = first_bid_toy_s, colour = datawave)
) +
  geom_point() +
  geom_smooth(
    aes(group = 1),
    method = "lm",
    colour = "black",
    linetype = 2
  ) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_log10() +
  labs(y = "First bid (s)")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Add gender.
# Same age vs. first-bid view (log10 y axis), now coloured by gender_male1,
# with a separate linear fit and SE band for each gender value.
ggplot(
  data = d,
  mapping = aes(x = age_m, y = first_bid_toy_s, colour = gender_male1)
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  scale_y_log10() +
  labs(y = "First bid (s)")
## `geom_smooth()` using formula = 'y ~ x'
Trying some kitchen-sink models to understand the data.
# Baseline linear model: natural-log first-bid time predicted by condition
# alone, pooling over data waves.
lm(log(first_bid_toy_s) ~ condition, data = d) |>
  summary()
##
## Call:
## lm(formula = log(first_bid_toy_s) ~ condition, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.90227 -0.67253 -0.02091 1.03931 1.59754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.5954 0.1524 23.591 < 2e-16 ***
## conditionUnreliable 0.5582 0.1939 2.879 0.00472 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.045 on 121 degrees of freedom
## Multiple R-squared: 0.06411, Adjusted R-squared: 0.05638
## F-statistic: 8.289 on 1 and 121 DF, p-value: 0.004718
Add age.
# Linear model with condition, age_m, and their interaction.
# NOTE(review): age_m enters uncentered, so the conditionUnreliable
# coefficient is the condition effect at age_m = 0, not at the sample's
# average age; consider centering age_m before interpreting it.
lm(log(first_bid_toy_s) ~ condition * age_m, data = d) |>
  summary()
##
## Call:
## lm(formula = log(first_bid_toy_s) ~ condition * age_m, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.71289 -0.66538 0.05969 0.86352 1.94545
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.137738 1.006453 2.124 0.0357 *
## conditionUnreliable 0.810913 1.312850 0.618 0.5380
## age_m 0.022648 0.015460 1.465 0.1456
## conditionUnreliable:age_m -0.003989 0.020150 -0.198 0.8434
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.035 on 119 degrees of freedom
## Multiple R-squared: 0.09624, Adjusted R-squared: 0.07346
## F-statistic: 4.224 on 3 and 119 DF, p-value: 0.007074
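Try a mixed effects model. The optimizer converges, but the fit is singular (the by-wave random-intercept variance is estimated at zero): there are only three waves, and not all waves have all conditions. It does still show some condition effect.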
# Mixed model: fixed effect of condition on natural-log first-bid time, with
# by-datawave random intercepts and random condition slopes (correlated).
lmer(
  log(first_bid_toy_s) ~ condition + (condition | datawave),
  data = d
) |>
  summary()
## boundary (singular) fit: see help('isSingular')
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(first_bid_toy_s) ~ condition + (condition | datawave)
## Data: d
##
## REML criterion at convergence: 361.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.79336 -0.70904 -0.00316 0.85360 1.53759
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## datawave (Intercept) 0.00000 0.0000
## conditionUnreliable 0.03208 0.1791 NaN
## Residual 1.07950 1.0390
## Number of obs: 123, groups: datawave, 3
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.5954 0.1516 23.724
## conditionUnreliable 0.5790 0.2195 2.638
##
## Correlation of Fixed Effects:
## (Intr)
## cndtnUnrlbl -0.690
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')