Reading and data cleaning

# Load the raw looking-time data; here() resolves the path to the CSV.
sl <- here("data/SLdata.csv") %>%
  read_csv()

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X1 = col_character(),
##   AGE = col_logical()
## )

## See spec(...) for full column specifications.

# Reshape the wide per-subject table into long format (one row per
# subject x trial column) and derive the trial-level covariates used below.
d <- sl %>%
  rename(subnum = SUBNO,
         age_group = AGEGROUP) %>%
  select(subnum, age_group, H12:H1, F1:F3, R1:R3) %>%
  # pivot_longer() replaces the superseded gather(); the resulting columns
  # (trial, looking_time) are the same, only the row order differs.
  pivot_longer(cols = c(H12:H1, F1:F3, R1:R3),
               names_to = "trial",
               values_to = "looking_time") %>%
  mutate(
    # Looking times that are not above .01 are recoded as missing.
    looking_time = if_else(looking_time > .01, looking_time, NA_real_),
    # The letter in the column name encodes the trial type:
    # H = habituation, R = random, anything else (F) = familiar.
    trial_type = case_when(
      str_detect(trial, "H") ~ "habituation",
      str_detect(trial, "R") ~ "random",
      TRUE ~ "familiar"
    ),
    # The digits in the column name give the trial number.
    trial_num = as.numeric(str_remove(trial, "[HRF]")),
    # Habituation trials get negative numbers so that they sit to the left
    # of the test trials on a shared trial axis.
    trial_num = if_else(trial_type == "habituation", -trial_num, trial_num),
    # Linear recode of the group code (code * 3 - 1); the summary tables
    # below show the resulting values 2, 5 and 8.
    age_group = (as.numeric(age_group) * 3) - 1
  )

Visualizations

Histograms of looking times across trials (including habituation).

# Histogram of looking times (5-unit bins), one panel per trial number,
# bars filled by trial type; each panel has its own y scale.
d %>%
  ggplot(aes(x = looking_time, fill = trial_type)) +
  geom_histogram(binwidth = 5) +
  facet_wrap(vars(trial_num), scales = "free_y") +
  theme_bw()

## Warning: Removed 120 rows containing non-finite values (stat_bin).

Broken down further for test trials:

# Test trials only: histogram of looking times with one row of panels per
# trial type x age group combination and one column per trial number.
d %>%
  filter(trial_type != "habituation") %>%
  ggplot(aes(x = looking_time, fill = age_group)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(interaction(trial_type, age_group)),
             cols = vars(trial_num),
             scales = "free_y") +
  theme_bw()

This time logged.

# Same layout as the raw test-trial histogram, but on log looking time
# (bin width .5 on the log scale).
d %>%
  filter(trial_type != "habituation") %>%
  ggplot(aes(x = log(looking_time), fill = age_group)) +
  geom_histogram(binwidth = .5) +
  facet_grid(rows = vars(interaction(trial_type, age_group)),
             cols = vars(trial_num),
             scales = "free_y") +
  theme_bw()

Linear looking time.

# Raw looking time against trial number, coloured by trial type, with a
# smoother (span = 2) per trial type and one panel per age group.
# The x scale limit keeps only trial numbers in [-6, 3]; rows outside that
# range are removed before plotting and smoothing.
d %>%
  ggplot(aes(x = trial_num, y = looking_time, colour = trial_type)) +
  geom_point() +
  geom_smooth(span = 2) +
  scale_x_continuous(limits = c(-6, 3)) +
  facet_grid(cols = vars(age_group)) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 306 rows containing non-finite values (stat_smooth).

## Warning: Removed 306 rows containing missing values (geom_point).

# Test trials only: raw looking time against trial number with a linear
# fit per trial type, one panel per age group.
d %>%
  filter(trial_type != "habituation") %>%
  ggplot(aes(x = trial_num, y = looking_time, colour = trial_type)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(age_group)) +
  theme_bw()

## `geom_smooth()` using formula 'y ~ x'

Log looking time

# Log looking time against trial number, coloured by trial type, with a
# smoother (span = 2) per trial type and one panel per age group.
ggplot(d,
       aes(x = trial_num, y = log(looking_time), col = trial_type)) +
  geom_point() +
  geom_smooth(span = 2) +
  # xlim() is a scale limit: rows with trial_num outside [-6, 3] are removed
  # before smoothing (kept exactly as in the original figure).
  xlim(-6, 3) +
  # Zoom the y axis instead of using ylim(): a scale limit would discard
  # observations whose log looking time falls outside [0, 5] before the
  # smoother is fit, whereas coord_cartesian() only changes the visible range.
  coord_cartesian(ylim = c(0, 5)) +
  facet_grid(~age_group) +
  theme_bw()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: Removed 307 rows containing non-finite values (stat_smooth).

## Warning: Removed 307 rows containing missing values (geom_point).

# Test trials only: log looking time against trial number with a linear
# fit per trial type, one panel per age group.
ggplot(filter(d, trial_type != "habituation"),
       aes(x = trial_num, y = log(looking_time), col = trial_type)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Zoom with coord_cartesian() rather than ylim(): ylim() sets a scale
  # limit, which would drop any observation outside [0, 5] before the
  # linear fit is computed instead of merely hiding it.
  coord_cartesian(ylim = c(0, 5)) +
  facet_grid(~age_group) +
  theme_bw()

## `geom_smooth()` using formula 'y ~ x'

Statistical Models

Reproduce the original analyses

Infants in all three age groups exhibited longer looking at the novel sequence, relative to the familiar sequence, after habituation (see Fig. 2). Twelve of the 16 2-month-olds showed this preference (Wilcoxon matched pairs test, z = 2.38, P < 0.05), as did 11 of the 16 5-month-olds (z = 2.33, P < 0.05) and 12 of the 16 8-month-olds (z = 2.02, P < 0.05). (The Wilcoxon statistic takes account of the magnitude of differences in performance, and is therefore a more sensitive test of infants’ preferences than, say, a sign test.)

# For each age group, count the infants whose mean looking time on random
# (novel) test trials exceeds their mean on familiar test trials.
d %>%
  filter(trial_type != "habituation") %>%
  group_by(age_group, subnum, trial_type) %>%
  # One mean per infant x trial type; dropping the groups explicitly avoids
  # the summarise() regrouping message.
  summarise(lt = mean(looking_time), .groups = "drop") %>%
  # pivot_wider() replaces the superseded spread(): one row per infant with
  # a `familiar` and a `random` column.
  pivot_wider(names_from = trial_type, values_from = lt) %>%
  mutate(longer = random > familiar) %>%
  group_by(age_group) %>%
  summarise(n = n(),
            longer = sum(longer),
            .groups = "drop") %>%
  kable()

## `summarise()` regrouping output by 'age_group', 'subnum' (override with `.groups` argument)

## `summarise()` ungrouping output (override with `.groups` argument)

age_group	n	longer
2	16	12
5	16	11
8	16	12

These counts nicely reproduce the reported numbers.

These conclusions were confirmed with parametric analyses. Looking time data in some cells were positively skewed (which is often the case in visual habituation procedures with young infants), and all data were therefore log-transformed prior to analysis; data shown in Fig. 2 are based on raw scores. A 2 (sex: male vs. female) × 3 (age: 2, 5, or 8 months) × 2 (order: familiar vs. novel sequence seen first after habituation) × 2 (test display: familiar vs. novel sequence) mixed ANOVA yielded a significant main effect of age (F(2, 36) = 21.10, P < 0.001), the result of longer looking overall by the youngest infants. (Very young infants typically exhibit longer looking times than older infants in visual tasks, which may reflect developmental differences in infants’ basic information processing skills (see Johnson, 1996).) There was also a significant main effect of test display (F(1, 36) = 14.67, P < 0.001), the result of longer looking overall at the novel sequence. There were no other significant main effects or interactions.

# ANOVA input: test trials only, with log looking time as the outcome and
# age group converted to a factor.
d_anova <- d %>%
  filter(trial_type %in% c("familiar", "random")) %>%
  mutate(log_looking_time = log(looking_time),
         f_age_group = factor(age_group))

# Mixed ANOVA on log looking time: trial_type varies within subjects
# (subnum), age group between subjects. The result is rendered with kable().
# NOTE(review): the published analysis quoted above also crossed sex and
# test order as between-subject factors. With 3 x 2 x 2 = 12 between-subject
# cells and 48 infants, the error df would be 48 - 12 = 36, which presumably
# explains the 36-vs-45 discrepancy -- confirm. Neither sex nor order is
# selected into `d`, so that design cannot be fit from this data frame.
ezANOVA(data = d_anova, 
        dv = log_looking_time, 
        wid = subnum, 
        within = trial_type, 
        between = f_age_group) %>% 
  kable(digits = 3)

## Warning: Converting "subnum" to factor for ANOVA.

## Warning: Converting "trial_type" to factor for ANOVA.

## Warning: Collapsing data to cell means. *IF* the requested effects are a subset
## of the full design, you must use the "within_full" argument, else results may be
## inaccurate.

	Effect	DFn	DFd	F	p	p<.05	ges
2	f_age_group	2	45	20.905	0.000	*	0.413
3	trial_type	1	45	15.667	0.000	*	0.078
4	f_age_group:trial_type	2	45	0.087	0.917		0.001

Overall, we see the same effects coming out quite nicely, though the numbers are different. I confess I don’t know why they have 36 degrees of freedom for F tests rather than the 45 (compared with 48 participants) that we get.

Planned comparisons (simple effects tests) revealed a reliable preference for the novel sequence in each age group (2-month-olds, F(1, 36) = 4.30, P < 0.05; 5-month-olds, F(1, 36) = 7.00, P < 0.05; 8-month-olds, F(1, 36) = 4.15, P < 0.05). (Simple effects tests take into account the omnibus error term of the ANOVA, and are the functional equivalent of a series of more conservative t-tests to examine preferences in each age group.)

Not doing this right now.

Mixed effects

# Modelling data: test trials only (familiar and random).
d_mod <- d %>%
  filter(trial_type %in% c("familiar", "random"))

# Raw looking time: age group x trial type, with a random intercept per
# infant.
linear_mod <- lmer(
  looking_time ~ age_group * trial_type + (1 | subnum),
  data = d_mod
)

# Same model structure fit to log looking time.
log_mod <- lmer(
  log(looking_time) ~ age_group * trial_type + (1 | subnum),
  data = d_mod
)

Linear LT model

# Print the full model summary for the raw looking-time model.
linear_mod %>%
  summary()

## Linear mixed model fit by REML ['lmerMod']
## Formula: looking_time ~ age_group * trial_type + (1 | subnum)
##    Data: d_mod
## 
## REML criterion at convergence: 2387.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2320 -0.4085 -0.1100  0.2405  4.0643 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subnum   (Intercept) 102.4    10.12   
##  Residual             191.2    13.83   
## Number of obs: 288, groups:  subnum, 48
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)                 27.2884     4.2292   6.452
## age_group                   -2.9349     0.7596  -3.864
## trial_typerandom            10.2671     3.7037   2.772
## age_group:trial_typerandom  -1.0040     0.6652  -1.509
## 
## Correlation of Fixed Effects:
##             (Intr) ag_grp trl_ty
## age_group   -0.898              
## trl_typrndm -0.438  0.393       
## ag_grp:trl_  0.393 -0.438 -0.898

# Fixed-effect coefficient table for the raw looking-time model.
# coef() extracts the coefficient matrix from the summary explicitly, instead
# of relying on `$coef` partially matching the `coefficients` element.
coef(summary(linear_mod)) %>%
  kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	27.288	4.229	6.452
age_group	-2.935	0.760	-3.864
trial_typerandom	10.267	3.704	2.772
age_group:trial_typerandom	-1.004	0.665	-1.509

Log LT model

# Print the full model summary for the log looking-time model.
log_mod %>%
  summary()

## Linear mixed model fit by REML ['lmerMod']
## Formula: log(looking_time) ~ age_group * trial_type + (1 | subnum)
##    Data: d_mod
## 
## REML criterion at convergence: 675.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.76909 -0.66255 -0.03474  0.67629  2.51992 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subnum   (Intercept) 0.1561   0.3950  
##  Residual             0.4895   0.6996  
## Number of obs: 288, groups:  subnum, 48
## 
## Fixed effects:
##                             Estimate Std. Error t value
## (Intercept)                 2.925699   0.185364  15.784
## age_group                  -0.167578   0.033292  -5.034
## trial_typerandom            0.341959   0.187412   1.825
## age_group:trial_typerandom -0.007327   0.033660  -0.218
## 
## Correlation of Fixed Effects:
##             (Intr) ag_grp trl_ty
## age_group   -0.898              
## trl_typrndm -0.506  0.454       
## ag_grp:trl_  0.454 -0.506 -0.898

# Fixed-effect coefficient table for the log looking-time model.
# coef() extracts the coefficient matrix from the summary explicitly, instead
# of relying on `$coef` partially matching the `coefficients` element.
coef(summary(log_mod)) %>%
  kable(digits = 3)

	Estimate	Std. Error	t value
(Intercept)	2.926	0.185	15.784
age_group	-0.168	0.033	-5.034
trial_typerandom	0.342	0.187	1.825
age_group:trial_typerandom	-0.007	0.034	-0.218

Additive vs. Multiplicative Effects in Kirkham, Slemmer, & Johnson

Mike Frank