Load libraries

library(tidyverse)
## ── Attaching packages ────────────────────
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(haven)
library(here)
## here() starts at /Users/angelinetsui/Desktop/Projects/log_LT_revisit-master

Kirkham’s dataset

The data and variable cleaning below is adapted in part from Mike's code:

# Reshape Kirkham's wide data (one column per trial) into long format, one row
# per participant x trial, and derive the variables used in the plots below.
df_kirk <- Kirkham %>%
  rename(subnum = SUBNO,
         age_group = AGEGROUP) %>%
  select(subnum, age_group, H12:H1, F1:F3, R1:R3) %>%
  # pivot_longer() replaces the superseded gather(). Rows now come out ordered
  # by participant instead of by trial column; none of the visible downstream
  # code depends on row order.
  pivot_longer(cols = c(H12:H1, F1:F3, R1:R3),
               names_to = "trial",
               values_to = "looking_time") %>%
  mutate(
    # Looking times that are not above .01 are treated as missing.
    looking_time = ifelse(looking_time > .01, looking_time, NA),
    Ln_LT = log(looking_time),
    # The trial column name encodes the trial type: H = habituation,
    # R = random, anything else (the F columns) = familiar.
    trial_type = case_when(
      str_detect(trial, "H") ~ "habituation",
      str_detect(trial, "R") ~ "random",
      TRUE ~ "familiar"
    ),
    # Strip the letter prefix to get the trial index, then negate it for
    # habituation trials so they carry negative trial numbers.
    trial_num = as.numeric(str_replace(trial, "[HRF]", "")),
    trial_num = ifelse(trial_type == "habituation", -trial_num, trial_num),
    # Recode group codes 1, 2, 3, ... to 2, 5, 8, ...
    # NOTE(review): presumably age in months -- confirm against the codebook.
    age_group = (as.numeric(age_group) * 3) - 1
  )

Visualization

Look at the habituation data (as this indicates infants’ cognitive processing), and see how the looking time (LT) varies as a function of the trial number. Does the distribution look log-normally distributed?

Raw looking time (LT) and trial num.

The relation between raw LT and trial number does not look linear, but it is unclear whether LT is log-normally distributed.

# Raw looking time across habituation trials, one panel per age group.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter() +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).

# Same plot with all age groups pooled into a single panel (no faceting).
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter() +
  geom_smooth() +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).

## Warning: Removed 120 rows containing missing values (geom_point).

Log-transformed LT and trial number.

The relation for log-transformed LT looks a bit more linear

# Log-transformed looking time across habituation trials, one panel per age
# group.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter() +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).

# Same log-scale plot with all age groups pooled into a single panel.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter() +
  geom_smooth() +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).

## Warning: Removed 120 rows containing missing values (geom_point).

Mean and SD relation

# Per-participant mean and SD of habituation looking time, on both the raw and
# the log scale. Trials with missing looking time are dropped first.
df_kirk_m_sd <- df_kirk %>%
  filter(trial_type == "habituation", !is.na(looking_time)) %>%
  group_by(subnum) %>%
  # summarise() produces one row per participant directly, replacing the
  # mutate() + select() + unique() round trip; .groups = "drop" returns an
  # ungrouped tibble so later verbs are not silently applied per participant.
  summarise(mean_LT = mean(looking_time),
            sd_LT = sd(looking_time),
            mean_Ln_LT = mean(Ln_LT),
            sd_Ln_LT = sd(Ln_LT),
            .groups = "drop")

plot mean and sd

So, just like in Csibra’s data, the mean and SD of log-transformed LT show no relation at all, whereas the raw mean LT and raw SD are related to some extent

# Per-participant SD against mean of raw looking time, with a linear fit.
df_kirk_m_sd %>%
  ggplot(mapping = aes(y = sd_LT, x = mean_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

# Per-participant SD against mean of log looking time, with a linear fit.
df_kirk_m_sd %>%
  ggplot(mapping = aes(y = sd_Ln_LT, x = mean_Ln_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

MB 1 dataset

Let us just look at the relation between looking time and trial number. It looks really linear...

# MB1: raw looking time by trial number, one panel per age group.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Removed 5986 rows containing missing values (geom_point).

# MB1: raw looking time by trial number, one panel per lab.
# With the automatically chosen GAM smoother (default basis dimension of 10),
# stat_smooth() failed in panels that have too few distinct trial numbers
# ("insufficient unique values to support 10 knots: reduce k"), so the same
# smoother is requested explicitly with a smaller basis dimension.
# NOTE(review): k = 4 is a judgement call; a panel with fewer than 4 distinct
# trial numbers would still fail -- confirm against the data.
ggplot(data = Mb1, aes(y = looking_time, x = trial_num)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 4)) +
  facet_wrap(~lab, scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.
## Warning: Removed 5986 rows containing missing values (geom_point).

# Same MB1 raw-LT plot with all age groups pooled into a single panel.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).

## Warning: Removed 5986 rows containing missing values (geom_point).

Log-transformed LT and trial number.

The relation for log-transformed LT looks a bit more linear

# MB1: log-transformed looking time by trial number, one panel per age group.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Removed 5986 rows containing missing values (geom_point).

# MB1: log-transformed looking time by trial number, one panel per lab.
# With the automatically chosen GAM smoother (default basis dimension of 10),
# stat_smooth() failed in panels that have too few distinct trial numbers
# ("insufficient unique values to support 10 knots: reduce k"), so the same
# smoother is requested explicitly with a smaller basis dimension.
# NOTE(review): k = 4 is a judgement call; a panel with fewer than 4 distinct
# trial numbers would still fail -- confirm against the data.
ggplot(data = Mb1, aes(y = Ln_LT, x = trial_num)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 4)) +
  facet_wrap(~lab, scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.
## Warning: Removed 5986 rows containing missing values (geom_point).

# Same MB1 log-LT plot with all age groups pooled into a single panel.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).

## Warning: Removed 5986 rows containing missing values (geom_point).

Mean and SD relation

# Per-participant (within age group) mean and SD of looking time, on both the
# raw and the log scale. Trials with missing looking time are dropped first.
# NOTE(review): grouping is by subid and age_group only; if subid values can
# repeat across labs, those participants are pooled -- confirm against the data.
MB1_m_sd <- Mb1 %>%
  filter(!is.na(looking_time)) %>%
  group_by(subid, age_group) %>%
  # summarise() produces one row per group directly, replacing the
  # mutate() + select() + unique() round trip; .groups = "drop" returns an
  # ungrouped tibble.
  summarise(mean_LT = mean(looking_time),
            sd_LT = sd(looking_time),
            mean_Ln_LT = mean(Ln_LT),
            sd_Ln_LT = sd(Ln_LT),
            .groups = "drop")

plot mean and sd

The relation between mean and sd is attenuated after log-transforming LT

# MB1: per-participant SD against mean of raw looking time, with a linear fit,
# one column of panels per age group.
MB1_m_sd %>%
  ggplot(mapping = aes(y = sd_LT, x = mean_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(age_group)) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 40 rows containing non-finite values (stat_smooth).
## Warning: Removed 40 rows containing missing values (geom_point).

# MB1: per-participant SD against mean of log looking time, with a linear fit,
# one column of panels per age group.
MB1_m_sd %>%
  ggplot(mapping = aes(y = sd_Ln_LT, x = mean_Ln_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(age_group)) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 40 rows containing non-finite values (stat_smooth).

## Warning: Removed 40 rows containing missing values (geom_point).