Load libraries
library(tidyverse)
## ── Attaching packages ────────────────────
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(haven)
library(here)
## here() starts at /Users/angelinetsui/Desktop/Projects/log_LT_revisit-master
Data and variable cleaning (part of this code is adapted from Mike's code):
# Reshape the Kirkham data to long format: one row per infant and trial.
df_kirk <- Kirkham %>%
  # Keep the ID, the age group and the per-trial columns; the first two are
  # renamed on the way through.
  select(subnum = SUBNO, age_group = AGEGROUP, H12:H1, F1:F3, R1:R3) %>%
  gather(key = "trial", value = "looking_time", H12:H1, F1:F3, R1:R3) %>%
  mutate(
    # Looking times of .01 or less become NA, so log() below only ever sees
    # strictly positive values.
    looking_time = ifelse(looking_time > .01, looking_time, NA),
    Ln_LT = log(looking_time),
    # Trial type is read off the column-name prefix: "H", "R", or otherwise
    # (the F columns).
    trial_type = case_when(
      str_detect(trial, "H") ~ "habituation",
      str_detect(trial, "R") ~ "random",
      TRUE ~ "familiar"
    ),
    # Strip the letter prefix to get the trial index, then flip the sign for
    # habituation trials so they sit on the negative side of the axis.
    trial_num = as.numeric(str_replace(trial, "[HRF]", "")),
    trial_num = ifelse(trial_type == "habituation", -trial_num, trial_num),
    # Recode the age-group code k as 3k - 1 (presumably age in months --
    # TODO confirm against the codebook).
    age_group = (as.numeric(age_group) * 3) - 1
  )
Look at the habituation data (as this indicates infants’ cognitive processing), and see how the looking time (LT) varies as a function of the trial number. Does the distribution look log-normally distributed?
The relation between raw LT and trial number does not look linear, but it is unclear whether LT is log-normally distributed.
# Raw looking time across habituation trials, one panel per age group.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter() +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).
# Same plot with all age groups pooled into a single panel.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter() +
  geom_smooth() +
  # Faceting by age group is switched off for this version:
  # facet_wrap(~age_group, scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).
The relation for log-transformed LT looks a bit more linear
# Log looking time across habituation trials, one panel per age group.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter() +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).
# Log looking time with all age groups pooled into a single panel.
df_kirk %>%
  filter(trial_type == "habituation") %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter() +
  geom_smooth() +
  # Faceting by age group is switched off for this version:
  # facet_wrap(~age_group, scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 120 rows containing non-finite values (stat_smooth).
## Warning: Removed 120 rows containing missing values (geom_point).
# Per-infant mean and SD of habituation looking time, on the raw and the log
# scale. Trials with a missing looking time are dropped first, so the
# aggregates are computed over observed trials only.
#
# summarise() returns exactly one row per infant directly, instead of
# broadcasting the statistics to every trial row and de-duplicating them
# afterwards. `.groups = "drop"` hands back an ungrouped tibble so later
# steps are not silently grouped by `subnum`.
df_kirk_m_sd <- df_kirk %>%
  filter(trial_type == "habituation", !is.na(looking_time)) %>%
  group_by(subnum) %>%
  summarise(
    mean_LT = mean(looking_time),
    sd_LT = sd(looking_time),
    mean_Ln_LT = mean(Ln_LT),
    sd_Ln_LT = sd(Ln_LT),
    .groups = "drop"
  )
So, just like in Csibra’s data, the mean and SD of log-transformed LT show no relation at all, whereas the mean and SD of raw LT are related to some extent
# Per-infant SD against per-infant mean of raw looking time, with a linear fit.
df_kirk_m_sd %>%
  ggplot(mapping = aes(x = mean_LT, y = sd_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Per-infant SD against per-infant mean of log looking time, with a linear fit.
df_kirk_m_sd %>%
  ggplot(mapping = aes(x = mean_Ln_LT, y = sd_Ln_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
Let us now look at the relation between looking time and trial number in the Mb1 data. It looks really linear…
# Raw looking time by trial number in Mb1, one panel per age group.
# Points are drawn almost transparent (alpha = 0.03) to cope with overplotting.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Removed 5986 rows containing missing values (geom_point).
# Raw looking time by trial number in Mb1, one panel per lab.
# NOTE(review): the rendered output reports that stat_smooth() failed twice
# here ("insufficient unique values to support 10 knots"), so the default
# smoother is presumably missing from some lab panels -- consider lowering k.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  facet_wrap(vars(lab), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.
## Warning: Removed 5986 rows containing missing values (geom_point).
# Raw looking time by trial number in Mb1, all age groups pooled.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = looking_time)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  # Faceting by age group is switched off for this version:
  # facet_wrap(~age_group, scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Removed 5986 rows containing missing values (geom_point).
The relation for log-transformed LT looks a bit more linear
# Log looking time by trial number in Mb1, one panel per age group.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  facet_wrap(vars(age_group), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Removed 5986 rows containing missing values (geom_point).
# Log looking time by trial number in Mb1, one panel per lab.
# NOTE(review): the rendered output reports that stat_smooth() failed twice
# here ("insufficient unique values to support 10 knots"), so the default
# smoother is presumably missing from some lab panels -- consider lowering k.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  facet_wrap(vars(lab), scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.
## Warning: Removed 5986 rows containing missing values (geom_point).
# Log looking time by trial number in Mb1, all age groups pooled.
Mb1 %>%
  ggplot(mapping = aes(x = trial_num, y = Ln_LT)) +
  geom_jitter(alpha = 0.03) +
  geom_smooth() +
  # Faceting by age group is switched off for this version:
  # facet_wrap(~age_group, scales = "free_y") +
  theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 5986 rows containing non-finite values (stat_smooth).
## Warning: Removed 5986 rows containing missing values (geom_point).
# Per-infant (subid x age_group) mean and SD of looking time in Mb1, on the
# raw and the log scale. Rows with a missing looking time are dropped first.
#
# summarise() returns exactly one row per group directly, instead of
# broadcasting the statistics to every trial row and de-duplicating them
# afterwards. `.groups = "drop"` hands back an ungrouped tibble so later
# steps are not silently grouped.
#
# sd() is NA for a group with a single remaining trial; ggplot drops those
# rows when plotting.
#
# NOTE(review): Mb1 also has a `lab` column. If `subid` is only unique within
# a lab, infants from different labs would be pooled here -- confirm, and add
# `lab` to the grouping if so.
MB1_m_sd <- Mb1 %>%
  filter(!is.na(looking_time)) %>%
  group_by(subid, age_group) %>%
  summarise(
    mean_LT = mean(looking_time),
    sd_LT = sd(looking_time),
    mean_Ln_LT = mean(Ln_LT),
    sd_Ln_LT = sd(Ln_LT),
    .groups = "drop"
  )
The relation between the mean and the SD of LT is attenuated after log-transforming LT
# Per-infant SD against mean of raw looking time in Mb1, with a linear fit,
# one column of panels per age group.
MB1_m_sd %>%
  ggplot(mapping = aes(x = mean_LT, y = sd_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(age_group)) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 40 rows containing non-finite values (stat_smooth).
## Warning: Removed 40 rows containing missing values (geom_point).
# Per-infant SD against mean of log looking time in Mb1, with a linear fit,
# one column of panels per age group.
MB1_m_sd %>%
  ggplot(mapping = aes(x = mean_Ln_LT, y = sd_Ln_LT)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(age_group)) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 40 rows containing non-finite values (stat_smooth).
## Warning: Removed 40 rows containing missing values (geom_point).