library(tidyverse)
library(here)
library(ggthemes)
library(metafor)
library(kableExtra)
library(ggrepel)
d <- read_csv(here("data/metalab_data_mini.csv"))
author_d <- read_csv(here("data/clean_author.csv"))
source(here("helper/model_comparison_help.r"))
source(here("helper/method_mod_help.r"))
source(here("helper/author_help.r"))
best_fit_bmeasure_model_df <- readRDS(here("cached_data/best_fit_behavioral_measure_df.Rds"))
best_fit_ep_model_df <- readRDS(here("cached_data/best_fit_exposure_phase.Rds"))
best_fit_sn_model_df <- readRDS(here("cached_data/best_fit_stimuli_naturalness.Rds"))
best_fit_major_author_df <- readRDS(here("cached_data/best_fit_major_author_df.Rds"))
age_modesl_df <- readRDS(here("cached_data/age_models_df.Rds"))
major_author_fit_df <- readRDS(here("cached_data/major_author_model_ds.Rds"))
1.
behavioral_measure
running models for
comparison
model stats for
significant values
best_fit_bmeasure_model_df %>%
filter(grepl("behavioral_measure", term)) %>%
filter(p.value < .05) %>%
kableExtra::kable(digits = .5) %>%
kableExtra::kable_styling() %>%
kableExtra::scroll_box(width = "100%", height = "500px")
term
|
type
|
estimate
|
std.error
|
statistic
|
p.value
|
model_spec
|
dataset
|
model_spec_clean
|
ic
|
ML
|
REML
|
model_type
|
behavioral_measuremanual
|
summary
|
0.8
|
0.2
|
3.6
|
0
|
d_calc ~ 1 + behavioral_measure
|
Sound symbolism
|
Const
|
AICc
|
51.0
|
51.2
|
bmeasure
|
behavioral_measurelooking
|
summary
|
-0.8
|
0.2
|
-5.1
|
0
|
d_calc ~ 1 + behavioral_measure
|
Vowel discrimination (native)
|
Const
|
AICc
|
210.2
|
206.8
|
bmeasure
|
behavioral_measurelooking
|
summary
|
-1.5
|
0.4
|
-3.4
|
0
|
d_calc ~ 1 + behavioral_measure
|
Vowel discrimination (non-native)
|
Const
|
AICc
|
60.5
|
54.5
|
bmeasure
|
visualization
d %>%
filter(mean_age_months < 36) %>%
filter(ds_clean %in% unique(best_fit_bmeasure_model_df$dataset)) %>%
ggplot(aes(x = mean_age_months, y = d_calc, color = behavioral_measure)) +
geom_point(alpha = .5) +
geom_smooth(method = "lm") +
facet_wrap(~ds_clean)+
theme_few()

2. Exposure phase
running models for
comparison
model stats for
significant values
best_fit_ep_model_df %>%
filter(grepl("exposure_phase", term)) %>%
filter(p.value < .05) %>%
kableExtra::kable(digits = .2) %>%
kable_styling() %>%
kableExtra::scroll_box(width = "100%", height = "500px")
term
|
type
|
estimate
|
std.error
|
statistic
|
p.value
|
model_spec
|
dataset
|
model_spec_clean
|
ic
|
ML
|
REML
|
model_type
|
exposure_phasehabituation
|
summary
|
0
|
0
|
-3
|
0
|
d_calc ~ log(mean_age_months) + exposure_phase
|
Language discrimination and preference
|
Log
|
AICc
|
155
|
151
|
ep
|
exposure_phasefamiliarization
|
summary
|
2
|
1
|
2
|
0
|
d_calc ~ 1 + exposure_phase
|
Natural speech preference
|
Const
|
AICc
|
110
|
106
|
ep
|
exposure_phasenot_applicable
|
summary
|
0
|
0
|
-2
|
0
|
d_calc ~ 1 + exposure_phase
|
Neonatal Imitation
|
Const
|
AICc
|
58
|
53
|
ep
|
exposure_phasefamiliarization
|
summary
|
-1
|
0
|
-2
|
0
|
d_calc ~ 1 + exposure_phase
|
Vowel discrimination (native)
|
Const
|
AICc
|
230
|
223
|
ep
|
exposure_phasehabituation
|
summary
|
-1
|
0
|
-3
|
0
|
d_calc ~ 1 + exposure_phase
|
Vowel discrimination (native)
|
Const
|
AICc
|
230
|
223
|
ep
|
visualization
d %>%
filter(mean_age_months < 36) %>%
filter(ds_clean %in% unique(best_fit_ep_model_df$dataset)) %>%
ggplot(aes(x = mean_age_months, y = d_calc, color = exposure_phase)) +
geom_point(alpha = .5) +
geom_smooth(method = "lm") +
facet_wrap(~ds_clean)+
theme_few()

3. Stimuli
naturalness
## get subset of data
sn_d <- d %>%
distinct(ds_clean, stimuli_naturalness) %>%
filter(!is.na(stimuli_naturalness)) %>%
group_by(ds_clean) %>%
count() %>%
filter(!n == 1) %>%
pull(ds_clean)
## get model comparison stats
sn_models_df <- lapply(sn_d,
function(name){
print(name)
get_compare_IC_df(d %>% filter(!is.na(stimuli_naturalness)), name, c("stimuli_naturalness"))
}) %>%
bind_rows() %>%
mutate(model_type = "sn")
## [1] "Abstract rule learning"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Categorization bias"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Label advantage in concept learning"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Mutual exclusivity"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Prosocial agents"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Statistical word segmentation"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Vowel discrimination (native)"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Vowel discrimination (non-native)"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## find best fitting models
min_AICc_sn_models_df <- sn_models_df %>%
filter(ic == "AICc") %>%
group_by(dataset) %>%
filter(REML == min(REML))
## finding the stats of the models
sn_models_fits_df <- lapply(sn_d,
function(name){
print(name)
get_model_fit_df(d %>% filter(!is.na(stimuli_naturalness)), name, c("stimuli_naturalness"))
}) %>%
bind_rows()
## [1] "Abstract rule learning"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Categorization bias"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Label advantage in concept learning"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Mutual exclusivity"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Prosocial agents"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Statistical word segmentation"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Vowel discrimination (native)"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Vowel discrimination (non-native)"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## find the best fit model's stats
best_fit_sn_model_df <- sn_models_fits_df %>%
left_join(min_AICc_sn_models_df,
by = c("model_spec", "dataset", "model_spec_clean")) %>%
filter(!is.na(REML))
saveRDS(best_fit_sn_model_df, here("cached_data/best_fit_stimuli_naturalness.Rds"))
significant
models
best_fit_sn_model_df %>%
filter(grepl("naturalness", term)) %>%
filter(p.value < 0.05) %>%
kableExtra::kable(digits = .2) %>%
kable_styling() %>%
kableExtra::scroll_box(width = "100%", height = "500px")
term
|
type
|
estimate
|
std.error
|
statistic
|
p.value
|
model_spec
|
dataset
|
model_spec_clean
|
ic
|
ML
|
REML
|
model_type
|
stimuli_naturalnessnatural
|
summary
|
0
|
0
|
2
|
0
|
d_calc ~ I(mean_age_months^2) + stimuli_naturalness
|
Abstract rule learning
|
Quadratic
|
AICc
|
140
|
137
|
sn
|
stimuli_naturalnessnatural
|
summary
|
0
|
0
|
2
|
0
|
d_calc ~ 1 + stimuli_naturalness
|
Label advantage in concept learning
|
Const
|
AICc
|
170
|
169
|
sn
|
stimuli_naturalnessnatural
|
summary
|
0
|
0
|
4
|
0
|
d_calc ~ 1 + stimuli_naturalness
|
Statistical word segmentation
|
Const
|
AICc
|
118
|
118
|
sn
|
4. Major author
all_nested_d <-lapply(distinct(d, ds_clean) %>% pull(),
function(x){
get_major_author_effect(d, x, author_d)
}) %>%
bind_rows() %>%
group_by(ds_clean, major_author) %>%
nest() %>%
filter(!major_author %in% c("…SweigWilson", "deVilliers", "Fernald","GrafEstes", "HillairetdeBoisferon"
)) %>%
filter(!is.na(major_author) & ds_clean != "Prosocial Agents")
get the best fit
major author models
major_author_fit_df <- readRDS(here("cached_data/major_author_model_ds.Rds"))
major_author_model_fit_df <- readRDS(here("cached_data/major_author_fits_ds.Rds"))
get the significant
value
best_fit_major_author_model <- readRDS(here("cached_data/best_fit_major_author_df.Rds"))
best_fit_major_author_model %>%
filter(grepl("by_major_author", term),
p.value < 0.05)
## # A tibble: 12 × 13
## term type estim…¹ std.e…² stati…³ p.value model…⁴ dataset major…⁵ model…⁶
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 by_maj… summ… 2.04 0.924 2.21 2.72e-2 d_calc… Catego… Bauer Log
## 2 by_maj… summ… 2.04 0.924 2.21 2.72e-2 d_calc… Catego… Mandler Log
## 3 by_maj… summ… -0.388 0.190 -2.05 4.08e-2 d_calc… Cross-… Vlach Quadra…
## 4 by_maj… summ… -0.423 0.0731 -5.79 6.92e-9 d_calc… Famili… Keren-… Linear
## 5 by_maj… summ… 0.536 0.252 2.13 3.35e-2 d_calc… Gaze f… Mundy Quadra…
## 6 by_maj… summ… 0.650 0.230 2.83 4.66e-3 d_calc… Mispro… Swingl… Quadra…
## 7 by_maj… summ… 1.11 0.564 1.97 4.84e-2 d_calc… Online… Weisle… Const
## 8 by_maj… summ… -0.243 0.106 -2.29 2.20e-2 d_calc… Statis… Thiess… Const
## 9 by_maj… summ… 0.542 0.274 1.98 4.82e-2 d_calc… Vowel … Bohn Const
## 10 by_maj… summ… 0.542 0.274 1.98 4.82e-2 d_calc… Vowel … Polka Const
## 11 by_maj… summ… 0.241 0.0819 2.94 3.24e-3 d_calc… Word S… Singh Const
## 12 by_maj… summ… 0.522 0.170 3.07 2.13e-3 d_calc… Infant… Werker Const
## # … with 3 more variables: ic <chr>, ML <dbl>, REML <dbl>, and abbreviated
## # variable names ¹estimate, ²std.error, ³statistic, ⁴model_spec,
## # ⁵major_author, ⁶model_spec_clean
best_fit_major_author_model %>%
filter(grepl("by_major_author", term),
p.value < 0.05) %>%
kableExtra::kable(digits = .2) %>%
kable_styling() %>%
kableExtra::scroll_box(width = "100%", height = "500px")
term
|
type
|
estimate
|
std.error
|
statistic
|
p.value
|
model_spec
|
dataset
|
major_author
|
model_spec_clean
|
ic
|
ML
|
REML
|
by_major_authoryes
|
summary
|
2
|
1
|
2
|
0
|
d_calc ~ log(mean_age_months) + by_major_author
|
Categorization bias
|
Bauer
|
Log
|
AICc
|
284
|
273
|
by_major_authoryes
|
summary
|
2
|
1
|
2
|
0
|
d_calc ~ log(mean_age_months) + by_major_author
|
Categorization bias
|
Mandler
|
Log
|
AICc
|
284
|
273
|
by_major_authoryes
|
summary
|
0
|
0
|
-2
|
0
|
d_calc ~ I(mean_age_months^2) + by_major_author
|
Cross-situational word learning
|
Vlach
|
Quadratic
|
AICc
|
79
|
78
|
by_major_authoryes
|
summary
|
0
|
0
|
-6
|
0
|
d_calc ~ mean_age_months + by_major_author
|
Familiar word recognition
|
Keren-Portnoy
|
Linear
|
AICc
|
16
|
21
|
by_major_authoryes
|
summary
|
1
|
0
|
2
|
0
|
d_calc ~ I(mean_age_months^2) + by_major_author
|
Gaze following (combined)
|
Mundy
|
Quadratic
|
AICc
|
151
|
146
|
by_major_authoryes
|
summary
|
1
|
0
|
3
|
0
|
d_calc ~ I(mean_age_months^2) + by_major_author
|
Mispronunciation sensitivity
|
Swingley
|
Quadratic
|
AICc
|
616
|
609
|
by_major_authoryes
|
summary
|
1
|
1
|
2
|
0
|
d_calc ~ 1 + by_major_author
|
Online word recognition
|
Weisleder
|
Const
|
AICc
|
52
|
51
|
by_major_authoryes
|
summary
|
0
|
0
|
-2
|
0
|
d_calc ~ 1 + by_major_author
|
Statistical word segmentation
|
Thiessen
|
Const
|
AICc
|
125
|
125
|
by_major_authoryes
|
summary
|
1
|
0
|
2
|
0
|
d_calc ~ 1 + by_major_author
|
Vowel discrimination (native)
|
Bohn
|
Const
|
AICc
|
255
|
252
|
by_major_authoryes
|
summary
|
1
|
0
|
2
|
0
|
d_calc ~ 1 + by_major_author
|
Vowel discrimination (native)
|
Polka
|
Const
|
AICc
|
255
|
252
|
by_major_authoryes
|
summary
|
0
|
0
|
3
|
0
|
d_calc ~ 1 + by_major_author
|
Word Segmentation (combined)
|
Singh
|
Const
|
AICc
|
322
|
320
|
by_major_authoryes
|
summary
|
1
|
0
|
3
|
0
|
d_calc ~ 1 + by_major_author
|
Infant directed speech preference
|
Werker
|
Const
|
AICc
|
67
|
66
|
visualization: not
sure what’s the best way to do it