library(tidyverse)
library(here)
library(ggthemes)
library(metafor)
library(kableExtra)
library(ggrepel)

d <- read_csv(here("data/metalab_data_mini.csv"))
author_d <- read_csv(here("data/clean_author.csv"))

source(here("helper/model_comparison_help.r"))
source(here("helper/method_mod_help.r"))
source(here("helper/author_help.r"))



best_fit_bmeasure_model_df <- readRDS(here("cached_data/best_fit_behavioral_measure_df.Rds"))
best_fit_ep_model_df <- readRDS(here("cached_data/best_fit_exposure_phase.Rds"))
best_fit_sn_model_df <- readRDS(here("cached_data/best_fit_stimuli_naturalness.Rds"))
best_fit_major_author_df <- readRDS(here("cached_data/best_fit_major_author_df.Rds"))

age_modesl_df <- readRDS(here("cached_data/age_models_df.Rds"))

major_author_fit_df <- readRDS(here("cached_data/major_author_model_ds.Rds"))

1 1. behavioral_measure

1.1 running models for comparison

1.2 model stats for significant values

best_fit_bmeasure_model_df %>% 
  filter(grepl("behavioral_measure", term)) %>% 
  filter(p.value < .05) %>% 
  kableExtra::kable(digits = .5) %>%  
  kableExtra::kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "500px")

term	type	estimate	std.error	statistic	model_spec	dataset	model_spec_clean	ic	ML	REML	model_type
behavioral_measuremanual	summary	0.8	0.2	3.6	d_calc ~ 1 + behavioral_measure	Sound symbolism	Const	AICc	51.0	51.2	bmeasure
behavioral_measurelooking	summary	-0.8	0.2	-5.1	d_calc ~ 1 + behavioral_measure	Vowel discrimination (native)	Const	AICc	210.2	206.8	bmeasure
behavioral_measurelooking	summary	-1.5	0.4	-3.4	d_calc ~ 1 + behavioral_measure	Vowel discrimination (non-native)	Const	AICc	60.5	54.5	bmeasure

1.3 visualization

d %>% 
  filter(mean_age_months < 36) %>% 
  filter(ds_clean %in% unique(best_fit_bmeasure_model_df$dataset)) %>% 
  ggplot(aes(x = mean_age_months, y = d_calc, color = behavioral_measure)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm") + 
  facet_wrap(~ds_clean)+ 
  theme_few()

2 2. Exposure phase

2.1 running models for comparison

2.2 model stats for significant values

best_fit_ep_model_df %>% 
  filter(grepl("exposure_phase", term)) %>% 
  filter(p.value < .05) %>% 
  kableExtra::kable(digits = .2) %>%  
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "500px")

term	type	estimate	std.error	statistic	model_spec	dataset	model_spec_clean	ic	ML	REML	model_type
exposure_phasehabituation	summary	0	0	-3	d_calc ~ log(mean_age_months) + exposure_phase	Language discrimination and preference	Log	AICc	155	151	ep
exposure_phasefamiliarization	summary	2	1	2	d_calc ~ 1 + exposure_phase	Natural speech preference	Const	AICc	110	106	ep
exposure_phasenot_applicable	summary	0	0	-2	d_calc ~ 1 + exposure_phase	Neonatal Imitation	Const	AICc	58	53	ep
exposure_phasefamiliarization	summary	-1	0	-2	d_calc ~ 1 + exposure_phase	Vowel discrimination (native)	Const	AICc	230	223	ep
exposure_phasehabituation	summary	-1	0	-3	d_calc ~ 1 + exposure_phase	Vowel discrimination (native)	Const	AICc	230	223	ep

2.3 visualization

d %>% 
  filter(mean_age_months < 36) %>% 
  filter(ds_clean %in% unique(best_fit_ep_model_df$dataset)) %>% 
  ggplot(aes(x = mean_age_months, y = d_calc, color = exposure_phase)) + 
  geom_point(alpha = .5) + 
  geom_smooth(method = "lm") + 
  facet_wrap(~ds_clean)+ 
  theme_few()

3 3. Stimuli naturalness

## get subset of data
sn_d <- d %>% 
  distinct(ds_clean, stimuli_naturalness) %>% 
  filter(!is.na(stimuli_naturalness)) %>% 
  group_by(ds_clean) %>% 
  count() %>% 
  filter(!n == 1) %>% 
  pull(ds_clean)

## get model comparison stats
sn_models_df <- lapply(sn_d, 
       function(name){
         print(name)
         get_compare_IC_df(d %>% filter(!is.na(stimuli_naturalness)), name, c("stimuli_naturalness"))
       }) %>% 
  bind_rows() %>% 
  mutate(model_type = "sn")

## [1] "Abstract rule learning"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Categorization bias"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Label advantage in concept learning"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Mutual exclusivity"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Prosocial agents"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Statistical word segmentation"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Vowel discrimination (native)"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "Vowel discrimination (non-native)"
## [1] "full!"
## [1] "full!"
## [1] "full!"
## [1] "full!"

## find best fitting models 
min_AICc_sn_models_df <- sn_models_df %>% 
  filter(ic == "AICc") %>% 
  group_by(dataset) %>% 
  filter(REML == min(REML)) 

## finding the stats of the models 
sn_models_fits_df <- lapply(sn_d, 
       function(name){
         print(name)
         get_model_fit_df(d %>% filter(!is.na(stimuli_naturalness)), name, c("stimuli_naturalness"))
       }) %>% 
  bind_rows()

## [1] "Abstract rule learning"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Categorization bias"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Label advantage in concept learning"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Mutual exclusivity"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Prosocial agents"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Statistical word segmentation"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Vowel discrimination (native)"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "Vowel discrimination (non-native)"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"
## [1] "~ 1 | short_cite/same_infant/unique_row"

## find the best fit model's stats
best_fit_sn_model_df <- sn_models_fits_df %>% 
  left_join(min_AICc_sn_models_df, 
            by =  c("model_spec", "dataset", "model_spec_clean")) %>% 
  filter(!is.na(REML)) 

saveRDS(best_fit_sn_model_df, here("cached_data/best_fit_stimuli_naturalness.Rds"))

3.0.1 significant models

best_fit_sn_model_df %>% 
  filter(grepl("naturalness", term)) %>% 
  filter(p.value < 0.05) %>% 
  kableExtra::kable(digits = .2) %>%  
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "500px")

term	type	statistic	model_spec	dataset	model_spec_clean	ic	ML	REML	model_type
stimuli_naturalnessnatural	summary	2	d_calc ~ I(mean_age_months^2) + stimuli_naturalness	Abstract rule learning	Quadratic	AICc	140	137	sn
stimuli_naturalnessnatural	summary	2	d_calc ~ 1 + stimuli_naturalness	Label advantage in concept learning	Const	AICc	170	169	sn
stimuli_naturalnessnatural	summary	4	d_calc ~ 1 + stimuli_naturalness	Statistical word segmentation	Const	AICc	118	118	sn

4 4. Major author

all_nested_d <-lapply(distinct(d, ds_clean) %>% pull(), 
       function(x){
         get_major_author_effect(d, x, author_d)
       }) %>% 
  bind_rows() %>% 
  group_by(ds_clean, major_author) %>% 
  nest() %>% 
  filter(!major_author %in% c("…SweigWilson", "deVilliers", "Fernald","GrafEstes", "HillairetdeBoisferon"
                              )) %>% 
  filter(!is.na(major_author) & ds_clean != "Prosocial Agents")

4.1 get the best fit major author models

major_author_fit_df <- readRDS(here("cached_data/major_author_model_ds.Rds"))

major_author_model_fit_df <- readRDS(here("cached_data/major_author_fits_ds.Rds"))

4.2 get the significant value

best_fit_major_author_model <- readRDS(here("cached_data/best_fit_major_author_df.Rds"))

best_fit_major_author_model %>% 
  filter(grepl("by_major_author", term), 
         p.value < 0.05)

## # A tibble: 12 × 13
##    term    type  estim…¹ std.e…² stati…³ p.value model…⁴ dataset major…⁵ model…⁶
##    <chr>   <chr>   <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>   <chr>  
##  1 by_maj… summ…   2.04   0.924     2.21 2.72e-2 d_calc… Catego… Bauer   Log    
##  2 by_maj… summ…   2.04   0.924     2.21 2.72e-2 d_calc… Catego… Mandler Log    
##  3 by_maj… summ…  -0.388  0.190    -2.05 4.08e-2 d_calc… Cross-… Vlach   Quadra…
##  4 by_maj… summ…  -0.423  0.0731   -5.79 6.92e-9 d_calc… Famili… Keren-… Linear 
##  5 by_maj… summ…   0.536  0.252     2.13 3.35e-2 d_calc… Gaze f… Mundy   Quadra…
##  6 by_maj… summ…   0.650  0.230     2.83 4.66e-3 d_calc… Mispro… Swingl… Quadra…
##  7 by_maj… summ…   1.11   0.564     1.97 4.84e-2 d_calc… Online… Weisle… Const  
##  8 by_maj… summ…  -0.243  0.106    -2.29 2.20e-2 d_calc… Statis… Thiess… Const  
##  9 by_maj… summ…   0.542  0.274     1.98 4.82e-2 d_calc… Vowel … Bohn    Const  
## 10 by_maj… summ…   0.542  0.274     1.98 4.82e-2 d_calc… Vowel … Polka   Const  
## 11 by_maj… summ…   0.241  0.0819    2.94 3.24e-3 d_calc… Word S… Singh   Const  
## 12 by_maj… summ…   0.522  0.170     3.07 2.13e-3 d_calc… Infant… Werker  Const  
## # … with 3 more variables: ic <chr>, ML <dbl>, REML <dbl>, and abbreviated
## #   variable names ¹estimate, ²std.error, ³statistic, ⁴model_spec,
## #   ⁵major_author, ⁶model_spec_clean

best_fit_major_author_model %>% 
  filter(grepl("by_major_author", term), 
         p.value < 0.05) %>% 
  kableExtra::kable(digits = .2) %>%  
  kable_styling() %>%
  kableExtra::scroll_box(width = "100%", height = "500px")

term	type	estimate	std.error	statistic	model_spec	dataset	major_author	model_spec_clean	ic	ML	REML
by_major_authoryes	summary	2	1	2	d_calc ~ log(mean_age_months) + by_major_author	Categorization bias	Bauer	Log	AICc	284	273
by_major_authoryes	summary	2	1	2	d_calc ~ log(mean_age_months) + by_major_author	Categorization bias	Mandler	Log	AICc	284	273
by_major_authoryes	summary	0	0	-2	d_calc ~ I(mean_age_months^2) + by_major_author	Cross-situational word learning	Vlach	Quadratic	AICc	79	78
by_major_authoryes	summary	0	0	-6	d_calc ~ mean_age_months + by_major_author	Familiar word recognition	Keren-Portnoy	Linear	AICc	16	21
by_major_authoryes	summary	1	0	2	d_calc ~ I(mean_age_months^2) + by_major_author	Gaze following (combined)	Mundy	Quadratic	AICc	151	146
by_major_authoryes	summary	1	0	3	d_calc ~ I(mean_age_months^2) + by_major_author	Mispronunciation sensitivity	Swingley	Quadratic	AICc	616	609
by_major_authoryes	summary	1	1	2	d_calc ~ 1 + by_major_author	Online word recognition	Weisleder	Const	AICc	52	51
by_major_authoryes	summary	0	0	-2	d_calc ~ 1 + by_major_author	Statistical word segmentation	Thiessen	Const	AICc	125	125
by_major_authoryes	summary	1	0	2	d_calc ~ 1 + by_major_author	Vowel discrimination (native)	Bohn	Const	AICc	255	252
by_major_authoryes	summary	1	0	2	d_calc ~ 1 + by_major_author	Vowel discrimination (native)	Polka	Const	AICc	255	252
by_major_authoryes	summary	0	0	3	d_calc ~ 1 + by_major_author	Word Segmentation (combined)	Singh	Const	AICc	322	320
by_major_authoryes	summary	1	0	3	d_calc ~ 1 + by_major_author	Infant directed speech preference	Werker	Const	AICc	67	66

03_moderators

anjie

2023-08-03

1 1. behavioral_measure

1.1 running models for comparison

1.2 model stats for significant values

1.3 visualization

2 2. Exposure phase

2.1 running models for comparison

2.2 model stats for significant values

2.3 visualization

3 3. Stimuli naturalness

3.0.1 significant models

4 4. Major author

4.1 get the best fit major author models

4.2 get the significant value

4.3 visualization: not sure what’s the best way to do it