library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(googlesheets4)
library(metafor)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading 'metafor' package (version 2.4-0). For an overview
## and introduction to the package please type: help(metafor).
library(knitr)
#FIX ME: KEEP THE FINAL SHEET UPDATED
MA_DATA_GOOGLE_SHEET_ID <- "1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o"
ma_data <- read_sheet(MA_DATA_GOOGLE_SHEET_ID, "MA data tidy with ES NEW")
## Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## https://gargle.r-lib.org/articles/non-interactive-auth.html
## The googlesheets4 package is using a cached token for acao@andrew.cmu.edu.
## Auto-refreshing stale OAuth token.
## Reading from "MA Data- Syntactic Bootstrapping"
## Range "'MA data tidy with ES NEW'"
#FIX ME: SOMETHING WRONG ABOUT THIS FUNCTION
shorter_reference <- function(ref){
ref_list <- as.list(strsplit(ref, ","))
first_author <- ref_list[[1]][1]
last_item <- sapply(ref_list, tail,1)
year <- as.character(as.numeric(gsub("\\D"," ",last_item)))
short <- paste(first_author, " et al.", "(", year, ")",sep = "")
return(short)
}
ma_data %>% mutate(short_cite =
ifelse(nchar(short_cite)>50,
shorter_reference(short_cite),
short_cite)) -> ma_data
ma_data
coder <chr> | unique_id <chr> | |
---|---|---|
alana/anjie | arunachalam2010 | |
alana/anjie | arunachalam2010 | |
alana/anjie | arunachalam2013b | |
alana/anjie | arunachalam2013b | |
alana/anjie | arunachalam2017 | |
anjie | arunachalam2017 | |
anjie | arunachalam2017 | |
alana | asplin2002 | |
alana | asplin2002 | |
alana | asplin2002 |
ma_model <- rma(ma_data$d_calc, ma_data$d_var_calc)
## Warning in rma(ma_data$d_calc, ma_data$d_var_calc): Studies with NAs omitted
## from model fitting.
ma_model
##
## Random-Effects Model (k = 96; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 1.5517 (SE = 0.2514)
## tau (square root of estimated tau^2 value): 1.2457
## I^2 (total heterogeneity / total variability): 93.52%
## H^2 (total variability / sampling variability): 15.44
##
## Test for Heterogeneity:
## Q(df = 95) = 700.5932, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.4206 0.1349 3.1176 0.0018 0.1562 0.6850 **
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
forest(ma_model,
header = T,
slab = ma_data$unique_id,
col = "red",
cex = .7)
Funnel plot filter(verb_type == “fake_verb” & abs(d_calc) < 5)
# if transitive, color red; intransitive, color blue
ma_model_funnel <- rma(ma_data$d_calc, ma_data$d_var_calc)
## Warning in rma(ma_data$d_calc, ma_data$d_var_calc): Studies with NAs omitted
## from model fitting.
f<- funnel(ma_model_funnel, xlab = "Effect Size")
#see a closer look (exclude outlier)
ma_data %>% filter(abs(d_calc) < 5) -> ma_data_no_outlier
ma_model_funnel_no_outlier <- rma(ma_data_no_outlier$d_calc, ma_data_no_outlier$d_var_calc)
f<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size")
ggplot(ma_data, aes(x = mean_age,
y = d_calc,
size = n_1)) +
geom_point() +
geom_smooth(method = "lm") +
ylab("Effect Size") +
xlab("Age (days)") +
ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
theme_classic() +
theme(legend.position = "none")
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
ggplot(ma_data, aes(x = (mean_age/30.44),
y = d_calc,
size = n_1)) +
geom_point() +
geom_smooth(method = "lm") +
ylab("Effect Size") +
xlab("Age (months)") +
ggtitle("Syntactical Bootstrapping effect size vs. Age (months)") +
theme_classic() +
theme(legend.position = "none")
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
ma_data %>% group_by(sentence_structure) %>% summarise(n = n())
sentence_structure <chr> | n <int> | |||
---|---|---|---|---|
bare_verb | 3 | |||
intransitive | 32 | |||
transitive | 64 |
ma_data_sentence_structure <- ma_data %>% filter(!is.na(d_calc) & sentence_structure != "bare_verb")
cis_by_structure <- ma_data_sentence_structure %>%
group_by(sentence_structure) %>%
summarize(mean = mean(d_calc),
sd = sd(d_calc),
n = n()) %>%
mutate(ci_range_95 = 1.96 * (sd/sqrt(n)),
ci_lower = mean - ci_range_95,
ci_upper = mean + ci_range_95)
ggplot(ma_data_sentence_structure, aes(x = sentence_structure,
y = d_calc,
color = sentence_structure)) +
geom_violin() +
geom_point(alpha = .4) +
ylab("Effect Size") +
xlab("Sentence Structure") +
ggtitle("SB effect size by Sentence Structure") +
geom_errorbar(data = cis_by_structure,
aes(x = sentence_structure,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_pointrange(data = cis_by_structure,
aes(x = sentence_structure,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_hline(aes(yintercept = 0), linetype = 2) +
theme_classic(base_size = 16) +
theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: y
ma_data %>% group_by(test_type) %>% summarise(n = n())
test_type <chr> | n <int> | |||
---|---|---|---|---|
action | 85 | |||
actor | 6 | |||
agent | 8 |
ma_data_test_type <- ma_data %>% filter(!is.na(d_calc))
cis_by_test_type <- ma_data_test_type %>%
group_by(test_type) %>%
summarize(mean = mean(d_calc),
sd = sd(d_calc),
n = n()) %>%
mutate(ci_range_95 = 1.96 * (sd/sqrt(n)),
ci_lower = mean - ci_range_95,
ci_upper = mean + ci_range_95)
ggplot(ma_data_test_type, aes(x = test_type,
y = d_calc,
color = test_type)) +
geom_violin() +
geom_point(alpha = .4) +
ylab("Effect Size") +
xlab("Test Type") +
ggtitle("SB effect size by Test type") +
geom_errorbar(data = cis_by_test_type,
aes(x = test_type,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_pointrange(data = cis_by_test_type,
aes(x = test_type,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_hline(aes(yintercept = 0), linetype = 2) +
theme_classic(base_size = 16) +
theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: y
ma_data %>% group_by(agent_argument_type,patient_argument_type) %>% summarise(n = n())
agent_argument_type <chr> | patient_argument_type <chr> | n <int> | ||
---|---|---|---|---|
2noun | NA | 1 | ||
2nouns | NA | 5 | ||
NA | NA | 3 | ||
noun | NA | 4 | ||
noun | noun | 24 | ||
noun | noun_and_dropping | 1 | ||
noun_and_dropping | NA | 2 | ||
noun_and_dropping | noun_and_dropping | 2 | ||
noun_and_pronoun | NA | 4 | ||
noun_and_pronoun | noun | 2 |
ma_data %>% group_by(stimuli_modality) %>% summarise(n = n())
stimuli_modality <chr> | n <int> | |||
---|---|---|---|---|
animation | 8 | |||
picture | 6 | |||
video | 85 |
ma_data %>% filter(!is.na(d_calc)) -> mata_data_noNAd
mata_data_noNAd %>% group_by(stimuli_actor) %>% summarise(n = n())
stimuli_actor <chr> | n <int> | |||
---|---|---|---|---|
non-person | 25 | |||
person | 71 |
cis_by_stimuli_actor <- mata_data_noNAd %>%
group_by(stimuli_actor) %>%
summarize(mean = mean(d_calc),
sd = sd(d_calc),
n = n()) %>%
mutate(ci_range_95 = 1.96 * (sd/sqrt(n)),
ci_lower = mean - ci_range_95,
ci_upper = mean + ci_range_95)
ggplot(mata_data_noNAd, aes(x = stimuli_actor,
y = d_calc,
color = stimuli_actor)) +
geom_violin() +
geom_point(alpha = .4) +
ylab("Effect Size") +
xlab("Actor Type") +
ggtitle("SB effect size by Stimuli Actor") +
geom_errorbar(data = cis_by_stimuli_actor,
aes(x = stimuli_actor,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_pointrange(data = cis_by_stimuli_actor,
aes(x = stimuli_actor,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_hline(aes(yintercept = 0), linetype = 2) +
theme_classic(base_size = 16) +
theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: y
#collapse no and NA
mata_data_noNAd %>%
mutate(character_identification = ifelse(character_identification == "NA","no",character_identification)) -> mata_data_noNAd
mata_data_noNAd %>% group_by(character_identification) %>% summarise(n = n())
character_identification <chr> | n <int> | |||
---|---|---|---|---|
no | 72 | |||
yes | 24 |
cis_by_character_identification <- mata_data_noNAd %>%
group_by(character_identification) %>%
summarize(mean = mean(d_calc),
sd = sd(d_calc),
n = n()) %>%
mutate(ci_range_95 = 1.96 * (sd/sqrt(n)),
ci_lower = mean - ci_range_95,
ci_upper = mean + ci_range_95)
ggplot(mata_data_noNAd, aes(x = character_identification,
y = d_calc,
color = character_identification)) +
geom_violin() +
geom_point(alpha = .4) +
ylab("Effect Size") +
xlab("Character Identification") +
ggtitle("SB effect size by Character Identification") +
geom_errorbar(data = cis_by_character_identification,
aes(x = character_identification,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_pointrange(data = cis_by_character_identification,
aes(x = character_identification,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_hline(aes(yintercept = 0), linetype = 2) +
theme_classic(base_size = 16) +
theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: y
mata_data_noNAd %>%
mutate(practice_phase = ifelse(practice_phase == "NA","no",practice_phase)) -> mata_data_noNAd
mata_data_noNAd %>% group_by(practice_phase) %>% summarise(n = n())
practice_phase <chr> | n <int> | |||
---|---|---|---|---|
no | 41 | |||
yes | 55 |
cis_by_practice_phase<- mata_data_noNAd %>%
group_by(practice_phase) %>%
summarize(mean = mean(d_calc),
sd = sd(d_calc),
n = n()) %>%
mutate(ci_range_95 = 1.96 * (sd/sqrt(n)),
ci_lower = mean - ci_range_95,
ci_upper = mean + ci_range_95)
ggplot(mata_data_noNAd, aes(x = practice_phase,
y = d_calc,
color = practice_phase)) +
geom_violin() +
geom_point(alpha = .4) +
ylab("Effect Size") +
xlab("Practice phase") +
ggtitle("SB effect size by practice phase") +
geom_errorbar(data = cis_by_practice_phase,
aes(x = practice_phase,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_pointrange(data = cis_by_practice_phase,
aes(x = practice_phase,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_hline(aes(yintercept = 0), linetype = 2) +
theme_classic(base_size = 16) +
theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: y
mata_data_noNAd %>% group_by(test_mass_or_distributed) %>% summarise(n = n())
test_mass_or_distributed <chr> | n <int> | |||
---|---|---|---|---|
distributed | 55 | |||
mass | 41 |
cis_by_test_mass_or_distributed<- mata_data_noNAd %>%
group_by(test_mass_or_distributed) %>%
summarize(mean = mean(d_calc),
sd = sd(d_calc),
n = n()) %>%
mutate(ci_range_95 = 1.96 * (sd/sqrt(n)),
ci_lower = mean - ci_range_95,
ci_upper = mean + ci_range_95)
ggplot(mata_data_noNAd, aes(x = test_mass_or_distributed,
y = d_calc,
color = test_mass_or_distributed)) +
geom_violin() +
geom_point(alpha = .4) +
ylab("Effect Size") +
xlab("Test mass or distributed") +
ggtitle("SB effect size by Test mass or distributed") +
geom_errorbar(data = cis_by_test_mass_or_distributed,
aes(x = test_mass_or_distributed,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_pointrange(data = cis_by_test_mass_or_distributed,
aes(x = test_mass_or_distributed,
y = mean, ymin = ci_lower,
ymax = ci_upper),
color = "black") +
geom_hline(aes(yintercept = 0), linetype = 2) +
theme_classic(base_size = 16) +
theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: y
mata_data_noNAd %>%
ggplot(mapping = aes(x = n_train_test_pair)) +
geom_histogram() +
ggtitle("Histogram of the train-test pair distribution")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mata_data_noNAd %>%
ggplot(mapping = aes(x = N_test_trial_per_pair)) +
geom_histogram() +
ggtitle("Histogram of the test_trial_per_pai")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mata_data_noNAd %>%
filter(!is.na(n_repetitions_sentence)) %>%
ggplot(mapping = aes(x = n_repetitions_sentence)) +
geom_histogram() +
ggtitle("Histogram of the n_repetitions_sentence")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mata_data_noNAd %>%
filter(!is.na(n_repetitions_video)) %>%
ggplot(mapping = aes(x = n_repetitions_video)) +
geom_histogram() +
ggtitle("Histogram of the n_repetitions_video")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.