library(tidyverse)

## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(googlesheets4)
library(metafor)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading 'metafor' package (version 2.4-0). For an overview 
## and introduction to the package please type: help(metafor).

library(knitr)

#FIX ME: KEEP THE FINAL SHEET UPDATED
MA_DATA_GOOGLE_SHEET_ID <- "1kSL5lpmHcaw9lOp2dhJ_RH15REFe7oZVwMO1vJc930o"
ma_data <- read_sheet(MA_DATA_GOOGLE_SHEET_ID, "MA data tidy with ES NEW")

## Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## https://gargle.r-lib.org/articles/non-interactive-auth.html

## The googlesheets4 package is using a cached token for acao@andrew.cmu.edu.

## Auto-refreshing stale OAuth token.

## Reading from "MA Data- Syntactic Bootstrapping"

## Range "'MA data tidy with ES NEW'"

First trim the label

#FIX ME: SOMETHING WRONG ABOUT THIS FUNCTION
shorter_reference <- function(ref){
  ref_list <- as.list(strsplit(ref, ","))
  first_author <- ref_list[[1]][1]
  last_item <- sapply(ref_list, tail,1)
  year <- as.character(as.numeric(gsub("\\D"," ",last_item)))
  short <- paste(first_author, " et al.", "(", year, ")",sep = "")
  return(short)
}


ma_data %>%  mutate(short_cite = 
                     ifelse(nchar(short_cite)>50, 
                            shorter_reference(short_cite),
                            short_cite)) -> ma_data

ma_data

ma_model <- rma(ma_data$d_calc, ma_data$d_var_calc)

## Warning in rma(ma_data$d_calc, ma_data$d_var_calc): Studies with NAs omitted
## from model fitting.

ma_model

## 
## Random-Effects Model (k = 96; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 1.5517 (SE = 0.2514)
## tau (square root of estimated tau^2 value):      1.2457
## I^2 (total heterogeneity / total variability):   93.52%
## H^2 (total variability / sampling variability):  15.44
## 
## Test for Heterogeneity:
## Q(df = 95) = 700.5932, p-val < .0001
## 
## Model Results:
## 
## estimate      se    zval    pval   ci.lb   ci.ub 
##   0.4206  0.1349  3.1176  0.0018  0.1562  0.6850  ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

forest(ma_model,
       header = T,
       slab = ma_data$unique_id,
       col = "red",
       cex = .7)

Funnel plot filter(verb_type == “fake_verb” & abs(d_calc) < 5)

# if transitive, color red; intransitive, color blue 


ma_model_funnel <- rma(ma_data$d_calc, ma_data$d_var_calc)

## Warning in rma(ma_data$d_calc, ma_data$d_var_calc): Studies with NAs omitted
## from model fitting.

f<- funnel(ma_model_funnel, xlab = "Effect Size")

#see a closer look (exclude outlier)

ma_data %>% filter(abs(d_calc) < 5) -> ma_data_no_outlier 
ma_model_funnel_no_outlier <- rma(ma_data_no_outlier$d_calc, ma_data_no_outlier$d_var_calc)
f<- funnel(ma_model_funnel_no_outlier, xlab = "Effect Size")

Potential Moderators

Continuous

Age:

ggplot(ma_data, aes(x = mean_age, 
                                 y = d_calc, 
                                 size = n_1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (days)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (days)") +
  theme_classic() +
  theme(legend.position = "none")

## Warning: Removed 4 rows containing non-finite values (stat_smooth).

## Warning: Removed 4 rows containing missing values (geom_point).

ggplot(ma_data, aes(x = (mean_age/30.44), 
                                 y = d_calc, 
                                 size = n_1)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Effect Size") +
  xlab("Age (months)") +
  ggtitle("Syntactical Bootstrapping effect size vs. Age (months)") +
  theme_classic() +
  theme(legend.position = "none")

## Warning: Removed 4 rows containing non-finite values (stat_smooth).

## Warning: Removed 4 rows containing missing values (geom_point).

vocab [skip for now]

Categorical

sentence structure

ma_data %>% group_by(sentence_structure) %>% summarise(n = n())

ma_data_sentence_structure <- ma_data %>% filter(!is.na(d_calc) & sentence_structure != "bare_verb") 

cis_by_structure <- ma_data_sentence_structure %>%
    group_by(sentence_structure) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)


ggplot(ma_data_sentence_structure, aes(x = sentence_structure, 
                    y = d_calc, 
                    color = sentence_structure)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Sentence Structure") +
  ggtitle("SB effect size by Sentence Structure") +
  geom_errorbar(data = cis_by_structure, 
                  aes(x = sentence_structure,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_structure, 
                  aes(x = sentence_structure,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

## Warning: Ignoring unknown aesthetics: y

test type

ma_data %>% group_by(test_type) %>% summarise(n = n())

ma_data_test_type <- ma_data %>% filter(!is.na(d_calc)) 

cis_by_test_type <- ma_data_test_type %>%
    group_by(test_type) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)


ggplot(ma_data_test_type, aes(x = test_type, 
                    y = d_calc, 
                    color = test_type)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Test Type") +
  ggtitle("SB effect size by Test type") +
  geom_errorbar(data = cis_by_test_type, 
                  aes(x = test_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_test_type, 
                  aes(x = test_type,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

## Warning: Ignoring unknown aesthetics: y

agent argument type / patient argument type [show variances]

ma_data %>% group_by(agent_argument_type,patient_argument_type) %>% summarise(n = n())

stimuli_modality

ma_data %>% group_by(stimuli_modality) %>% summarise(n = n())

ma_data %>%  filter(!is.na(d_calc)) -> mata_data_noNAd

stimuli_actor

mata_data_noNAd %>% group_by(stimuli_actor) %>% summarise(n = n())

cis_by_stimuli_actor <- mata_data_noNAd %>%
    group_by(stimuli_actor) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)


ggplot(mata_data_noNAd, aes(x = stimuli_actor, 
                    y = d_calc, 
                    color = stimuli_actor)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Actor Type") +
  ggtitle("SB effect size by Stimuli Actor") +
  geom_errorbar(data = cis_by_stimuli_actor, 
                  aes(x = stimuli_actor,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_stimuli_actor, 
                  aes(x = stimuli_actor,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

## Warning: Ignoring unknown aesthetics: y

character identification

#collapse no and NA
mata_data_noNAd %>% 
  mutate(character_identification = ifelse(character_identification == "NA","no",character_identification)) -> mata_data_noNAd

mata_data_noNAd %>% group_by(character_identification) %>% summarise(n = n())

cis_by_character_identification <- mata_data_noNAd %>%
    group_by(character_identification) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)


ggplot(mata_data_noNAd, aes(x = character_identification, 
                    y = d_calc, 
                    color = character_identification)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Character Identification") +
  ggtitle("SB effect size by Character Identification") +
  geom_errorbar(data = cis_by_character_identification, 
                  aes(x = character_identification,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_character_identification, 
                  aes(x = character_identification,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

## Warning: Ignoring unknown aesthetics: y

practice phase

mata_data_noNAd %>% 
  mutate(practice_phase = ifelse(practice_phase == "NA","no",practice_phase)) -> mata_data_noNAd

mata_data_noNAd %>% group_by(practice_phase) %>% summarise(n = n())

cis_by_practice_phase<- mata_data_noNAd %>%
    group_by(practice_phase) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)


ggplot(mata_data_noNAd, aes(x = practice_phase, 
                    y = d_calc, 
                    color = practice_phase)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Practice phase") +
  ggtitle("SB effect size by practice phase") +
  geom_errorbar(data = cis_by_practice_phase, 
                  aes(x = practice_phase,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_practice_phase, 
                  aes(x = practice_phase,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

## Warning: Ignoring unknown aesthetics: y

mass_or_distribute

mata_data_noNAd %>% group_by(test_mass_or_distributed) %>% summarise(n = n())

cis_by_test_mass_or_distributed<- mata_data_noNAd %>%
    group_by(test_mass_or_distributed) %>%
    summarize(mean = mean(d_calc),
            sd = sd(d_calc),
            n = n()) %>%
    mutate(ci_range_95 =  1.96 * (sd/sqrt(n)),
         ci_lower = mean - ci_range_95,
         ci_upper = mean + ci_range_95)


ggplot(mata_data_noNAd, aes(x = test_mass_or_distributed, 
                    y = d_calc, 
                    color = test_mass_or_distributed)) +
  geom_violin() +
  geom_point(alpha = .4)  +
  ylab("Effect Size") +
  xlab("Test mass or distributed") +
  ggtitle("SB effect size by Test mass or distributed") +
  geom_errorbar(data = cis_by_test_mass_or_distributed, 
                  aes(x = test_mass_or_distributed,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_pointrange(data = cis_by_test_mass_or_distributed, 
                  aes(x = test_mass_or_distributed,
                      y = mean, ymin = ci_lower, 
                      ymax = ci_upper), 
                  color = "black") +
  geom_hline(aes(yintercept = 0), linetype = 2) +
  theme_classic(base_size = 16) +
  theme(legend.position = "none")

## Warning: Ignoring unknown aesthetics: y

show a summary of n_reptition_sentence / n_repetition_video

mata_data_noNAd %>% 
  ggplot(mapping = aes(x = n_train_test_pair)) + 
  geom_histogram() + 
  ggtitle("Histogram of the train-test pair distribution")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mata_data_noNAd %>% 
  ggplot(mapping = aes(x = N_test_trial_per_pair)) + 
  geom_histogram() + 
  ggtitle("Histogram of the test_trial_per_pai")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mata_data_noNAd %>% 
  filter(!is.na(n_repetitions_sentence)) %>% 
  ggplot(mapping = aes(x = n_repetitions_sentence)) + 
  geom_histogram() + 
  ggtitle("Histogram of the n_repetitions_sentence")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mata_data_noNAd %>% 
  filter(!is.na(n_repetitions_video)) %>% 
  ggplot(mapping = aes(x = n_repetitions_video)) + 
  geom_histogram() + 
  ggtitle("Histogram of the n_repetitions_video")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

sb DRAFT

First trim the label

Potential Moderators

Continuous

Age:

vocab [skip for now]

Categorical

sentence structure

test type

agent argument type / patient argument type [show variances]

stimuli_modality

stimuli_actor

character identification

practice phase

mass_or_distribute

show a summary of n_reptition_sentence / n_repetition_video