DATA_PATH <- here("data/processed/syntactic_bootstrapping_tidy_data.csv") # make all variables (i.e. things that might change) as capital letters at the top of the scripts

ma_data <- read_csv(DATA_PATH)   %>%
  filter(language == "English",
         population_type == "typically_developing", 
         stimuli_modality == "video",
         !is.na(mean_age))
ALL_CATEGORICAL_VARS <- c("test_type","presentation_type",
                      "agent_argument_type", "patient_argument_type", 
                     "stimuli_modality", "stimuli_actor", "character_identification",   "practice_phase", "test_mass_or_distributed")


get_cross_counts <- function(args, df){
  var1 = args[[1]]
  var2 = args[[2]]
  
  if (var1 != var2){
  
  df %>%
    select_(var1, var2) %>%
    rename(v1 = var1, 
          v2 = var2) %>%
    count(v1, v2) %>%
    mutate(v1_long = glue("{var1}/{v1}"),
           v2_long = glue("{var2}/{v2}"))  %>%
    select(v1_long, v2_long, n) 

  }
}

all_pair_counts <- list(ALL_CATEGORICAL_VARS,
                        ALL_CATEGORICAL_VARS) %>%
  cross() %>%
  map_df(get_cross_counts, ma_data) %>%
  complete(v1_long, v2_long, fill = list(n = 0)) %>%
  filter(v1_long != v2_long)


  all_counts_wide <- all_pair_counts %>%
    pivot_wider(names_from = v2_long, values_from = n) 
  

  all_counts_wide_matrix <- all_counts_wide %>%
    select(-v1_long) %>%
    as.matrix()
  
  row.names(all_counts_wide_matrix) <-   all_counts_wide$v1_long
  heatmaply(all_counts_wide_matrix,
            fontsize_row = 8,
            fontsize_col = 8)