# load packages
library(knitr)
library(rmarkdown)
library(tidyverse) 

#opts_chunk$set(error = F, cache = F)

This is an example of an R Markdown file analyzing experimental data. It makes heavy use of the tidyverse package by Hadley Wickham. The tidyverse is a set of packages designed to work with “tidy data.” Tidy data is organized such that each row is an observation and each column is a variable. These properties make it easy to manipulate.

0.1 Preprocess data

Read in csv of raw data.

# Load the anonymized raw experiment data from csv (path is relative to this file)
d_anonymized <- "../data/exp1D_raw.csv" %>%
  read_csv()
# head(d_anonymized)

Munge data. Get raw data into tidy format.

# Reshape the wide raw data so that each row is one participant x trial.
#
# Every column whose name contains "_" is stacked into variable/value pairs.
# The column name is then split in two ways:
#   - trial_num: the text following "_T" (NA when the name has no "_T")
#   - variable:  the text before the first "_"
# NOTE(review): this assumes the raw columns are named "<measure>_T<trial>";
# confirm against the raw csv.
# `trial_num` must be derived before `variable` is overwritten, because both
# are computed from the original column name inside the same mutate().
d_anonymized_long <- d_anonymized %>%
  gather(variable, value, contains("_")) %>%
  mutate(
    # vapply() guarantees one string per row (and character(0) for zero rows),
    # unlike unlist(lapply()), which collapses to NULL on empty input
    trial_num = vapply(strsplit(as.character(variable), "_T"),
                       function(x) x[2], character(1)),
    variable = vapply(strsplit(as.character(variable), "_"),
                      function(x) x[1], character(1))
  ) %>%
  # one column per measure again, now keyed by trial_num
  spread(variable, value) %>%
  mutate(trial_num = as.numeric(trial_num)) %>%
  # pass the function directly: funs() is deprecated since dplyr 0.8.0
  mutate_if(is.character, as.factor)

# Parse each trial's selections and work out which of them fall in the trial's
# target category.
#
# Keeps only exp, subids, trial_num, category, condition and selected, then
# adds list-columns describing the selected items plus per-row summaries.
# NOTE(review): the parsing assumes `selected` is a comma-separated string in
# which, for every item, character 2 is the category number and characters 4-6
# are the level code ("sub"/"bas"/"sup") -- confirm against the raw data.
d_anonymized_long_munged <-  d_anonymized_long %>%
  select(exp, subids, trial_num, category, condition, selected) %>%
  # selected_cat must be computed first: the next argument overwrites `selected`
  mutate(selected_cat = lapply(str_split(selected, ","), 
                               function(x) {str_sub(x, 2, 2)}), # category number of each selected item (1-3), kept as a character
         selected = lapply(str_split(selected, ","), 
                           function(x) {str_sub(x, 4, 6)})) %>%  # category level type of each selected item (basic, superordinate, or subordinate)
  # evaluate the following expressions one row (trial) at a time
  rowwise() %>%
  mutate(n_unique_selected_cat = length(unique(unlist(selected_cat))), # exemplars from how many categories were selected?
         first_cat = unlist(selected_cat)[1], # category number of the first selected item
         cat_num = if_else(category == "animals", 3, # target category number of trial: animals = 3, vehicles = 2, anything else = 1
                           if_else(category == "vehicles", 2, 1)),
         # character category digits are compared with the numeric cat_num; R coerces the number to a string for `==`
         selected_filtered = list(lapply(selected_cat, function(x, y) {x == y[1]}, cat_num)), # test whether selections were in category
         # level codes of only those selections that are in the target category
         selected_in_cat = list(unlist(selected)[unlist(selected_filtered)])) %>%
  # drop the rowwise grouping so later steps operate on whole columns again
  ungroup()


# Compute, per trial, the proportion of target-category selections at each
# level, and flag trials where only target-category exemplars were chosen.
#
# Only selections inside the target category (selected_in_cat) are counted.
# NOTE(review): the divisors (2 for "sub" and "bas", 4 for "sup") are
# presumably the number of available test items at each level -- confirm
# against the experiment design.
d_anonymized_long_munged_clean <- d_anonymized_long_munged %>%
  mutate(
    # vapply() enforces exactly one number per row and still returns a
    # numeric(0) column for zero-row input, where unlist(lapply()) gives NULL
    prop_sub = vapply(selected_in_cat,
                      function(x) sum(x == "sub") / 2, numeric(1)),
    prop_bas = vapply(selected_in_cat,
                      function(x) sum(x == "bas") / 2, numeric(1)),
    prop_sup = vapply(selected_in_cat,
                      function(x) sum(x == "sup") / 4, numeric(1))
  ) %>%
  # the list-columns are no longer needed once the proportions exist
  select(-selected, -selected_cat, -selected_in_cat, -selected_filtered) %>%
  mutate(
    # "only_target" when every selection came from a single category and that
    # category is the trial's target; first_cat (character) is compared with
    # cat_num (numeric) via R's implicit coercion
    only_responded_with_target_category = as.factor(
      if_else(n_unique_selected_cat == 1 & first_cat == cat_num,
              "only_target", "other")
    )
  )

Print readable tables with kable()

# Render the first rows of the cleaned data (key columns only) as a table
d_anonymized_long_munged_clean %>%
  head() %>%
  select(subids, trial_num, condition, category,
         prop_sub, prop_bas, prop_sup) %>%
  kable()
subids trial_num condition category prop_sub prop_bas prop_sup
1 9 three_subordinate vehicles 1.0 0 0
2 9 three_basic animals 1.0 1 0
3 9 three_superordinate animals 0.5 1 1
4 9 three_superordinate vehicles 1.0 1 1
5 9 three_superordinate animals 1.0 1 1
6 9 three_subordinate vegetables 1.0 0 0

You can include R code in the text. E.g., there are 600 observations in this dataset. You can also make things bold, italic, etc. Markdown has its own syntax.

0.2 Analyze data

0.2.1 Overall

Get means and CIs

# Per-participant mean of basic-level generalization (prop_bas),
# within each condition and experiment
mss <- d_anonymized_long_munged_clean %>%
  gather(key = "variable", value = "value", prop_sub, prop_bas, prop_sup) %>%
  filter(variable == "prop_bas") %>%
  group_by(condition, exp, subids) %>%
  summarise(value = mean(value))

# Condition-level means with bootstrapped confidence intervals,
# computed over the participant means in `value`
ms <- langcog::multi_boot_standard(
  group_by(mss, condition),
  col = "value"
)

Plot

# Bar plot of condition means with confidence-interval line ranges
ggplot(data = ms,
       mapping = aes(x = condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(mapping = aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = 0.9)) +
  # fix the y axis to the full proportion range
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "Learning Condition",
       y = "Prop Generalization to Basic Level") +
  ggtitle("Exp 1 results") +
  theme_bw(base_size = 14) +
  # fill colour duplicates the x axis, so the legend is hidden
  theme(legend.position = "none",
        strip.background = element_rect(fill = "grey"),
        strip.text = element_text(size = 10)) +
  ggthemes::scale_fill_solarized()

0.2.2 By category

Get means and CIs

# Per-participant mean of basic-level generalization (prop_bas),
# computed separately for each stimulus category
mss_cat <- d_anonymized_long_munged_clean %>%
  gather(key = "variable", value = "value", prop_sub, prop_bas, prop_sup) %>%
  filter(variable == "prop_bas") %>%
  group_by(condition, exp, subids, category) %>%
  summarise(value = mean(value))

# Condition x category means with bootstrapped confidence intervals,
# computed over the participant means in `value`
ms_cat <- langcog::multi_boot_standard(
  group_by(mss_cat, condition, category),
  col = "value"
)

Plot

# Bar plot of condition means with confidence-interval line ranges,
# one panel per stimulus category
ggplot(data = ms_cat,
       mapping = aes(x = condition, y = mean, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_linerange(mapping = aes(ymin = ci_lower, ymax = ci_upper),
                 position = position_dodge(width = 0.9)) +
  facet_grid(. ~ category) +
  # fix the y axis to the full proportion range
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "Learning Condition",
       y = "Prop Generalization to Basic Level") +
  ggtitle("Exp 1 results by category") +
  theme_bw(base_size = 14) +
  # fill colour duplicates the x axis, so the legend is hidden;
  # x labels are rotated so they do not overlap within narrow facets
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1),
        strip.background = element_rect(fill = "grey"),
        strip.text = element_text(size = 10)) +
  ggthemes::scale_fill_solarized()