Get the data

We set the working directory to where your mock data is located.

Then we bind all datasets together into one.

# Work from the folder that holds the mock-data CSV files.
setwd('~/downloads/mock_data')

# Read the four CSV files in a fixed order; the tables are stacked in this
# same order below.
csv_files <- c(
  'dependent.csv',
  'independent.csv',
  'no_reward.csv',
  'solo_reasoning.csv'
)
all_dfs <- lapply(csv_files, read_csv)

# Keep each table reachable under its own name as well.
dependent <- all_dfs[[1]]
independent <- all_dfs[[2]]
no_reward <- all_dfs[[3]]
solo_reasoning <- all_dfs[[4]]

# Stack all tables row-wise into a single data frame.
full_df <- bind_rows(all_dfs)

Data renaming

# Mapping from the short column names used in this analysis (left) to the
# column names found in the raw data (right).
cols_to_keep <- list(
  participant = 'participant.code',
  session = 'session.code',
  first_answer = 'first.1.player.answer',
  correct_answer = 'first.1.subsession.correct',
  id_in_group = 'second.1.player.id_in_group',
  treatment = 'second.1.player.treatment',
  group_id = 'second.1.group.id_in_subsession',
  second_answer = 'second.1.player.answer'
)

# Step 1: keep only the mapped columns, renaming them on the way.
selected_df <- select(full_df, !!!cols_to_keep)

# Step 2: add a group key that combines the session code with the group id.
keyed_df <- mutate(
  selected_df,
  unique_group_id = glue('{session}_{group_id}')
)

# Step 3: sort by treatment, then by session.
cleaned_df <- arrange(keyed_df, treatment, session)

Data casting (pivoting wider)

Apart from pivoting, we also categorize the answers based on their changes in position.

PS: This typology relies on the fact that all the mock data use the same vignette, in which the correct answer is \(1\). For the real data, the correct answer should be taken from the correct_answer variable instead.

# Treatments whose typology is derived from the second answers of both group
# members; every other treatment is classified from member 1's answers only.
chat_treatments <- c('dependent', 'independent')

# One row per group: the first/second answers are spread into one column per
# id_in_group (first_answer_1, second_answer_1, ...), then each row is labelled.
wider_df <- cleaned_df %>%
  select(-participant) %>%
  pivot_wider(
    names_from = id_in_group,
    values_from = c(first_answer, second_answer)
  ) %>%
  mutate(
    typology = if_else(
      treatment %in% chat_treatments,
      # Paired treatments: compare the two members' second answers.
      case_when(
        second_answer_1 + second_answer_2 == 2 ~ 'Both changed to correct',
        second_answer_1 + second_answer_2 == 0 ~ 'Both changed to incorrect',
        second_answer_1 == 0 & second_answer_2 == 1 ~ 'Both kept old positions',
        second_answer_1 == 1 & second_answer_2 == 0 ~ 'Position interchange'
      ),
      # Remaining treatments: compare member 1's first and second answer.
      case_when(
        first_answer_1 == second_answer_1 ~ 'Single old position kept',
        first_answer_1 != second_answer_1 ~ 'Single position changed'
      )
    )
  )

Plotting

# Draw a faceted bar chart of how often each typology occurs per treatment.
#
# df: data frame with `treatment` and `typology` columns; every row is
#     counted once.
# Returns a ggplot object with one facet per treatment. Shares are computed
# within each treatment, so the bars of a facet sum to 100%.
bar_fun <- function(df) {
  df %>%
    group_by(treatment, typology) %>%
    tally() %>%
    group_by(treatment) %>%
    mutate(freq = n / sum(n)) %>%
    ggplot(aes(x = typology, y = freq, fill = freq)) +
    geom_bar(stat = 'identity', show.legend = FALSE) +
    facet_wrap(~ treatment) +
    theme(axis.text.x = element_text(angle = 90)) +
    # The limits are set on the same scale as the percent labels: a separate
    # ylim() call would add a second y scale that replaces this one and
    # silently drops the percent formatting of the axis.
    scale_y_continuous(
      labels = scales::percent_format(accuracy = 1),
      limits = c(0, 1)
    ) +
    geom_text(
      aes(label = scales::percent_format(accuracy = 1)(freq)),
      position = position_dodge(width = 0.9),
      vjust = -0.25
    ) +
    ylab('Share of participants')
}
# Chart for the treatments listed in chat_treatments.
wider_df %>%
  filter(treatment %in% chat_treatments) %>%
  bar_fun()

# Chart for all remaining treatments.
wider_df %>%
  filter(!(treatment %in% chat_treatments)) %>%
  bar_fun()

Some tables

  # Share of each typology within a treatment, formatted as whole percentages
  # and laid out with one column per treatment.
  wider_df %>%
    count(treatment, typology) %>%
    group_by(treatment) %>%
    mutate(freq = scales::percent(n / sum(n), accuracy = 1)) %>%
    select(-n) %>%
    pivot_wider(names_from = treatment, values_from = freq) %>%
    kbl() %>%
    kable_material()
typology dependent independent no_reward solo_reasoning
Both changed to correct 58% 37% NA NA
Both changed to incorrect 16% 16% NA NA
Both kept old positions 15% 42% NA NA
Position interchange 11% 4% NA NA
Single old position kept NA NA 82% 68%
Single position changed NA NA 18% 32%