Socrates - mock data analysis

Get the data

We set the working directory to the folder where the mock data is located.

Then we bind all datasets together into one.

# Work from the folder that holds the mock CSV files so the file names below
# can stay relative.
# NOTE(review): setwd() ties the script to one machine's folder layout;
# consider building the paths with file.path() instead.
setwd("~/downloads/mock_data")

# Read the four mock data files.
dependent <- read_csv("dependent.csv")
independent <- read_csv("independent.csv")
no_reward <- read_csv("no_reward.csv")
solo_reasoning <- read_csv("solo_reasoning.csv")

# Stack the four tables row-wise into a single data frame.
all_dfs <- list(dependent, independent, no_reward, solo_reasoning)
full_df <- all_dfs %>%
  bind_rows()

Data renaming

# Map of new column name -> column name in the raw data. Only the columns
# listed here are kept.
cols_to_keep <- list(
  participant = "participant.code",
  session = "session.code",
  first_answer = "first.1.player.answer",
  correct_answer = "first.1.subsession.correct",
  id_in_group = "second.1.player.id_in_group",
  treatment = "second.1.player.treatment",
  group_id = "second.1.group.id_in_subsession",
  second_answer = "second.1.player.answer"
)

# Keep and rename the columns above, order the rows by treatment and session,
# and add a group key that combines the session code with the group id.
cleaned_df <- full_df %>%
  select(!!!cols_to_keep) %>%
  arrange(treatment, session) %>%
  mutate(unique_group_id = glue("{session}_{group_id}"))

Data casting (pivoting wider)

Apart from pivoting, we also categorize the answers based on their changes in position.

PS: This typology relies on the fact that all mock data use the same vignette, where the correct answer is \(1\). For the real data, the correct answer should be taken from the correct_answer variable.

# Treatments whose rows are classified with the two-player typology below;
# every other treatment gets the single-player labels.
chat_treatments <- c("dependent", "independent")

# Spread each player's first and second answer into columns suffixed with the
# player's id_in_group (first_answer_1, second_answer_2, ...), then label
# every row with a typology.
# `participant` is dropped first; presumably it differs per player and would
# otherwise keep the players of one group on separate rows.
# NOTE(review): the two-player labels hard-code 1 as the correct answer and
# look only at the second answers; confirm before using on data where the
# correct answer varies.
wider_df <- cleaned_df %>%
  select(!participant) %>%
  pivot_wider(
    names_from = id_in_group,
    values_from = c(first_answer, second_answer)
  ) %>%
  mutate(
    # Temporary flag, removed again at the end of this mutate().
    is_chat = treatment %in% chat_treatments,
    typology = case_when(
      is_chat & (second_answer_1 + second_answer_2 == 2) ~
        "Both changed to correct",
      is_chat & (second_answer_1 + second_answer_2 == 0) ~
        "Both changed to incorrect",
      is_chat & second_answer_1 == 0 & second_answer_2 == 1 ~
        "Both kept old positions",
      is_chat & second_answer_1 == 1 & second_answer_2 == 0 ~
        "Position interchange",
      !is_chat & (first_answer_1 == second_answer_1) ~
        "Single old position kept",
      !is_chat & (first_answer_1 != second_answer_1) ~
        "Single position changed"
    ),
    is_chat = NULL
  )

Plotting

# Plot, for each treatment, the share of rows that fall into each typology.
#
# df: data frame with `treatment` and `typology` columns; every row counts
#     as one observation.
# Returns a ggplot object with one facet per treatment, bars showing the
# within-treatment share of each typology, and a percentage label on each bar.
bar_fun <- function(df) {
  # One formatter shared by the axis labels and the bar labels.
  pct_label <- scales::percent_format(accuracy = 1)

  df %>%
    group_by(treatment, typology) %>%
    tally() %>%
    group_by(treatment) %>%
    # Shares are computed within each treatment, so each facet sums to 100%.
    mutate(freq = n / sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x = typology, y = freq, fill = freq)) +
    geom_bar(stat = "identity", show.legend = FALSE) +
    facet_wrap(~ treatment) +
    theme(axis.text.x = element_text(angle = 90)) +
    # The limits are set here rather than with a separate ylim() call: ylim()
    # adds a second y scale that replaces this one and drops the percent
    # labels from the axis.
    scale_y_continuous(labels = pct_label, limits = c(0, 1)) +
    geom_text(
      aes(label = pct_label(freq)),
      position = position_dodge(width = 0.9),
      vjust = -0.25
    ) +
    ylab("Share of participants")
}
# Typology shares for the treatments listed in chat_treatments.
wider_df %>%
  filter(treatment %in% chat_treatments) %>%
  bar_fun()

# Typology shares for all remaining treatments.
wider_df %>%
  filter(!treatment %in% chat_treatments) %>%
  bar_fun()

Some tables

  # Share of each typology within each treatment, formatted as a percentage,
  # with one column per treatment. Typology/treatment combinations that never
  # occur show up as NA.
  wider_df %>%
    count(treatment, typology) %>%
    group_by(treatment) %>%
    mutate(freq = percent_format(accuracy = 1)(n / sum(n))) %>%
    select(treatment, typology, freq) %>%
    pivot_wider(names_from = treatment, values_from = freq) %>%
    kbl() %>%
    kable_material()

typology	dependent	independent	no_reward	solo_reasoning
Both changed to correct	58%	37%	NA	NA
Both changed to incorrect	16%	16%	NA	NA
Both kept old positions	15%	42%	NA	NA
Position interchange	11%	4%	NA	NA
Single old position kept	NA	NA	82%	68%
Single position changed	NA	NA	18%	32%