We set the working directory to where your mock data is located.
Then we bind all datasets together into one.
# Work from the folder that holds the mock-data exports.
setwd('~/downloads/mock_data')

# Read the four treatment files in one pass; the list order matches the
# order of the file names below.
all_dfs <- lapply(
  c(
    'dependent.csv',
    'independent.csv',
    'no_reward.csv',
    'solo_reasoning.csv'
  ),
  read_csv
)

# Keep each treatment available under its own name as well.
dependent <- all_dfs[[1]]
independent <- all_dfs[[2]]
no_reward <- all_dfs[[3]]
solo_reasoning <- all_dfs[[4]]

# Stack all treatments into a single data frame.
full_df <- bind_rows(all_dfs)
# Columns to retain from the raw export: the name on the left is the short
# name used from here on, the string on the right is the raw column name.
cols_to_keep <- list(
  participant = 'participant.code',
  session = 'session.code',
  first_answer = 'first.1.player.answer',
  correct_answer = 'first.1.subsession.correct',
  id_in_group = 'second.1.player.id_in_group',
  treatment = 'second.1.player.treatment',
  group_id = 'second.1.group.id_in_subsession',
  second_answer = 'second.1.player.answer'
)

cleaned_df <- full_df %>%
  # Splicing the named list selects and renames in a single step.
  select(!!!cols_to_keep) %>%
  # group_id is combined with the session code to form the group key.
  mutate(unique_group_id = glue('{session}_{group_id}')) %>%
  arrange(treatment, session)
Apart from pivoting, we also categorize the answers based on their changes in position.
PS: This typology relies on the fact that all of the mock data use the same vignette, in which the correct answer is \(1\). For the real data, the correct answer should be taken from the `correct_answer` variable instead.
# Treatments whose typology is built from both group members' answers.
chat_treatments <- c('dependent', 'independent')

# One row per group: each member's answers are spread into columns suffixed
# with id_in_group (first_answer_1, second_answer_1, ...), and the row is
# then labelled with a typology.
wider_df <- cleaned_df %>%
  select(-participant) %>%
  pivot_wider(
    names_from = id_in_group,
    values_from = c(first_answer, second_answer)
  ) %>%
  mutate(
    typology = if_else(
      treatment %in% chat_treatments,
      # Chat treatments: classify by the pair of second answers
      # (answers are compared against the hard-coded values 0 and 1).
      case_when(
        second_answer_1 + second_answer_2 == 2 ~ 'Both changed to correct',
        second_answer_1 + second_answer_2 == 0 ~ 'Both changed to incorrect',
        second_answer_1 == 0 & second_answer_2 == 1 ~ 'Both kept old positions',
        second_answer_1 == 1 & second_answer_2 == 0 ~ 'Position interchange'
      ),
      # Remaining treatments: compare member 1's first and second answer.
      case_when(
        first_answer_1 == second_answer_1 ~ 'Single old position kept',
        first_answer_1 != second_answer_1 ~ 'Single position changed'
      )
    )
  )
# Bar chart of typology shares within each treatment.
#
# df: a data frame with `treatment` and `typology` columns; rows are counted
#     per (treatment, typology) combination.
#
# Returns a ggplot object with one bar per typology, one facet per
# treatment, and the share printed above each bar as a whole percent.
bar_fun <- function(df) {
  df %>%
    group_by(treatment, typology) %>%
    tally() %>%
    group_by(treatment) %>%
    # Share of each typology within its own treatment.
    mutate(freq = n / sum(n)) %>%
    ggplot(aes(x = typology, y = freq, fill = freq)) +
    geom_bar(stat = 'identity', show.legend = FALSE) +
    facet_wrap(~ treatment) +
    theme(axis.text.x = element_text(angle = 90)) +
    # The limits are set on this scale rather than through a later ylim():
    # ylim() adds a second y scale that replaces this one and discards the
    # percent labels.
    scale_y_continuous(
      labels = scales::percent_format(accuracy = 1),
      limits = c(0, 1)
    ) +
    geom_text(
      aes(label = scales::percent_format(accuracy = 1)(freq)),
      position = position_dodge(width = 0.9),
      vjust = -0.25
    ) +
    ylab('Share of participants')
}
# Plot the chat treatments first, then all remaining treatments.
wider_df %>%
  filter(treatment %in% chat_treatments) %>%
  bar_fun()

wider_df %>%
  filter(!(treatment %in% chat_treatments)) %>%
  bar_fun()
# Table of typology shares: one row per typology, one column per treatment.
wider_df %>%
  count(treatment, typology) %>%
  group_by(treatment) %>%
  # Shares are computed within each treatment and formatted as whole percents.
  mutate(freq = percent_format(accuracy = 1)(n / sum(n))) %>%
  select(-n) %>%
  pivot_wider(names_from = treatment, values_from = freq) %>%
  kbl() %>%
  kable_material()
| typology | dependent | independent | no_reward | solo_reasoning |
|---|---|---|---|---|
| Both changed to correct | 58% | 37% | NA | NA |
| Both changed to incorrect | 16% | 16% | NA | NA |
| Both kept old positions | 15% | 42% | NA | NA |
| Position interchange | 11% | 4% | NA | NA |
| Single old position kept | NA | NA | 82% | 68% |
| Single position changed | NA | NA | 18% | 32% |