library(pacman)
p_load(tidyverse, fixest, gtsummary, lme4, lmtest, sandwich, sjPlot)first steps with tail project
loading libraries
Reading in data
setwd("/Users/chapkovski/Downloads/Data Backup 2/RemadeDump")
fileNames<-list.files(path = ".", pattern = "all*")
raw_df <- do.call(rbind, lapply(fileNames, function(x) read.csv(file=x) ) )Cleaning data
# select only unique participant.code values
raw_df %>%
filter(!is.na(participant.label),
participant._index_in_pages==62,
session.is_demo==FALSE) %>%
distinct(participant.code, .keep_all = T) -> dfSplitting into demographic data and round-based data then converint round-based to long format
We split into three different tables 1. Individual (demographics) -vary on participant.code level 2. Round based - vary on participant/round level 3. Decision based - vary on participant/round/decision level
# let's first check that all endowments across rounds contain the same values for each participant
df %>% select(starts_with("bigrisk."), participant.code, tail=session.config.tail,
negative=session.config.negative) -> df_round
df_round %>% select(
ends_with('.endowment'),
participant.code) %>%
pivot_longer(
cols = contains('endowment'),
names_to = c("round_number", "round_item"), # Use a single character vector with two placeholders
names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>%
group_by(participant.code) %>%
dplyr::summarize(all_values_equal = min(value) == max(value)) %>% group_by(all_values_equal) %>% tally# A tibble: 1 × 2
all_values_equal n
<lgl> <int>
1 TRUE 471
# the answer is yes: all values are equal, so we can safely drop all but one column
df %>% select(starts_with("bigrisk."),
participant.code,
tail=session.config.tail,
negative=session.config.negative,
endowment=bigrisk.1.player.endowment,
) %>%
select(contains('num_tickets_'),
contains('ticket_price_'),
participant.code,
tail, negative, endowment) %>%
pivot_longer(
cols = contains('num_tickets_') | contains('ticket_price_'),
names_to = c("round_number", "round_item"),
names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>%
separate(round_item, into = c("item", "number"), sep = "_(?=[^_]+$)") -> df_decision_long
df_decision_num_tickets <- df_decision_long %>%
filter(item=="num_tickets") %>%
select(-item) %>%
rename(num_tickets=value)
df_decision_ticket_price <- df_decision_long %>%
filter(item=="ticket_price") %>%
select(-c(item, negative, endowment, tail)) %>%
rename(ticket_price=value)
df_decision_num_tickets %>%
left_join(df_decision_ticket_price, by=c("participant.code", "round_number", "number")) %>%
mutate(total_expenditure=num_tickets*ticket_price,
relative_expenditure=total_expenditure/endowment) %>%
rename(decision_number=number) -> df_decision_long
# now let's retrieve personal_outcome and intermediary_payoff to a separate tibble and then attach them to df_decision
df %>%
select(
participant.code,
contains('personal_outcome'),
ends_with('.player.intermediary_payoff'),
) %>%
pivot_longer(
cols = contains('personal_outcome') | ends_with('.player.intermediary_payoff'),
names_to = c("round_number", "round_item"),
names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>%
pivot_wider(names_from = round_item, values_from = value) -> df_round_long
df_decision_long %>%
left_join(df_round_long, by=c("participant.code", "round_number")) -> df_decision_long
# now let's clean participant-level df before joining in with df_decision_long
df %>% select(!starts_with("bigrisk."), participant.code) %>%
select(participant.code, final_payoff=participant.payoff, session_code = session.code, session.is_demo, starts_with('q.')) %>%
rename_with(~ str_extract(., "[^.]+$")) %>%
rename(participant.code=code) %>%
select(-c(id_in_group, role, payoff, id_in_subsession, round_number, education)) -> individual_df
# now let's join in the individual-level data
df_decision_long %>%
mutate(round_number=as.numeric(round_number)) %>%
left_join(individual_df, by="participant.code") -> df_decision_long
df_decision_long %>% write_csv("df_decision_long.csv")Some regressions now
I use random effects for participants. It seems that using individual participant codes straigh in the model leads to model overfitting.
# random effects:
# Basic model with random intercepts for participants
m1 <- df_decision_long %>%
lmer(relative_expenditure ~ tail * negative + round_number +
(1 | participant.code), data = .)
# let's add demographic
m2 <- df_decision_long %>%
lmer(relative_expenditure ~ tail * negative + age+gender+marital+employment+income+round_number +
(1 | participant.code), data = .)
tab_model(m1, m2, title="DV: relative expenditure")| relative_expenditure | relative_expenditure | |||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 0.54 | 0.51 – 0.58 | <0.001 | 0.28 | -0.19 – 0.76 | 0.243 |
| tail | 0.01 | -0.05 – 0.06 | 0.834 | -0.00 | -0.06 – 0.05 | 0.859 |
| negative | -0.17 | -0.21 – -0.12 | <0.001 | -0.14 | -0.19 – -0.09 | <0.001 |
| round number | -0.00 | -0.01 – -0.00 | <0.001 | -0.00 | -0.01 – -0.00 | <0.001 |
| tail × negative | 0.00 | -0.07 – 0.07 | 0.982 | -0.00 | -0.08 – 0.08 | 0.982 |
| age [25 - 34] | -0.03 | -0.08 – 0.03 | 0.319 | |||
| age [35 - 44] | 0.20 | 0.05 – 0.34 | 0.008 | |||
| age [45 - 54] | -0.13 | -0.52 – 0.26 | 0.524 | |||
| gender [Không muốn tiết lộ] |
0.39 | -0.05 – 0.82 | 0.082 | |||
| gender [Nam] | 0.27 | -0.11 – 0.65 | 0.168 | |||
| gender [Nữ] | 0.36 | -0.02 – 0.74 | 0.063 | |||
| marital [Độc thân (chưa kết hôn)] |
-0.05 | -0.13 – 0.03 | 0.244 | |||
| marital [Ly dị] | -0.17 | -0.40 – 0.07 | 0.167 | |||
| marital [Ly thân] | -0.14 | -0.52 – 0.25 | 0.494 | |||
| employment [Có công việc toàn thời gian (35 giờ làm mỗi tuần)] |
-0.04 | -0.10 – 0.01 | 0.134 | |||
| employment [Học sinh] | 0.02 | -0.03 – 0.07 | 0.458 | |||
| employment [Thất nghiệp và hiện tại đang KHÔNG tìm việc] |
0.01 | -0.15 – 0.17 | 0.920 | |||
| employment [Thất nghiệp và hiện tại đang tìm việc] |
0.00 | -0.11 – 0.12 | 0.943 | |||
| employment [Tự kinh doanh] |
-0.05 | -0.28 – 0.18 | 0.671 | |||
| income [1,875,000,000 VND - 2,500,000,000 VND] |
-0.12 | -0.44 – 0.20 | 0.469 | |||
| income [125,000,000 VND - 250,000,000 VND] |
-0.02 | -0.29 – 0.25 | 0.882 | |||
| income [2,500,000,000 VND - 3,750,000,000 VND] |
0.08 | -0.24 – 0.40 | 0.615 | |||
| income [25,000,000 VND - 50,000,000 VND] |
-0.01 | -0.28 – 0.26 | 0.944 | |||
| income [250,000,000 VND - 625,000,000 VND] |
-0.02 | -0.29 – 0.26 | 0.907 | |||
| income [50,000,000 VND - 125,000,000 VND] |
-0.03 | -0.31 – 0.24 | 0.810 | |||
| income [625,000,000 VND - 1,250,000,000 VND] |
-0.03 | -0.31 – 0.25 | 0.844 | |||
| income [hơn 3,750,000,000 VND] |
-0.21 | -0.55 – 0.14 | 0.239 | |||
| income [Không muốn tiết lộ] |
-0.03 | -0.30 – 0.24 | 0.835 | |||
| Random Effects | ||||||
| σ2 | 0.07 | 0.07 | ||||
| τ00 | 0.04 participant.code | 0.04 participant.code | ||||
| ICC | 0.33 | 0.32 | ||||
| N | 471 participant.code | 471 participant.code | ||||
| Observations | 23550 | 23550 | ||||
| Marginal R2 / Conditional R2 | 0.058 / 0.373 | 0.091 / 0.382 | ||||
# random effects:
# Basic model with random intercepts for participants
m1 <- df_decision_long %>%
lmer(total_expenditure ~ tail * negative + round_number +
(1 | participant.code), data = .)
# let's add demographic
m2 <- df_decision_long %>%
lmer(total_expenditure ~ tail * negative + age+gender+marital+employment+income+round_number +
(1 | participant.code), data = .)
tab_model(m1, m2, title="DV: total expenditure")| total_expenditure | total_expenditure | |||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 5.72 | 4.44 – 7.00 | <0.001 | 8.88 | -8.71 – 26.48 | 0.322 |
| tail | 0.06 | -1.91 – 2.03 | 0.955 | -0.16 | -2.18 – 1.86 | 0.876 |
| negative | 12.48 | 10.71 – 14.24 | <0.001 | 13.34 | 11.48 – 15.20 | <0.001 |
| round number | -0.10 | -0.14 – -0.05 | <0.001 | -0.10 | -0.14 – -0.05 | <0.001 |
| tail × negative | 0.27 | -2.39 – 2.93 | 0.841 | 0.01 | -2.79 – 2.82 | 0.992 |
| age [25 - 34] | 0.00 | -2.04 – 2.05 | 0.999 | |||
| age [35 - 44] | 11.18 | 5.88 – 16.48 | <0.001 | |||
| age [45 - 54] | -6.25 | -20.67 – 8.18 | 0.396 | |||
| gender [Không muốn tiết lộ] |
3.44 | -12.66 – 19.54 | 0.675 | |||
| gender [Nam] | 1.74 | -12.28 – 15.76 | 0.808 | |||
| gender [Nữ] | 4.01 | -9.98 – 18.00 | 0.574 | |||
| marital [Độc thân (chưa kết hôn)] |
-0.62 | -3.68 – 2.45 | 0.694 | |||
| marital [Ly dị] | -9.15 | -17.89 – -0.41 | 0.040 | |||
| marital [Ly thân] | -8.06 | -22.41 – 6.28 | 0.271 | |||
| employment [Có công việc toàn thời gian (35 giờ làm mỗi tuần)] |
-1.33 | -3.45 – 0.79 | 0.220 | |||
| employment [Học sinh] | 0.36 | -1.51 – 2.23 | 0.709 | |||
| employment [Thất nghiệp và hiện tại đang KHÔNG tìm việc] |
-0.86 | -6.81 – 5.08 | 0.776 | |||
| employment [Thất nghiệp và hiện tại đang tìm việc] |
-0.25 | -4.55 – 4.06 | 0.910 | |||
| employment [Tự kinh doanh] |
-3.95 | -12.52 – 4.63 | 0.367 | |||
| income [1,875,000,000 VND - 2,500,000,000 VND] |
-8.19 | -19.93 – 3.55 | 0.172 | |||
| income [125,000,000 VND - 250,000,000 VND] |
-6.64 | -16.71 – 3.43 | 0.196 | |||
| income [2,500,000,000 VND - 3,750,000,000 VND] |
-0.83 | -12.54 – 10.88 | 0.889 | |||
| income [25,000,000 VND - 50,000,000 VND] |
-4.84 | -14.93 – 5.24 | 0.347 | |||
| income [250,000,000 VND - 625,000,000 VND] |
-5.75 | -15.80 – 4.30 | 0.262 | |||
| income [50,000,000 VND - 125,000,000 VND] |
-6.80 | -16.89 – 3.30 | 0.187 | |||
| income [625,000,000 VND - 1,250,000,000 VND] |
-6.82 | -17.26 – 3.62 | 0.200 | |||
| income [hơn 3,750,000,000 VND] |
-14.68 | -27.45 – -1.92 | 0.024 | |||
| income [Không muốn tiết lộ] |
-5.96 | -15.86 – 3.94 | 0.238 | |||
| Random Effects | ||||||
| σ2 | 92.99 | 92.99 | ||||
| τ00 | 50.92 participant.code | 48.30 participant.code | ||||
| ICC | 0.35 | 0.34 | ||||
| N | 471 participant.code | 471 participant.code | ||||
| Observations | 23550 | 23550 | ||||
| Marginal R2 / Conditional R2 | 0.216 / 0.493 | 0.240 / 0.500 | ||||
let’s check the number of tickets
# random effects:
# Basic model with random intercepts for participants
m1 <- df_decision_long %>%
lmer(num_tickets ~ tail * negative + round_number +
(1 | participant.code), data = .)
# let's add demographic
m2 <- df_decision_long %>%
lmer(num_tickets ~ tail * negative + age+gender+marital+employment+income+round_number + (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: number of tickets purchased")| num_tickets | num_tickets | |||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 7.88 | 5.11 – 10.66 | <0.001 | 26.05 | -7.81 – 59.92 | 0.132 |
| tail | 0.77 | -2.99 – 4.52 | 0.689 | 0.05 | -3.84 – 3.94 | 0.980 |
| negative | 8.53 | 5.17 – 11.89 | <0.001 | 10.44 | 6.86 – 14.02 | <0.001 |
| round number | -0.09 | -0.35 – 0.16 | 0.469 | -0.09 | -0.35 – 0.16 | 0.469 |
| tail × negative | -1.23 | -6.30 – 3.84 | 0.634 | -0.60 | -6.00 – 4.80 | 0.827 |
| age [25 - 34] | -0.57 | -4.50 – 3.36 | 0.777 | |||
| age [35 - 44] | 8.40 | -1.80 – 18.59 | 0.106 | |||
| age [45 - 54] | -15.07 | -42.82 – 12.68 | 0.287 | |||
| gender [Không muốn tiết lộ] |
7.98 | -22.99 – 38.94 | 0.614 | |||
| gender [Nam] | 7.25 | -19.72 – 34.22 | 0.598 | |||
| gender [Nữ] | 7.37 | -19.54 – 34.28 | 0.592 | |||
| marital [Độc thân (chưa kết hôn)] |
-5.25 | -11.15 – 0.65 | 0.081 | |||
| marital [Ly dị] | -11.32 | -28.14 – 5.49 | 0.187 | |||
| marital [Ly thân] | -13.13 | -40.73 – 14.47 | 0.351 | |||
| employment [Có công việc toàn thời gian (35 giờ làm mỗi tuần)] |
-3.02 | -7.11 – 1.06 | 0.147 | |||
| employment [Học sinh] | 3.11 | -0.49 – 6.70 | 0.090 | |||
| employment [Thất nghiệp và hiện tại đang KHÔNG tìm việc] |
-1.78 | -13.22 – 9.66 | 0.761 | |||
| employment [Thất nghiệp và hiện tại đang tìm việc] |
0.59 | -7.69 – 8.87 | 0.889 | |||
| employment [Tự kinh doanh] |
-8.72 | -25.22 – 7.78 | 0.300 | |||
| income [1,875,000,000 VND - 2,500,000,000 VND] |
-26.10 | -48.69 – -3.52 | 0.023 | |||
| income [125,000,000 VND - 250,000,000 VND] |
-20.36 | -39.73 – -0.98 | 0.039 | |||
| income [2,500,000,000 VND - 3,750,000,000 VND] |
-21.71 | -44.24 – 0.82 | 0.059 | |||
| income [25,000,000 VND - 50,000,000 VND] |
-15.80 | -35.21 – 3.60 | 0.110 | |||
| income [250,000,000 VND - 625,000,000 VND] |
-21.58 | -40.91 – -2.25 | 0.029 | |||
| income [50,000,000 VND - 125,000,000 VND] |
-22.33 | -41.75 – -2.91 | 0.024 | |||
| income [625,000,000 VND - 1,250,000,000 VND] |
-21.44 | -41.52 – -1.35 | 0.036 | |||
| income [hơn 3,750,000,000 VND] |
-32.37 | -56.93 – -7.81 | 0.010 | |||
| income [Không muốn tiết lộ] |
-22.27 | -41.31 – -3.23 | 0.022 | |||
| Random Effects | ||||||
| σ2 | 3272.89 | 3272.89 | ||||
| τ00 | 126.10 participant.code | 120.14 participant.code | ||||
| ICC | 0.04 | 0.04 | ||||
| N | 471 participant.code | 471 participant.code | ||||
| Observations | 23550 | 23550 | ||||
| Marginal R2 / Conditional R2 | 0.005 / 0.042 | 0.009 / 0.044 | ||||