first steps with tail project

loading libraries

library(pacman)
p_load(tidyverse, fixest, gtsummary, lme4, lmtest, sandwich, sjPlot)

Reading in data

setwd("/Users/chapkovski/Downloads/Data Backup 2/RemadeDump")

fileNames<-list.files(path = ".", pattern = "all*")
raw_df <- do.call(rbind, lapply(fileNames, function(x) read.csv(file=x) ) )

Cleaning data

# select only unique participant.code values
raw_df %>% 
  filter(!is.na(participant.label),
         participant._index_in_pages==62,
         session.is_demo==FALSE) %>% 
  distinct(participant.code, .keep_all = T) -> df

Splitting into demographic data and round-based data then converint round-based to long format

We split into three different tables 1. Individual (demographics) -vary on participant.code level 2. Round based - vary on participant/round level 3. Decision based - vary on participant/round/decision level

# let's first check that all endowments across rounds contain the same values for each participant

df %>% select(starts_with("bigrisk."), participant.code, tail=session.config.tail,
              negative=session.config.negative) -> df_round
df_round %>% select(
                    ends_with('.endowment'),
                    participant.code) %>%
  pivot_longer(
               cols = contains('endowment'),
               names_to = c("round_number", "round_item"),  # Use a single character vector with two placeholders
               names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>% 
  group_by(participant.code) %>%
  dplyr::summarize(all_values_equal = min(value) == max(value)) %>% group_by(all_values_equal) %>% tally
# A tibble: 1 × 2
  all_values_equal     n
  <lgl>            <int>
1 TRUE               471
# the answer is yes: all values are equal, so we can safely drop all but one column


df %>% select(starts_with("bigrisk."), 
              participant.code,
              tail=session.config.tail,
              negative=session.config.negative,
              endowment=bigrisk.1.player.endowment,
              ) %>% 
  select(contains('num_tickets_'), 
                    contains('ticket_price_'), 
                    participant.code,
         tail, negative, endowment) %>%
  pivot_longer(
               cols = contains('num_tickets_') | contains('ticket_price_'),
               names_to = c("round_number", "round_item"), 
               names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>% 
  separate(round_item, into = c("item", "number"), sep = "_(?=[^_]+$)") -> df_decision_long

df_decision_num_tickets <- df_decision_long %>% 
  filter(item=="num_tickets") %>%
  select(-item) %>%
  rename(num_tickets=value)

df_decision_ticket_price <- df_decision_long %>%
  filter(item=="ticket_price") %>% 
  select(-c(item, negative, endowment, tail)) %>% 
  rename(ticket_price=value)

df_decision_num_tickets %>% 
  left_join(df_decision_ticket_price, by=c("participant.code", "round_number", "number")) %>% 
  mutate(total_expenditure=num_tickets*ticket_price, 
         relative_expenditure=total_expenditure/endowment) %>% 
  rename(decision_number=number)  -> df_decision_long

# now let's retrieve personal_outcome and intermediary_payoff to a separate tibble and then attach them to df_decision

df %>%
  select(
    participant.code,
    contains('personal_outcome'),
    ends_with('.player.intermediary_payoff'),
  ) %>% 
  pivot_longer(
    cols = contains('personal_outcome') | ends_with('.player.intermediary_payoff'),
    names_to = c("round_number", "round_item"),  
    names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>% 
  pivot_wider(names_from = round_item, values_from = value) -> df_round_long


df_decision_long %>% 
  left_join(df_round_long, by=c("participant.code", "round_number")) -> df_decision_long

# now let's clean participant-level df before joining in with df_decision_long
df %>% select(!starts_with("bigrisk."), participant.code) %>%  
  select(participant.code, final_payoff=participant.payoff, session_code = session.code, session.is_demo, starts_with('q.')) %>% 
  rename_with(~ str_extract(., "[^.]+$")) %>% 
  rename(participant.code=code) %>% 
  select(-c(id_in_group, role, payoff, id_in_subsession, round_number, education))  -> individual_df

# now let's join in the individual-level data
df_decision_long %>% 
  mutate(round_number=as.numeric(round_number)) %>% 
  left_join(individual_df, by="participant.code") -> df_decision_long

df_decision_long %>% write_csv("df_decision_long.csv")

Some regressions now

I use random effects for participants. It seems that using individual participant codes straigh in the model leads to model overfitting.

# random effects:

# Basic model with random intercepts for participants
m1 <- df_decision_long %>% 
  lmer(relative_expenditure ~ tail * negative + round_number + 
           (1 | participant.code), data = .)

# let's add demographic
m2 <- df_decision_long %>% 
  lmer(relative_expenditure ~ tail * negative + age+gender+marital+employment+income+round_number + 
           (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: relative expenditure")
DV: relative expenditure
  relative_expenditure relative_expenditure
Predictors Estimates CI p Estimates CI p
(Intercept) 0.54 0.51 – 0.58 <0.001 0.28 -0.19 – 0.76 0.243
tail 0.01 -0.05 – 0.06 0.834 -0.00 -0.06 – 0.05 0.859
negative -0.17 -0.21 – -0.12 <0.001 -0.14 -0.19 – -0.09 <0.001
round number -0.00 -0.01 – -0.00 <0.001 -0.00 -0.01 – -0.00 <0.001
tail × negative 0.00 -0.07 – 0.07 0.982 -0.00 -0.08 – 0.08 0.982
age [25 - 34] -0.03 -0.08 – 0.03 0.319
age [35 - 44] 0.20 0.05 – 0.34 0.008
age [45 - 54] -0.13 -0.52 – 0.26 0.524
gender [Không muốn tiết
lộ]
0.39 -0.05 – 0.82 0.082
gender [Nam] 0.27 -0.11 – 0.65 0.168
gender [Nữ] 0.36 -0.02 – 0.74 0.063
marital [Độc thân (chưa
kết hôn)]
-0.05 -0.13 – 0.03 0.244
marital [Ly dị] -0.17 -0.40 – 0.07 0.167
marital [Ly thân] -0.14 -0.52 – 0.25 0.494
employment [Có công việc
toàn thời gian (35 giờ
làm mỗi tuần)]
-0.04 -0.10 – 0.01 0.134
employment [Học sinh] 0.02 -0.03 – 0.07 0.458
employment [Thất nghiệp
và hiện tại đang KHÔNG
tìm việc]
0.01 -0.15 – 0.17 0.920
employment [Thất nghiệp
và hiện tại đang tìm
việc]
0.00 -0.11 – 0.12 0.943
employment [Tự kinh
doanh]
-0.05 -0.28 – 0.18 0.671
income [1,875,000,000 VND
- 2,500,000,000 VND]
-0.12 -0.44 – 0.20 0.469
income [125,000,000 VND -
250,000,000 VND]
-0.02 -0.29 – 0.25 0.882
income [2,500,000,000 VND
- 3,750,000,000 VND]
0.08 -0.24 – 0.40 0.615
income [25,000,000 VND -
50,000,000 VND]
-0.01 -0.28 – 0.26 0.944
income [250,000,000 VND -
625,000,000 VND]
-0.02 -0.29 – 0.26 0.907
income [50,000,000 VND -
125,000,000 VND]
-0.03 -0.31 – 0.24 0.810
income [625,000,000 VND -
1,250,000,000 VND]
-0.03 -0.31 – 0.25 0.844
income [hơn 3,750,000,000
VND]
-0.21 -0.55 – 0.14 0.239
income [Không muốn tiết
lộ]
-0.03 -0.30 – 0.24 0.835
Random Effects
σ2 0.07 0.07
τ00 0.04 participant.code 0.04 participant.code
ICC 0.33 0.32
N 471 participant.code 471 participant.code
Observations 23550 23550
Marginal R2 / Conditional R2 0.058 / 0.373 0.091 / 0.382
# random effects:

# Basic model with random intercepts for participants
m1 <- df_decision_long %>% 
  lmer(total_expenditure ~ tail * negative + round_number +
           (1 | participant.code), data = .)

# let's add demographic
m2 <- df_decision_long %>% 
  lmer(total_expenditure ~ tail * negative + age+gender+marital+employment+income+round_number + 
           (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: total expenditure")
DV: total expenditure
  total_expenditure total_expenditure
Predictors Estimates CI p Estimates CI p
(Intercept) 5.72 4.44 – 7.00 <0.001 8.88 -8.71 – 26.48 0.322
tail 0.06 -1.91 – 2.03 0.955 -0.16 -2.18 – 1.86 0.876
negative 12.48 10.71 – 14.24 <0.001 13.34 11.48 – 15.20 <0.001
round number -0.10 -0.14 – -0.05 <0.001 -0.10 -0.14 – -0.05 <0.001
tail × negative 0.27 -2.39 – 2.93 0.841 0.01 -2.79 – 2.82 0.992
age [25 - 34] 0.00 -2.04 – 2.05 0.999
age [35 - 44] 11.18 5.88 – 16.48 <0.001
age [45 - 54] -6.25 -20.67 – 8.18 0.396
gender [Không muốn tiết
lộ]
3.44 -12.66 – 19.54 0.675
gender [Nam] 1.74 -12.28 – 15.76 0.808
gender [Nữ] 4.01 -9.98 – 18.00 0.574
marital [Độc thân (chưa
kết hôn)]
-0.62 -3.68 – 2.45 0.694
marital [Ly dị] -9.15 -17.89 – -0.41 0.040
marital [Ly thân] -8.06 -22.41 – 6.28 0.271
employment [Có công việc
toàn thời gian (35 giờ
làm mỗi tuần)]
-1.33 -3.45 – 0.79 0.220
employment [Học sinh] 0.36 -1.51 – 2.23 0.709
employment [Thất nghiệp
và hiện tại đang KHÔNG
tìm việc]
-0.86 -6.81 – 5.08 0.776
employment [Thất nghiệp
và hiện tại đang tìm
việc]
-0.25 -4.55 – 4.06 0.910
employment [Tự kinh
doanh]
-3.95 -12.52 – 4.63 0.367
income [1,875,000,000 VND
- 2,500,000,000 VND]
-8.19 -19.93 – 3.55 0.172
income [125,000,000 VND -
250,000,000 VND]
-6.64 -16.71 – 3.43 0.196
income [2,500,000,000 VND
- 3,750,000,000 VND]
-0.83 -12.54 – 10.88 0.889
income [25,000,000 VND -
50,000,000 VND]
-4.84 -14.93 – 5.24 0.347
income [250,000,000 VND -
625,000,000 VND]
-5.75 -15.80 – 4.30 0.262
income [50,000,000 VND -
125,000,000 VND]
-6.80 -16.89 – 3.30 0.187
income [625,000,000 VND -
1,250,000,000 VND]
-6.82 -17.26 – 3.62 0.200
income [hơn 3,750,000,000
VND]
-14.68 -27.45 – -1.92 0.024
income [Không muốn tiết
lộ]
-5.96 -15.86 – 3.94 0.238
Random Effects
σ2 92.99 92.99
τ00 50.92 participant.code 48.30 participant.code
ICC 0.35 0.34
N 471 participant.code 471 participant.code
Observations 23550 23550
Marginal R2 / Conditional R2 0.216 / 0.493 0.240 / 0.500

let’s check the number of tickets

# random effects:

# Basic model with random intercepts for participants
m1 <- df_decision_long %>% 
  lmer(num_tickets ~ tail * negative + round_number + 
           (1 | participant.code), data = .)

# let's add demographic
m2 <- df_decision_long %>% 
  lmer(num_tickets ~ tail * negative + age+gender+marital+employment+income+round_number +  (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: number of tickets purchased")
DV: number of tickets purchased
  num_tickets num_tickets
Predictors Estimates CI p Estimates CI p
(Intercept) 7.88 5.11 – 10.66 <0.001 26.05 -7.81 – 59.92 0.132
tail 0.77 -2.99 – 4.52 0.689 0.05 -3.84 – 3.94 0.980
negative 8.53 5.17 – 11.89 <0.001 10.44 6.86 – 14.02 <0.001
round number -0.09 -0.35 – 0.16 0.469 -0.09 -0.35 – 0.16 0.469
tail × negative -1.23 -6.30 – 3.84 0.634 -0.60 -6.00 – 4.80 0.827
age [25 - 34] -0.57 -4.50 – 3.36 0.777
age [35 - 44] 8.40 -1.80 – 18.59 0.106
age [45 - 54] -15.07 -42.82 – 12.68 0.287
gender [Không muốn tiết
lộ]
7.98 -22.99 – 38.94 0.614
gender [Nam] 7.25 -19.72 – 34.22 0.598
gender [Nữ] 7.37 -19.54 – 34.28 0.592
marital [Độc thân (chưa
kết hôn)]
-5.25 -11.15 – 0.65 0.081
marital [Ly dị] -11.32 -28.14 – 5.49 0.187
marital [Ly thân] -13.13 -40.73 – 14.47 0.351
employment [Có công việc
toàn thời gian (35 giờ
làm mỗi tuần)]
-3.02 -7.11 – 1.06 0.147
employment [Học sinh] 3.11 -0.49 – 6.70 0.090
employment [Thất nghiệp
và hiện tại đang KHÔNG
tìm việc]
-1.78 -13.22 – 9.66 0.761
employment [Thất nghiệp
và hiện tại đang tìm
việc]
0.59 -7.69 – 8.87 0.889
employment [Tự kinh
doanh]
-8.72 -25.22 – 7.78 0.300
income [1,875,000,000 VND
- 2,500,000,000 VND]
-26.10 -48.69 – -3.52 0.023
income [125,000,000 VND -
250,000,000 VND]
-20.36 -39.73 – -0.98 0.039
income [2,500,000,000 VND
- 3,750,000,000 VND]
-21.71 -44.24 – 0.82 0.059
income [25,000,000 VND -
50,000,000 VND]
-15.80 -35.21 – 3.60 0.110
income [250,000,000 VND -
625,000,000 VND]
-21.58 -40.91 – -2.25 0.029
income [50,000,000 VND -
125,000,000 VND]
-22.33 -41.75 – -2.91 0.024
income [625,000,000 VND -
1,250,000,000 VND]
-21.44 -41.52 – -1.35 0.036
income [hơn 3,750,000,000
VND]
-32.37 -56.93 – -7.81 0.010
income [Không muốn tiết
lộ]
-22.27 -41.31 – -3.23 0.022
Random Effects
σ2 3272.89 3272.89
τ00 126.10 participant.code 120.14 participant.code
ICC 0.04 0.04
N 471 participant.code 471 participant.code
Observations 23550 23550
Marginal R2 / Conditional R2 0.005 / 0.042 0.009 / 0.044