first steps with tail project

loading libraries

library(pacman)
p_load(tidyverse, fixest, gtsummary, lme4, lmtest, sandwich, sjPlot)

Reading in data

setwd("/Users/chapkovski/Downloads/Data Backup 2/RemadeDump")

fileNames<-list.files(path = ".", pattern = "all*")
raw_df <- do.call(rbind, lapply(fileNames, function(x) read.csv(file=x) ) )

Cleaning data

# select only unique participant.code values
raw_df %>% 
  filter(!is.na(participant.label),
         participant._index_in_pages==62,
         session.is_demo==FALSE) %>% 
  distinct(participant.code, .keep_all = T) -> df

Splitting into demographic data and round-based data then converint round-based to long format

We split into three different tables 1. Individual (demographics) -vary on participant.code level 2. Round based - vary on participant/round level 3. Decision based - vary on participant/round/decision level

# let's first check that all endowments across rounds contain the same values for each participant

df %>% select(starts_with("bigrisk."), participant.code, tail=session.config.tail,
              negative=session.config.negative) -> df_round
df_round %>% select(
                    ends_with('.endowment'),
                    participant.code) %>%
  pivot_longer(
               cols = contains('endowment'),
               names_to = c("round_number", "round_item"),  # Use a single character vector with two placeholders
               names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>% 
  group_by(participant.code) %>%
  dplyr::summarize(all_values_equal = min(value) == max(value)) %>% group_by(all_values_equal) %>% tally

# A tibble: 1 × 2
  all_values_equal     n
  <lgl>            <int>
1 TRUE               471

# the answer is yes: all values are equal, so we can safely drop all but one column


df %>% select(starts_with("bigrisk."), 
              participant.code,
              tail=session.config.tail,
              negative=session.config.negative,
              endowment=bigrisk.1.player.endowment,
              ) %>% 
  select(contains('num_tickets_'), 
                    contains('ticket_price_'), 
                    participant.code,
         tail, negative, endowment) %>%
  pivot_longer(
               cols = contains('num_tickets_') | contains('ticket_price_'),
               names_to = c("round_number", "round_item"), 
               names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>% 
  separate(round_item, into = c("item", "number"), sep = "_(?=[^_]+$)") -> df_decision_long

df_decision_num_tickets <- df_decision_long %>% 
  filter(item=="num_tickets") %>%
  select(-item) %>%
  rename(num_tickets=value)

df_decision_ticket_price <- df_decision_long %>%
  filter(item=="ticket_price") %>% 
  select(-c(item, negative, endowment, tail)) %>% 
  rename(ticket_price=value)

df_decision_num_tickets %>% 
  left_join(df_decision_ticket_price, by=c("participant.code", "round_number", "number")) %>% 
  mutate(total_expenditure=num_tickets*ticket_price, 
         relative_expenditure=total_expenditure/endowment) %>% 
  rename(decision_number=number)  -> df_decision_long

# now let's retrieve personal_outcome and intermediary_payoff to a separate tibble and then attach them to df_decision

df %>%
  select(
    participant.code,
    contains('personal_outcome'),
    ends_with('.player.intermediary_payoff'),
  ) %>% 
  pivot_longer(
    cols = contains('personal_outcome') | ends_with('.player.intermediary_payoff'),
    names_to = c("round_number", "round_item"),  
    names_pattern = "bigrisk\\.(\\d+)\\.player\\.(.*)") %>% 
  pivot_wider(names_from = round_item, values_from = value) -> df_round_long


df_decision_long %>% 
  left_join(df_round_long, by=c("participant.code", "round_number")) -> df_decision_long

# now let's clean participant-level df before joining in with df_decision_long
df %>% select(!starts_with("bigrisk."), participant.code) %>%  
  select(participant.code, final_payoff=participant.payoff, session_code = session.code, session.is_demo, starts_with('q.')) %>% 
  rename_with(~ str_extract(., "[^.]+$")) %>% 
  rename(participant.code=code) %>% 
  select(-c(id_in_group, role, payoff, id_in_subsession, round_number, education))  -> individual_df

# now let's join in the individual-level data
df_decision_long %>% 
  mutate(round_number=as.numeric(round_number)) %>% 
  left_join(individual_df, by="participant.code") -> df_decision_long

df_decision_long %>% write_csv("df_decision_long.csv")

Some regressions now

I use random effects for participants. It seems that using individual participant codes straigh in the model leads to model overfitting.

# random effects:

# Basic model with random intercepts for participants
m1 <- df_decision_long %>% 
  lmer(relative_expenditure ~ tail * negative + round_number + 
           (1 | participant.code), data = .)

# let's add demographic
m2 <- df_decision_long %>% 
  lmer(relative_expenditure ~ tail * negative + age+gender+marital+employment+income+round_number + 
           (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: relative expenditure")

DV: relative expenditure
	relative_expenditure			relative_expenditure
Predictors	Estimates	CI	p	Estimates	CI	p
(Intercept)	0.54	0.51 – 0.58	<0.001	0.28	-0.19 – 0.76	0.243
tail	0.01	-0.05 – 0.06	0.834	-0.00	-0.06 – 0.05	0.859
negative	-0.17	-0.21 – -0.12	<0.001	-0.14	-0.19 – -0.09	<0.001
round number	-0.00	-0.01 – -0.00	<0.001	-0.00	-0.01 – -0.00	<0.001
tail × negative	0.00	-0.07 – 0.07	0.982	-0.00	-0.08 – 0.08	0.982
age [25 - 34]				-0.03	-0.08 – 0.03	0.319
age [35 - 44]				0.20	0.05 – 0.34	0.008
age [45 - 54]				-0.13	-0.52 – 0.26	0.524
gender [Không muốn tiết lộ]				0.39	-0.05 – 0.82	0.082
gender [Nam]				0.27	-0.11 – 0.65	0.168
gender [Nữ]				0.36	-0.02 – 0.74	0.063
marital [Độc thân (chưa kết hôn)]				-0.05	-0.13 – 0.03	0.244
marital [Ly dị]				-0.17	-0.40 – 0.07	0.167
marital [Ly thân]				-0.14	-0.52 – 0.25	0.494
employment [Có công việc toàn thời gian (35 giờ làm mỗi tuần)]				-0.04	-0.10 – 0.01	0.134
employment [Học sinh]				0.02	-0.03 – 0.07	0.458
employment [Thất nghiệp và hiện tại đang KHÔNG tìm việc]				0.01	-0.15 – 0.17	0.920
employment [Thất nghiệp và hiện tại đang tìm việc]				0.00	-0.11 – 0.12	0.943
employment [Tự kinh doanh]				-0.05	-0.28 – 0.18	0.671
income [1,875,000,000 VND - 2,500,000,000 VND]				-0.12	-0.44 – 0.20	0.469
income [125,000,000 VND - 250,000,000 VND]				-0.02	-0.29 – 0.25	0.882
income [2,500,000,000 VND - 3,750,000,000 VND]				0.08	-0.24 – 0.40	0.615
income [25,000,000 VND - 50,000,000 VND]				-0.01	-0.28 – 0.26	0.944
income [250,000,000 VND - 625,000,000 VND]				-0.02	-0.29 – 0.26	0.907
income [50,000,000 VND - 125,000,000 VND]				-0.03	-0.31 – 0.24	0.810
income [625,000,000 VND - 1,250,000,000 VND]				-0.03	-0.31 – 0.25	0.844
income [hơn 3,750,000,000 VND]				-0.21	-0.55 – 0.14	0.239
income [Không muốn tiết lộ]				-0.03	-0.30 – 0.24	0.835
Random Effects
σ²	0.07			0.07
τ₀₀	0.04 _{participant.code}			0.04 _{participant.code}
ICC	0.33			0.32
N	471 _{participant.code}			471 _{participant.code}
Observations	23550			23550
Marginal R² / Conditional R²	0.058 / 0.373			0.091 / 0.382

# random effects:

# Basic model with random intercepts for participants
m1 <- df_decision_long %>% 
  lmer(total_expenditure ~ tail * negative + round_number +
           (1 | participant.code), data = .)

# let's add demographic
m2 <- df_decision_long %>% 
  lmer(total_expenditure ~ tail * negative + age+gender+marital+employment+income+round_number + 
           (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: total expenditure")

DV: total expenditure
	total_expenditure			total_expenditure
Predictors	Estimates	CI	p	Estimates	CI	p
(Intercept)	5.72	4.44 – 7.00	<0.001	8.88	-8.71 – 26.48	0.322
tail	0.06	-1.91 – 2.03	0.955	-0.16	-2.18 – 1.86	0.876
negative	12.48	10.71 – 14.24	<0.001	13.34	11.48 – 15.20	<0.001
round number	-0.10	-0.14 – -0.05	<0.001	-0.10	-0.14 – -0.05	<0.001
tail × negative	0.27	-2.39 – 2.93	0.841	0.01	-2.79 – 2.82	0.992
age [25 - 34]				0.00	-2.04 – 2.05	0.999
age [35 - 44]				11.18	5.88 – 16.48	<0.001
age [45 - 54]				-6.25	-20.67 – 8.18	0.396
gender [Không muốn tiết lộ]				3.44	-12.66 – 19.54	0.675
gender [Nam]				1.74	-12.28 – 15.76	0.808
gender [Nữ]				4.01	-9.98 – 18.00	0.574
marital [Độc thân (chưa kết hôn)]				-0.62	-3.68 – 2.45	0.694
marital [Ly dị]				-9.15	-17.89 – -0.41	0.040
marital [Ly thân]				-8.06	-22.41 – 6.28	0.271
employment [Có công việc toàn thời gian (35 giờ làm mỗi tuần)]				-1.33	-3.45 – 0.79	0.220
employment [Học sinh]				0.36	-1.51 – 2.23	0.709
employment [Thất nghiệp và hiện tại đang KHÔNG tìm việc]				-0.86	-6.81 – 5.08	0.776
employment [Thất nghiệp và hiện tại đang tìm việc]				-0.25	-4.55 – 4.06	0.910
employment [Tự kinh doanh]				-3.95	-12.52 – 4.63	0.367
income [1,875,000,000 VND - 2,500,000,000 VND]				-8.19	-19.93 – 3.55	0.172
income [125,000,000 VND - 250,000,000 VND]				-6.64	-16.71 – 3.43	0.196
income [2,500,000,000 VND - 3,750,000,000 VND]				-0.83	-12.54 – 10.88	0.889
income [25,000,000 VND - 50,000,000 VND]				-4.84	-14.93 – 5.24	0.347
income [250,000,000 VND - 625,000,000 VND]				-5.75	-15.80 – 4.30	0.262
income [50,000,000 VND - 125,000,000 VND]				-6.80	-16.89 – 3.30	0.187
income [625,000,000 VND - 1,250,000,000 VND]				-6.82	-17.26 – 3.62	0.200
income [hơn 3,750,000,000 VND]				-14.68	-27.45 – -1.92	0.024
income [Không muốn tiết lộ]				-5.96	-15.86 – 3.94	0.238
Random Effects
σ²	92.99			92.99
τ₀₀	50.92 _{participant.code}			48.30 _{participant.code}
ICC	0.35			0.34
N	471 _{participant.code}			471 _{participant.code}
Observations	23550			23550
Marginal R² / Conditional R²	0.216 / 0.493			0.240 / 0.500

let’s check the number of tickets

# random effects:

# Basic model with random intercepts for participants
m1 <- df_decision_long %>% 
  lmer(num_tickets ~ tail * negative + round_number + 
           (1 | participant.code), data = .)

# let's add demographic
m2 <- df_decision_long %>% 
  lmer(num_tickets ~ tail * negative + age+gender+marital+employment+income+round_number +  (1 | participant.code), data = .)
tab_model(m1, m2, title="DV: number of tickets purchased")

DV: number of tickets purchased
	num_tickets			num_tickets
Predictors	Estimates	CI	p	Estimates	CI	p
(Intercept)	7.88	5.11 – 10.66	<0.001	26.05	-7.81 – 59.92	0.132
tail	0.77	-2.99 – 4.52	0.689	0.05	-3.84 – 3.94	0.980
negative	8.53	5.17 – 11.89	<0.001	10.44	6.86 – 14.02	<0.001
round number	-0.09	-0.35 – 0.16	0.469	-0.09	-0.35 – 0.16	0.469
tail × negative	-1.23	-6.30 – 3.84	0.634	-0.60	-6.00 – 4.80	0.827
age [25 - 34]				-0.57	-4.50 – 3.36	0.777
age [35 - 44]				8.40	-1.80 – 18.59	0.106
age [45 - 54]				-15.07	-42.82 – 12.68	0.287
gender [Không muốn tiết lộ]				7.98	-22.99 – 38.94	0.614
gender [Nam]				7.25	-19.72 – 34.22	0.598
gender [Nữ]				7.37	-19.54 – 34.28	0.592
marital [Độc thân (chưa kết hôn)]				-5.25	-11.15 – 0.65	0.081
marital [Ly dị]				-11.32	-28.14 – 5.49	0.187
marital [Ly thân]				-13.13	-40.73 – 14.47	0.351
employment [Có công việc toàn thời gian (35 giờ làm mỗi tuần)]				-3.02	-7.11 – 1.06	0.147
employment [Học sinh]				3.11	-0.49 – 6.70	0.090
employment [Thất nghiệp và hiện tại đang KHÔNG tìm việc]				-1.78	-13.22 – 9.66	0.761
employment [Thất nghiệp và hiện tại đang tìm việc]				0.59	-7.69 – 8.87	0.889
employment [Tự kinh doanh]				-8.72	-25.22 – 7.78	0.300
income [1,875,000,000 VND - 2,500,000,000 VND]				-26.10	-48.69 – -3.52	0.023
income [125,000,000 VND - 250,000,000 VND]				-20.36	-39.73 – -0.98	0.039
income [2,500,000,000 VND - 3,750,000,000 VND]				-21.71	-44.24 – 0.82	0.059
income [25,000,000 VND - 50,000,000 VND]				-15.80	-35.21 – 3.60	0.110
income [250,000,000 VND - 625,000,000 VND]				-21.58	-40.91 – -2.25	0.029
income [50,000,000 VND - 125,000,000 VND]				-22.33	-41.75 – -2.91	0.024
income [625,000,000 VND - 1,250,000,000 VND]				-21.44	-41.52 – -1.35	0.036
income [hơn 3,750,000,000 VND]				-32.37	-56.93 – -7.81	0.010
income [Không muốn tiết lộ]				-22.27	-41.31 – -3.23	0.022
Random Effects
σ²	3272.89			3272.89
τ₀₀	126.10 _{participant.code}			120.14 _{participant.code}
ICC	0.04			0.04
N	471 _{participant.code}			471 _{participant.code}
Observations	23550			23550
Marginal R² / Conditional R²	0.005 / 0.042			0.009 / 0.044