library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw session export.
# NOTE(review): this is an absolute, machine-specific path; the script will
# not run on another machine without editing it.
df <- read.csv("C:/Users/nasimy/OneDrive - University Of Oregon/Second-year-paper/R_experiments/session_229.csv")

# Missing-value audit: count the NAs in every column, reshape to one row per
# column, and list the columns with the most missing values first.
df %>%
  summarise(across(everything(), function(col) sum(is.na(col)))) %>%
  pivot_longer(everything(), names_to = "var", values_to = "n_missing") %>%
  arrange(desc(n_missing))
## # A tibble: 22 × 2
## var n_missing
## <chr> <int>
## 1 final_pay 80
## 2 final_total_payment 80
## 3 chosen_option 80
## 4 investment_level 77
## 5 investment_raw 77
## 6 net_savings 77
## 7 saving_pct 77
## 8 rt_s 77
## 9 alpha 75
## 10 electricity_price 75
## # ℹ 12 more rows
# Keep only participants with exactly 40 non-missing investment_level values
# (group size minus the number of NAs must equal 40).
df <- df %>%
  group_by(participant) %>%
  filter(n() - sum(is.na(investment_level)) == 40) %>%
  ungroup()

# Wide table: one row per participant, one column per realized price holding
# that participant's mean investment. realized_price should be 17000 / 23000,
# so the columns come out as avg_inv_17000 / avg_inv_23000.
avg_investment <- df %>%
  group_by(participant, realized_price) %>%
  summarise(
    avg_invest = mean(investment_level, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(participant, realized_price) %>%
  pivot_wider(
    names_from = realized_price,
    values_from = avg_invest,
    names_prefix = "avg_inv_"
  )
# Per participant and realized price: mean investment, the optimal investment
# for that price (800 at 17000, 2400 at 23000, NA for any other price), and
# the signed deviation of the mean from that optimum.
dev <- df %>%
  group_by(participant, realized_price) %>%
  summarise(
    mean_inv = mean(investment_level, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    optimal = case_when(
      realized_price == 17000 ~ 800,
      realized_price == 23000 ~ 2400,
      .default = NA_real_
    ),
    diff = mean_inv - optimal
  )
dev
## # A tibble: 62 × 5
## participant realized_price mean_inv optimal diff
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 P.1 17000 2400 800 1600
## 2 P.1 23000 2400 2400 0
## 3 P.10 17000 2320 800 1520
## 4 P.10 23000 2440 2400 40
## 5 P.11 17000 3160 800 2360
## 6 P.11 23000 2400 2400 0
## 7 P.12 17000 2400 800 1600
## 8 P.12 23000 2400 2400 0
## 9 P.13 17000 960 800 160
## 10 P.13 23000 2400 2400 0
## # ℹ 52 more rows
# Step 1: collapse to one mean investment per participant and realized price.
by_person <- summarise(
  group_by(df, participant, realized_price),
  mean_inv = mean(investment_level, na.rm = TRUE),
  .groups = "drop"
)

# Step 2: for each price, average the participant means and report how many
# participants contributed and the spread of their means.
avg_by_price <- by_person %>%
  group_by(realized_price) %>%
  summarise(
    n_participants = n(),
    avg_of_person_means = mean(mean_inv, na.rm = TRUE),
    sd_of_person_means = sd(mean_inv, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(realized_price)
avg_by_price
## # A tibble: 2 × 4
## realized_price n_participants avg_of_person_means sd_of_person_means
## <dbl> <int> <dbl> <dbl>
## 1 17000 31 1786. 809.
## 2 23000 31 2203. 382.
# One-sample t-tests of the deviation from optimal investment, per price.
#
# Steps:
#   1. one mean investment per participant and realized price;
#   2. subtract the optimum for that price (800 at 17000, 2400 at 23000);
#   3. per price, test whether the mean deviation differs from zero.
#
# The t-test is fitted once per price and kept in a temporary list-column
# (`tt`), from which the statistic, p-value and CI bounds are read. `tt` is
# dropped again so the result has the same columns as before.
opt_tests <- df %>%
  group_by(participant, realized_price) %>%
  summarise(
    mean_inv = mean(investment_level, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    optimal = case_when(
      realized_price == 17000 ~ 800,
      realized_price == 23000 ~ 2400,
      TRUE ~ NA_real_
    ),
    # + => over-invest, - => under-invest
    diff = mean_inv - optimal
  ) %>%
  group_by(realized_price) %>%
  summarise(
    n = n(),
    mean_diff = mean(diff, na.rm = TRUE),
    direction = case_when(
      mean_diff > 0 ~ "Over-invest (above optimal)",
      mean_diff < 0 ~ "Under-invest (below optimal)",
      TRUE ~ "Exactly optimal (on average)"
    ),
    tt = list(t.test(diff, mu = 0)),
    t = tt[[1]]$statistic,
    p_value = tt[[1]]$p.value,
    conf_low = tt[[1]]$conf.int[1],
    conf_high = tt[[1]]$conf.int[2],
    .groups = "drop"
  ) %>%
  select(-tt) %>%
  arrange(realized_price)
opt_tests
## # A tibble: 2 × 8
## realized_price n mean_diff direction t p_value conf_low conf_high
## <dbl> <int> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 17000 31 986. Over-invest (… 6.78 1.61e-7 689. 1283.
## 2 23000 31 -197. Under-invest … -2.88 7.35e-3 -338. -57.2
# Mean deviation from the optimal investment with a 95% t-based CI, per price.
# The t-test is fitted once per price and stored in a temporary list-column
# (`tt`) that is removed after the CI bounds have been extracted.
ci_df <- df %>%
  group_by(participant, realized_price) %>%
  summarise(
    mean_inv = mean(investment_level, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    optimal = case_when(
      realized_price == 17000 ~ 800,
      realized_price == 23000 ~ 2400,
      TRUE ~ NA_real_
    ),
    diff = mean_inv - optimal
  ) %>%
  group_by(realized_price) %>%
  summarise(
    mean_diff = mean(diff, na.rm = TRUE),
    tt = list(t.test(diff, mu = 0)),
    ci_low = tt[[1]]$conf.int[1],
    ci_high = tt[[1]]$conf.int[2],
    .groups = "drop"
  ) %>%
  select(-tt) %>%
  # Discrete x axis with a fixed 17000 -> 23000 order.
  mutate(price = factor(realized_price, levels = c(17000, 23000)))

# Point = mean deviation, bar = 95% CI; the dashed line at 0 marks "exactly
# optimal on average".
ggplot(ci_df, aes(x = price, y = mean_diff)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high), size = 0.6) +
  labs(
    x = "Realized electricity price",
    y = "Mean deviation from optimal investment ($)",
    title = "Deviation from optimal investment with 95% confidence intervals"
  ) +
  theme_minimal(base_size = 12)

# Participant-level 95% confidence intervals, one row per participant and
# realized price. Each interval is t-based and built from that participant's
# own non-missing investment_level values at that price.
ci_person <- df %>%
  filter(realized_price == 17000 | realized_price == 23000) %>%
  group_by(participant, realized_price) %>%
  summarise(
    n = sum(!is.na(investment_level)),
    mean_inv = mean(investment_level, na.rm = TRUE),
    sd_inv = sd(investment_level, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    se = sd_inv / sqrt(n),            # standard error of the mean
    t_crit = qt(0.975, df = n - 1),   # two-sided 95% critical value
    ci_low = mean_inv - t_crit * se,
    ci_high = mean_inv + t_crit * se
  )
ci_person
## # A tibble: 62 × 9
## participant realized_price n mean_inv sd_inv se t_crit ci_low ci_high
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 P.1 17000 20 2400 0 0 2.09 2400 2400
## 2 P.1 23000 20 2400 0 0 2.09 2400 2400
## 3 P.10 17000 20 2320 630. 141. 2.09 2025. 2615.
## 4 P.10 23000 20 2440 840. 188. 2.09 2047. 2833.
## 5 P.11 17000 20 3160 1021. 228. 2.09 2682. 3638.
## 6 P.11 23000 20 2400 971. 217. 2.09 1945. 2855.
## 7 P.12 17000 20 2400 0 0 2.09 2400 2400
## 8 P.12 23000 20 2400 0 0 2.09 2400 2400
## 9 P.13 17000 20 960 492. 110. 2.09 730. 1190.
## 10 P.13 23000 20 2400 0 0 2.09 2400 2400
## # ℹ 52 more rows
# Build the participant-level CI plot for one realized price.
#
# data         : table shaped like ci_person (uses participant,
#                realized_price, mean_inv, ci_low, ci_high).
# target_price : realized price to keep (e.g. 17000); also used for the
#                "17k"-style label in the title.
# optimal      : optimal investment for that price; drawn as the dashed
#                reference line and quoted in the subtitle, so the two can
#                never disagree.
#
# Returns a ggplot object with participants ordered by mean investment.
plot_person_ci <- function(data, target_price, optimal) {
  price_label <- paste0(target_price / 1000, "k")

  data %>%
    filter(realized_price == target_price) %>%
    ggplot(aes(x = reorder(participant, mean_inv), y = mean_inv)) +
    geom_pointrange(aes(ymin = ci_low, ymax = ci_high), size = 0.3) +
    geom_hline(yintercept = optimal, linetype = "dashed") +
    coord_flip() +
    labs(
      x = NULL,
      y = "Mean investment ($) with 95% CI",
      title = paste0(
        "Participant mean investment (", price_label, " realized price)"
      ),
      subtitle = paste0("Dashed line = optimal investment (", optimal, ")")
    ) +
    theme_minimal(base_size = 12)
}

p17 <- plot_person_ci(ci_person, target_price = 17000, optimal = 800)
p17

p23 <- plot_person_ci(ci_person, target_price = 23000, optimal = 2400)
p23

# --- 17k: split each participant's 17k occurrences into first 10 vs the rest ---
# Rows are ordered by round within participant; the first 10 rows are labelled
# "First 10 rounds" and every later row "Second 10 rounds". A mean and t-based
# 95% CI is then computed per participant and half.
ci_17k <- df %>%
  filter(realized_price == 17000, !is.na(investment_level)) %>%
  arrange(participant, round) %>%
  group_by(participant) %>%
  mutate(
    half_10 = if_else(
      row_number() <= 10,
      "First 10 rounds",
      "Second 10 rounds"
    )
  ) %>%
  group_by(participant, half_10) %>%
  summarise(
    n = n(),
    mean_inv = mean(investment_level),
    sd_inv = sd(investment_level),
    .groups = "drop"
  ) %>%
  mutate(
    se = sd_inv / sqrt(n),
    tcrit = qt(0.975, df = n - 1),
    ci_low = mean_inv - tcrit * se,
    ci_high = mean_inv + tcrit * se
  )

# Optional: one shared participant ordering (ascending by the average of the
# two half means) so both facet panels list participants in the same order.
participant_order <- ci_17k %>%
  group_by(participant) %>%
  summarise(overall_mean = mean(mean_inv), .groups = "drop") %>%
  arrange(overall_mean) %>%
  pull(participant)

# Lock in the participant order and the panel order via factor levels.
ci_17k <- ci_17k %>%
  mutate(
    participant = factor(participant, levels = participant_order),
    half_10 = factor(
      half_10,
      levels = c("First 10 rounds", "Second 10 rounds")
    )
  )
# --- Plot: two panels side-by-side, CI per participant, color = participant ---
# The dashed line marks the optimal investment (800); one facet per half.
# Color only distinguishes participants visually, so its legend is hidden.
ggplot(ci_17k, aes(x = participant, y = mean_inv, color = participant)) +
  geom_hline(yintercept = 800, linetype = "dashed") +
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high), size = 0.35) +
  coord_flip() +
  facet_wrap(~ half_10, nrow = 1) +
  xlab(NULL) +
  ylab("Mean investment ($) with 95% CI") +
  ggtitle(
    "Investment behavior under 17k electricity price",
    subtitle = "Participant-level 95% CIs for first vs second 10 occurrences (dashed line = optimal 800)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold")
  )

# --- 23k: split each participant's 23k occurrences into first 10 vs the rest ---
# Rows are ordered by round within participant; the first 10 rows are labelled
# "First 10 rounds" and every later row "Second 10 rounds". A mean and t-based
# 95% CI is then computed per participant and half.
ci_23k <- df %>%
  filter(realized_price == 23000, !is.na(investment_level)) %>%
  arrange(participant, round) %>%
  group_by(participant) %>%
  mutate(
    half_10 = if_else(
      row_number() <= 10,
      "First 10 rounds",
      "Second 10 rounds"
    )
  ) %>%
  group_by(participant, half_10) %>%
  summarise(
    n = n(),
    mean_inv = mean(investment_level),
    sd_inv = sd(investment_level),
    .groups = "drop"
  ) %>%
  mutate(
    se = sd_inv / sqrt(n),
    tcrit = qt(0.975, df = n - 1),
    ci_low = mean_inv - tcrit * se,
    ci_high = mean_inv + tcrit * se
  )

# Optional: one shared participant ordering (ascending by the average of the
# two half means) so both facet panels list participants in the same order.
participant_order_23 <- ci_23k %>%
  group_by(participant) %>%
  summarise(overall_mean = mean(mean_inv), .groups = "drop") %>%
  arrange(overall_mean) %>%
  pull(participant)

# Lock in the participant order and the panel order via factor levels.
ci_23k <- ci_23k %>%
  mutate(
    participant = factor(participant, levels = participant_order_23),
    half_10 = factor(
      half_10,
      levels = c("First 10 rounds", "Second 10 rounds")
    )
  )
# --- Plot: two panels side-by-side, CI per participant, color = participant ---
# The dashed line marks the optimal investment (2400); one facet per half.
# Color only distinguishes participants visually, so its legend is hidden.
ggplot(ci_23k, aes(x = participant, y = mean_inv, color = participant)) +
  geom_hline(yintercept = 2400, linetype = "dashed") +
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high), size = 0.35) +
  coord_flip() +
  facet_wrap(~ half_10, nrow = 1) +
  xlab(NULL) +
  ylab("Mean investment ($) with 95% CI") +
  ggtitle(
    "Investment behavior under 23k electricity price",
    subtitle = "Participant-level 95% CIs for first vs second 10 occurrences (dashed line = optimal 2400)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold")
  )

# Participant-level means and t-based 95% CIs per realized price and half.
# Within each participant and price, rows are ordered by round; the first 10
# rows form the "First half" and all later rows the "Second half".
ci_half <- df %>%
  filter(
    realized_price %in% c(17000, 23000),
    !is.na(investment_level)
  ) %>%
  arrange(participant, realized_price, round) %>%
  group_by(participant, realized_price) %>%
  mutate(
    half = if_else(row_number() <= 10, "First half", "Second half")
  ) %>%
  group_by(participant, realized_price, half) %>%
  summarise(
    n = n(),
    mean_inv = mean(investment_level),
    sd_inv = sd(investment_level),
    .groups = "drop"
  ) %>%
  mutate(
    se = sd_inv / sqrt(n),
    tcrit = qt(0.975, df = n - 1),
    ci_low = mean_inv - tcrit * se,
    ci_high = mean_inv + tcrit * se
  )
# Build the participant-level CI plot for one realized price and one half.
#
# data         : table shaped like ci_half (uses participant, realized_price,
#                half, mean_inv, ci_low, ci_high).
# target_price : realized price to keep (e.g. 17000); also used for the
#                "17k"-style label in the title.
# target_half  : value of `half` to keep ("First half" or "Second half");
#                also appended to the title.
# optimal      : optimal investment for that price; drawn as the dashed
#                reference line and quoted in the subtitle.
#
# Returns a ggplot object with participants ordered by mean investment.
plot_half_ci <- function(data, target_price, target_half, optimal) {
  price_label <- paste0(target_price / 1000, "k")

  data %>%
    filter(realized_price == target_price, half == target_half) %>%
    ggplot(aes(x = reorder(participant, mean_inv), y = mean_inv)) +
    geom_pointrange(aes(ymin = ci_low, ymax = ci_high), size = 0.3) +
    geom_hline(yintercept = optimal, linetype = "dashed") +
    coord_flip() +
    labs(
      x = NULL,
      y = "Mean investment ($) with 95% CI",
      title = paste0(
        "Participant mean investment (", price_label, ") — ", target_half
      ),
      subtitle = paste0("Dashed line = optimal investment (", optimal, ")")
    ) +
    theme_minimal(base_size = 12)
}

# ---- 17k plots (same style as p17) ----
p17_first <- plot_half_ci(ci_half, 17000, "First half", optimal = 800)
p17_second <- plot_half_ci(ci_half, 17000, "Second half", optimal = 800)

# ---- 23k plots (same style) ----
p23_first <- plot_half_ci(ci_half, 23000, "First half", optimal = 2400)
p23_second <- plot_half_ci(ci_half, 23000, "Second half", optimal = 2400)

# Print all four plots:
p17_first

p17_second

p23_first

p23_second

# Participant means for the 17k price, first half (the rows shown in p17_first).
p17_first_data <- ci_half %>%
  filter(realized_price == 17000 & half == "First half") %>%
  select(participant, mean_inv)
# One-sample t-test of those means against the optimal investment (800).
t.test(p17_first_data$mean_inv, mu = 800)
##
## One Sample t-test
##
## data: p17_first_data$mean_inv
## t = 6.7702, df = 30, p-value = 1.657e-07
## alternative hypothesis: true mean is not equal to 800
## 95 percent confidence interval:
## 1493.837 2093.260
## sample estimates:
## mean of x
## 1793.548
# Direction summary: number of participants, mean and SD of their means, and
# whether the average lies above or below the optimum of 800.
p17_first_data %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    sd_of_means = sd(mean_inv),
    direction = case_when(
      mean_of_means > 800 ~ "Over-invest (above 800)",
      mean_of_means < 800 ~ "Under-invest (below 800)",
      .default = "Exactly 800 (on average)"
    )
  )
## # A tibble: 1 × 4
## n mean_of_means sd_of_means direction
## <int> <dbl> <dbl> <chr>
## 1 31 1794. 817. Over-invest (above 800)
# Participant means for the 17k price, second half (the rows shown in
# p17_second).
p17_second_data <- ci_half %>%
  filter(realized_price == 17000 & half == "Second half") %>%
  select(participant, mean_inv)
# One-sample t-test of those means against the optimal investment (800).
t.test(p17_second_data$mean_inv, mu = 800)
##
## One Sample t-test
##
## data: p17_second_data$mean_inv
## t = 6.6436, df = 30, p-value = 2.343e-07
## alternative hypothesis: true mean is not equal to 800
## 95 percent confidence interval:
## 1477.403 2078.726
## sample estimates:
## mean of x
## 1778.065
# Direction summary: number of participants, mean and SD of their means, and
# whether the average lies above or below the optimum of 800.
p17_second_data %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    sd_of_means = sd(mean_inv),
    direction = case_when(
      mean_of_means > 800 ~ "Over-invest (above 800)",
      mean_of_means < 800 ~ "Under-invest (below 800)",
      .default = "Exactly 800 (on average)"
    )
  )
## # A tibble: 1 × 4
## n mean_of_means sd_of_means direction
## <int> <dbl> <dbl> <chr>
## 1 31 1778. 820. Over-invest (above 800)
# Participant means for the 23k price, first half (the rows shown in
# p23_first).
p23_first_data <- ci_half %>%
  filter(realized_price == 23000 & half == "First half") %>%
  select(participant, mean_inv)
# One-sample t-test of those means against the optimal investment (2400).
t.test(p23_first_data$mean_inv, mu = 2400)
##
## One Sample t-test
##
## data: p23_first_data$mean_inv
## t = -2.422, df = 30, p-value = 0.02169
## alternative hypothesis: true mean is not equal to 2400
## 95 percent confidence interval:
## 2071.790 2372.081
## sample estimates:
## mean of x
## 2221.935
# Direction summary: number of participants, mean and SD of their means, and
# whether the average lies above or below the optimum of 2400.
p23_first_data %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    sd_of_means = sd(mean_inv),
    direction = case_when(
      mean_of_means > 2400 ~ "Over-invest (above 2400)",
      mean_of_means < 2400 ~ "Under-invest (below 2400)",
      .default = "Exactly 2400 (on average)"
    )
  )
## # A tibble: 1 × 4
## n mean_of_means sd_of_means direction
## <int> <dbl> <dbl> <chr>
## 1 31 2222. 409. Under-invest (below 2400)
# Participant means for the 23k price, second half (the rows shown in
# p23_second).
p23_second_data <- ci_half %>%
  filter(realized_price == 23000 & half == "Second half") %>%
  select(participant, mean_inv)
# One-sample t-test of those means against the optimal investment (2400).
t.test(p23_second_data$mean_inv, mu = 2400)
##
## One Sample t-test
##
## data: p23_second_data$mean_inv
## t = -2.9967, df = 30, p-value = 0.005435
## alternative hypothesis: true mean is not equal to 2400
## 95 percent confidence interval:
## 2035.492 2330.960
## sample estimates:
## mean of x
## 2183.226
# Direction summary: number of participants, mean and SD of their means, and
# whether the average lies above or below the optimum of 2400.
p23_second_data %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    sd_of_means = sd(mean_inv),
    direction = case_when(
      mean_of_means > 2400 ~ "Over-invest (above 2400)",
      mean_of_means < 2400 ~ "Under-invest (below 2400)",
      .default = "Exactly 2400 (on average)"
    )
  )
## # A tibble: 1 × 4
## n mean_of_means sd_of_means direction
## <int> <dbl> <dbl> <chr>
## 1 31 2183. 403. Under-invest (below 2400)
# 17k price, all rounds: collapse to one mean investment per participant,
# using only rows with a recorded investment_level.
p17_all <- df %>%
  filter(realized_price == 17000 & !is.na(investment_level)) %>%
  group_by(participant) %>%
  summarise(mean_inv = mean(investment_level), .groups = "drop")
# One-sample t-test of the participant means against the optimum (800).
t.test(p17_all$mean_inv, mu = 800)
##
## One Sample t-test
##
## data: p17_all$mean_inv
## t = 6.7808, df = 30, p-value = 1.61e-07
## alternative hypothesis: true mean is not equal to 800
## 95 percent confidence interval:
## 1488.899 2082.714
## sample estimates:
## mean of x
## 1785.806
# Direction summary: number of participants, mean and SD of their means, and
# whether the average lies above or below the optimum of 800.
p17_all %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    sd_of_means = sd(mean_inv),
    direction = case_when(
      mean_of_means > 800 ~ "Over-invest (above 800)",
      mean_of_means < 800 ~ "Under-invest (below 800)",
      .default = "Exactly 800 (on average)"
    )
  )
## # A tibble: 1 × 4
## n mean_of_means sd_of_means direction
## <int> <dbl> <dbl> <chr>
## 1 31 1786. 809. Over-invest (above 800)
# 23k price, all rounds: collapse to one mean investment per participant,
# using only rows with a recorded investment_level.
p23_all <- df %>%
  filter(realized_price == 23000 & !is.na(investment_level)) %>%
  group_by(participant) %>%
  summarise(mean_inv = mean(investment_level), .groups = "drop")
# One-sample t-test of the participant means against the optimum (2400).
t.test(p23_all$mean_inv, mu = 2400)
##
## One Sample t-test
##
## data: p23_all$mean_inv
## t = -2.8755, df = 30, p-value = 0.007354
## alternative hypothesis: true mean is not equal to 2400
## 95 percent confidence interval:
## 2062.365 2342.797
## sample estimates:
## mean of x
## 2202.581
# Direction summary: number of participants, mean and SD of their means, and
# whether the average lies above or below the optimum of 2400.
p23_all %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    sd_of_means = sd(mean_inv),
    direction = case_when(
      mean_of_means > 2400 ~ "Over-invest (above 2400)",
      mean_of_means < 2400 ~ "Under-invest (below 2400)",
      .default = "Exactly 2400 (on average)"
    )
  )
## # A tibble: 1 × 4
## n mean_of_means sd_of_means direction
## <int> <dbl> <dbl> <chr>
## 1 31 2203. 382. Under-invest (below 2400)
# One mean per participant per price
by_person <- df %>%
  filter(
    realized_price %in% c(17000, 23000),
    !is.na(investment_level)
  ) %>%
  group_by(realized_price, participant) %>%
  summarise(mean_inv = mean(investment_level), .groups = "drop")

# CI of the mean of participant-means (one CI per price).
# The t-test is fitted once per price and kept in a temporary list-column
# (`tt`) that is dropped after the CI bounds are extracted.
ci_two <- by_person %>%
  group_by(realized_price) %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    tt = list(t.test(mean_inv)),
    ci_low = tt[[1]]$conf.int[1],
    ci_high = tt[[1]]$conf.int[2],
    .groups = "drop"
  ) %>%
  select(-tt) %>%
  mutate(
    # Look up the optimum while realized_price is still numeric. Doing it
    # after the factor conversion would compare a factor with a number, which
    # only works through implicit character coercion of the levels.
    optimal = case_when(
      realized_price == 17000 ~ 800,
      realized_price == 23000 ~ 2400,
      TRUE ~ NA_real_
    ),
    realized_price = factor(realized_price, levels = c(17000, 23000))
  )

# One facet per price: point = mean of participant means with its 95% CI,
# horizontal line = optimal investment for that price. linetype/shape are
# mapped to constant strings only to generate legend entries.
ggplot(ci_two, aes(x = 1, y = mean_of_means)) +
  geom_hline(
    aes(yintercept = optimal, linetype = "Optimal"),
    linewidth = 0.8
  ) +
  geom_pointrange(
    aes(ymin = ci_low, ymax = ci_high, shape = "Mean ± 95% CI"),
    size = 0.7
  ) +
  facet_wrap(~ realized_price, nrow = 1) +
  scale_x_continuous(breaks = NULL) +
  labs(
    x = NULL,
    y = "Mean of participant means ($)",
    title = "Average investment vs optimal (participant-level means)",
    linetype = NULL,
    shape = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    axis.text.x = element_blank(),
    strip.text = element_text(face = "bold")
  )
