library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
df_228 <- read.csv("C:/Users/nasimy/OneDrive - University Of Oregon/Second-year-paper/R_experiments/session_228_v2.csv")
Remove participants who did not finish the game.
# 1) Find participants who are missing investment in any round
bad_ids <- df_228 %>%
group_by(Participant) %>%
summarise(
n_rounds = n_distinct(round),
n_invest = sum(!is.na(investment_level)),
.groups = "drop"
) %>%
filter(n_rounds < 40 | n_invest < 40) %>%
pull(Participant)
# 2) Remove them
df_228_clean <- df_228 %>%
filter(!Participant %in% bad_ids)
# (Optional) quick check
df_228_clean %>%
group_by(Participant) %>%
summarise(n_invest = sum(!is.na(investment_level)), .groups = "drop") %>%
summarise(min_invest = min(n_invest), max_invest = max(n_invest))
## # A tibble: 1 × 2
## min_invest max_invest
## <int> <int>
## 1 40 40
avg_by_price <- df_228_clean %>%
filter(electricity_price %in% c(17000, 23000)) %>% # adjust name if yours differs
group_by(Participant, electricity_price) %>%
summarise(avg_investment = mean(investment_level, na.rm = TRUE), .groups = "drop") %>%
arrange(Participant, electricity_price)
avg_by_price
## # A tibble: 52 × 3
## Participant electricity_price avg_investment
## <chr> <dbl> <dbl>
## 1 P.10 17000 1280
## 2 P.10 23000 2360
## 3 P.11 17000 1160
## 4 P.11 23000 2320
## 5 P.12 17000 920
## 6 P.12 23000 2360
## 7 P.13 17000 1360
## 8 P.13 23000 880
## 9 P.14 17000 2480
## 10 P.14 23000 2400
## # ℹ 42 more rows
avg_by_price_wide <- avg_by_price %>%
mutate(electricity_price = paste0("p", electricity_price)) %>%
pivot_wider(names_from = electricity_price, values_from = avg_investment)
avg_by_price_wide
## # A tibble: 26 × 3
## Participant p17000 p23000
## <chr> <dbl> <dbl>
## 1 P.10 1280 2360
## 2 P.11 1160 2320
## 3 P.12 920 2360
## 4 P.13 1360 880
## 5 P.14 2480 2400
## 6 P.15 2080 1840
## 7 P.16 760 800
## 8 P.17 1880 1520
## 9 P.18 800 2400
## 10 P.20 2120 1960
## # ℹ 16 more rows
ci_by_participant <- function(data) {
data %>%
group_by(Participant) %>%
summarise(
n = sum(!is.na(investment_level)),
mean = mean(investment_level, na.rm = TRUE),
se = sd(investment_level, na.rm = TRUE) / sqrt(n),
ci_low = mean - 1.96 * se,
ci_high = mean + 1.96 * se,
.groups = "drop"
)
}
# ---- 17k ----
ci_17k <- df_228_clean %>%
filter(electricity_price == 17000) %>%
ci_by_participant()
p_17k <- ci_17k %>%
ggplot(aes(x = reorder(Participant, mean), y = mean)) +
geom_point(size = 1.8) +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15) +
geom_hline(yintercept = 800, linetype = "dashed", linewidth = 0.8) +
coord_flip() +
labs(
title = "Participant investment (95% CI) — Price = 17k",
x = NULL, y = "Mean investment"
) +
theme_minimal(base_size = 12)
p_17k
# ---- 23k ----
ci_23k <- df_228_clean %>%
filter(electricity_price == 23000) %>%
ci_by_participant()
p_23k <- ci_23k %>%
ggplot(aes(x = reorder(Participant, mean), y = mean)) +
geom_point(size = 1.8) +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15) +
geom_hline(yintercept = 2400, linetype = "dashed", linewidth = 0.8) +
coord_flip() +
labs(
title = "Participant investment (95% CI) — Price = 23k",
x = NULL, y = "Mean investment"
) +
theme_minimal(base_size = 12)
p_23k
# 1) Build participant-level CI data for 17k, split into first/second 10 rounds
ci_17k_halves <- df_228_clean %>%
filter(electricity_price == 17000) %>%
group_by(Participant) %>%
arrange(round, .by_group = TRUE) %>% # order rounds within participant
mutate(
half = if_else(row_number() <= 10, "First 10", "Second 10")
) %>%
group_by(Participant, half) %>%
summarise(
n = sum(!is.na(investment_level)),
mean = mean(investment_level, na.rm = TRUE),
se = sd(investment_level, na.rm = TRUE) / sqrt(n),
ci_low = mean - 1.96 * se,
ci_high = mean + 1.96 * se,
.groups = "drop"
)
# 2) Plot: two panels next to each other + participant colors + optimal line
p_17k_halves <- ci_17k_halves %>%
ggplot(aes(x = reorder(Participant, mean), y = mean, color = Participant)) +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15, alpha = 0.9) +
geom_point(size = 1.8) +
geom_hline(yintercept = 800, linetype = "dashed", linewidth = 0.8) +
coord_flip() +
facet_wrap(~ half, nrow = 1) +
labs(
title = "Investment by participant (95% CI) — Price = 17k",
x = NULL, y = "Mean investment"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "none") # legend would be massive; remove for aesthetics
p_17k_halves
# 1) Build participant-level CI data for 23k, split into first/second 10 rounds
ci_23k_halves <- df_228_clean %>%
filter(electricity_price == 23000) %>%
group_by(Participant) %>%
arrange(round, .by_group = TRUE) %>% # order rounds within participant
mutate(
half = if_else(row_number() <= 10, "First 10", "Second 10")
) %>%
group_by(Participant, half) %>%
summarise(
n = sum(!is.na(investment_level)),
mean = mean(investment_level, na.rm = TRUE),
se = sd(investment_level, na.rm = TRUE) / sqrt(n),
ci_low = mean - 1.96 * se,
ci_high = mean + 1.96 * se,
.groups = "drop"
)
# 2) Plot: two panels next to each other + participant colors + optimal line
p_23k_halves <- ci_23k_halves %>%
ggplot(aes(x = reorder(Participant, mean), y = mean, color = Participant)) +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15, alpha = 0.9) +
geom_point(size = 1.8) +
geom_hline(yintercept = 2400, linetype = "dashed", linewidth = 0.8) +
coord_flip() +
facet_wrap(~ half, nrow = 1) +
labs(
title = "Investment by participant (95% CI) — Price = 23k",
x = NULL, y = "Mean investment"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "none") # legend would be massive; remove for aesthetics
p_23k_halves
Do subjects move closer to 800 (optimal investment level for 17K electricity price) over time, within the same price condition, within the same individuals?
opt <- 800
dist_17k <- df_228_clean %>%
filter(electricity_price == 17000) %>%
group_by(Participant) %>%
arrange(round, .by_group = TRUE) %>%
mutate(half = if_else(row_number() <= 10, "first", "second")) %>%
group_by(Participant, half) %>%
summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop") %>%
mutate(dist = abs(mean_inv - opt)) %>%
select(Participant, half, dist) %>%
pivot_wider(names_from = half, values_from = dist) %>%
mutate(improvement = first - second) # >0 means closer in second half
t.test(dist_17k$second, dist_17k$first, paired = TRUE, alternative = "less")
##
## Paired t-test
##
## data: dist_17k$second and dist_17k$first
## t = 1.8105, df = 25, p-value = 0.9589
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
## -Inf 215.2741
## sample estimates:
## mean difference
## 110.7692
Rejected.
dist_17k %>%
pivot_longer(c(first, second), names_to = "half", values_to = "dist") %>%
mutate(half = factor(half, levels = c("first", "second"))) %>%
ggplot(aes(x = half, y = dist)) +
geom_boxplot(outlier.alpha = 0.2) +
geom_line(aes(group = Participant), alpha = 0.2) +
labs(
title = "Distance to optimal investment (800) — Price 17k",
x = NULL, y = "|Mean investment − 800|"
) +
theme_minimal(base_size = 12)
Do subjects move closer to 2400 (optimal investment level for 23K electricity price) over time, within the same price condition, within the same individuals?
opt <- 2400
dist_23k <- df_228_clean %>%
filter(electricity_price == 23000) %>%
group_by(Participant) %>%
arrange(round, .by_group = TRUE) %>%
mutate(half = if_else(row_number() <= 10, "first", "second")) %>%
group_by(Participant, half) %>%
summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop") %>%
mutate(dist = abs(mean_inv - opt)) %>%
select(Participant, half, dist) %>%
pivot_wider(names_from = half, values_from = dist) %>%
mutate(improvement = first - second) # >0 means closer in second half
t.test(dist_23k$second, dist_23k$first, paired = TRUE, alternative = "less")
##
## Paired t-test
##
## data: dist_23k$second and dist_23k$first
## t = 2.6093, df = 25, p-value = 0.9924
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
## -Inf 244.3765
## sample estimates:
## mean difference
## 147.6923
Rejected.
dist_23k %>%
pivot_longer(c(first, second), names_to = "half", values_to = "dist") %>%
mutate(half = factor(half, levels = c("first", "second"))) %>%
ggplot(aes(x = half, y = dist)) +
geom_boxplot(outlier.alpha = 0.2) +
geom_line(aes(group = Participant), alpha = 0.2) +
labs(
title = "Distance to optimal investment (2400) — Price 23k",
x = NULL, y = "|Mean investment − 2400|"
) +
theme_minimal(base_size = 12)
# 1) Mean investment per participant within each price
pm <- df_228_clean %>%
filter(electricity_price %in% c(17000, 23000)) %>%
group_by(Participant, electricity_price) %>%
summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop")
# 2) Average of those participant means (per price)
pm_summary <- pm %>%
group_by(electricity_price) %>%
summarise(
n_participants = n(),
avg_of_participant_means = mean(mean_inv),
sd_of_participant_means = sd(mean_inv),
.groups = "drop"
)
pm_summary
## # A tibble: 2 × 4
## electricity_price n_participants avg_of_participant_m…¹ sd_of_participant_me…²
## <dbl> <int> <dbl> <dbl>
## 1 17000 26 1231. 709.
## 2 23000 26 1625. 790.
## # ℹ abbreviated names: ¹avg_of_participant_means, ²sd_of_participant_means
# 3) Test: are participant means different from the optimal level?
test_17k <- t.test(pm %>% filter(electricity_price == 17000) %>% pull(mean_inv),
mu = 800)
test_23k <- t.test(pm %>% filter(electricity_price == 23000) %>% pull(mean_inv),
mu = 2400)
test_17k
##
## One Sample t-test
##
## data: pm %>% filter(electricity_price == 17000) %>% pull(mean_inv)
## t = 3.0988, df = 25, p-value = 0.004757
## alternative hypothesis: true mean is not equal to 800
## 95 percent confidence interval:
## 944.4668 1517.0717
## sample estimates:
## mean of x
## 1230.769
test_23k
##
## One Sample t-test
##
## data: pm %>% filter(electricity_price == 23000) %>% pull(mean_inv)
## t = -5.0076, df = 25, p-value = 3.654e-05
## alternative hypothesis: true mean is not equal to 2400
## 95 percent confidence interval:
## 1305.714 1943.517
## sample estimates:
## mean of x
## 1624.615
# 1) Participant means within each price
pm <- df_228_clean %>%
filter(electricity_price %in% c(17000, 23000)) %>%
group_by(Participant, electricity_price) %>%
summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop")
# 2) Report table: mean of participant means + direction + 95% CI + p-value
opt_tbl <- tibble(
electricity_price = c(17000, 23000),
optimal = c(800, 2400)
)
report_tbl <- opt_tbl %>%
mutate(
x = map(electricity_price, ~ pm %>% filter(electricity_price == .x) %>% pull(mean_inv)),
test = map2(x, optimal, ~ t.test(.x, mu = .y)) # two-sided by default
) %>%
transmute(
electricity_price,
optimal,
n_participants = map_int(x, length),
avg_of_participant_means = map_dbl(x, mean),
diff_from_optimal = avg_of_participant_means - optimal,
direction = case_when(
diff_from_optimal > 0 ~ "Above optimal",
diff_from_optimal < 0 ~ "Below optimal",
TRUE ~ "Exactly optimal"
),
ci_low = map_dbl(test, ~ .x$conf.int[1]),
ci_high = map_dbl(test, ~ .x$conf.int[2]),
p_value = map_dbl(test, ~ .x$p.value)
)
report_tbl
## # A tibble: 2 × 9
## electricity_price optimal n_participants avg_of_participant_means
## <dbl> <dbl> <int> <dbl>
## 1 17000 800 26 1231.
## 2 23000 2400 26 1625.
## # ℹ 5 more variables: diff_from_optimal <dbl>, direction <chr>, ci_low <dbl>,
## # ci_high <dbl>, p_value <dbl>
library(tidyverse)
# average investment per participant (within each price)
by_person_228 <- df_228_clean %>%
filter(electricity_price %in% c(17000, 23000), !is.na(investment_level)) %>%
group_by(electricity_price, Participant) %>%
summarise(avg_investment = mean(investment_level), .groups = "drop")
# mean + SD of those participant averages (per price)
summary_tbl <- by_person_228 %>%
group_by(electricity_price) %>%
summarise(
n_participants = n(),
mean_avg_investment = mean(avg_investment),
sd_avg_investment = sd(avg_investment),
.groups = "drop"
) %>%
mutate(electricity_price = factor(electricity_price, levels = c(17000, 23000))) %>%
arrange(electricity_price)
summary_tbl
## # A tibble: 2 × 4
## electricity_price n_participants mean_avg_investment sd_avg_investment
## <fct> <int> <dbl> <dbl>
## 1 17000 26 1231. 709.
## 2 23000 26 1625. 790.
library(tidyverse)
# One mean per participant per price
by_person_228 <- df_228_clean %>%
filter(electricity_price %in% c(17000, 23000), !is.na(investment_level)) %>%
group_by(electricity_price, Participant) %>%
summarise(mean_inv = mean(investment_level), .groups = "drop")
# CI of the mean of participant-means (one CI per price)
ci_two_228 <- by_person_228 %>%
group_by(electricity_price) %>%
summarise(
n = n(),
mean_of_means = mean(mean_inv),
ci_low = t.test(mean_inv)$conf.int[1],
ci_high = t.test(mean_inv)$conf.int[2],
.groups = "drop"
) %>%
mutate(
electricity_price = factor(electricity_price, levels = c(17000, 23000)),
optimal = if_else(electricity_price == 17000, 800, 2400)
)
ggplot(ci_two_228, aes(x = 1, y = mean_of_means)) +
geom_hline(aes(yintercept = optimal, linetype = "Optimal"), linewidth = 0.8) +
geom_pointrange(aes(ymin = ci_low, ymax = ci_high, shape = "Mean ± 95% CI"), size = 0.7) +
facet_wrap(~ electricity_price, nrow = 1) +
scale_x_continuous(breaks = NULL) +
labs(
x = NULL,
y = "Mean of participant means ($)",
title = "Average investment vs optimal (participant-level means)",
linetype = NULL,
shape = NULL
) +
theme_minimal(base_size = 12) +
theme(
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
strip.text = element_text(face = "bold")
)