library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
df_228 <- read.csv("C:/Users/nasimy/OneDrive - University Of Oregon/Second-year-paper/R_experiments/session_228_v2.csv")

Remove participants who did not finish the game.

# 1) Find participants who are missing investment in any round
bad_ids <- df_228 %>%
  group_by(Participant) %>%
  summarise(
    n_rounds = n_distinct(round),
    n_invest = sum(!is.na(investment_level)),
    .groups = "drop"
  ) %>%
  filter(n_rounds < 40 | n_invest < 40) %>%
  pull(Participant)

# 2) Remove them
df_228_clean <- df_228 %>%
  filter(!Participant %in% bad_ids)

# (Optional) quick check
df_228_clean %>%
  group_by(Participant) %>%
  summarise(n_invest = sum(!is.na(investment_level)), .groups = "drop") %>%
  summarise(min_invest = min(n_invest), max_invest = max(n_invest))
## # A tibble: 1 × 2
##   min_invest max_invest
##        <int>      <int>
## 1         40         40
avg_by_price <- df_228_clean %>%
  filter(electricity_price %in% c(17000, 23000)) %>%   # adjust name if yours differs
  group_by(Participant, electricity_price) %>%
  summarise(avg_investment = mean(investment_level, na.rm = TRUE), .groups = "drop") %>%
  arrange(Participant, electricity_price)

avg_by_price
## # A tibble: 52 × 3
##    Participant electricity_price avg_investment
##    <chr>                   <dbl>          <dbl>
##  1 P.10                    17000           1280
##  2 P.10                    23000           2360
##  3 P.11                    17000           1160
##  4 P.11                    23000           2320
##  5 P.12                    17000            920
##  6 P.12                    23000           2360
##  7 P.13                    17000           1360
##  8 P.13                    23000            880
##  9 P.14                    17000           2480
## 10 P.14                    23000           2400
## # ℹ 42 more rows
avg_by_price_wide <- avg_by_price %>%
  mutate(electricity_price = paste0("p", electricity_price)) %>%
  pivot_wider(names_from = electricity_price, values_from = avg_investment)

avg_by_price_wide
## # A tibble: 26 × 3
##    Participant p17000 p23000
##    <chr>        <dbl>  <dbl>
##  1 P.10          1280   2360
##  2 P.11          1160   2320
##  3 P.12           920   2360
##  4 P.13          1360    880
##  5 P.14          2480   2400
##  6 P.15          2080   1840
##  7 P.16           760    800
##  8 P.17          1880   1520
##  9 P.18           800   2400
## 10 P.20          2120   1960
## # ℹ 16 more rows
ci_by_participant <- function(data) {
  data %>%
    group_by(Participant) %>%
    summarise(
      n = sum(!is.na(investment_level)),
      mean = mean(investment_level, na.rm = TRUE),
      se = sd(investment_level, na.rm = TRUE) / sqrt(n),
      ci_low = mean - 1.96 * se,
      ci_high = mean + 1.96 * se,
      .groups = "drop"
    )
}

# ---- 17k ----
ci_17k <- df_228_clean %>%
  filter(electricity_price == 17000) %>%
  ci_by_participant()

p_17k <- ci_17k %>%
  ggplot(aes(x = reorder(Participant, mean), y = mean)) +
  geom_point(size = 1.8) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15) +
  geom_hline(yintercept = 800, linetype = "dashed", linewidth = 0.8) +
  coord_flip() +
  labs(
    title = "Participant investment (95% CI) — Price = 17k",
    x = NULL, y = "Mean investment"
  ) +
  theme_minimal(base_size = 12)

p_17k

# ---- 23k ----
ci_23k <- df_228_clean %>%
  filter(electricity_price == 23000) %>%
  ci_by_participant()

p_23k <- ci_23k %>%
  ggplot(aes(x = reorder(Participant, mean), y = mean)) +
  geom_point(size = 1.8) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15) +
  geom_hline(yintercept = 2400, linetype = "dashed", linewidth = 0.8) +
  coord_flip() +
  labs(
    title = "Participant investment (95% CI) — Price = 23k",
    x = NULL, y = "Mean investment"
  ) +
  theme_minimal(base_size = 12)

p_23k

# 1) Build participant-level CI data for 17k, split into first/second 10 rounds
ci_17k_halves <- df_228_clean %>%
  filter(electricity_price == 17000) %>%
  group_by(Participant) %>%
  arrange(round, .by_group = TRUE) %>%                       # order rounds within participant
  mutate(
    half = if_else(row_number() <= 10, "First 10", "Second 10")
  ) %>%
  group_by(Participant, half) %>%
  summarise(
    n = sum(!is.na(investment_level)),
    mean = mean(investment_level, na.rm = TRUE),
    se = sd(investment_level, na.rm = TRUE) / sqrt(n),
    ci_low  = mean - 1.96 * se,
    ci_high = mean + 1.96 * se,
    .groups = "drop"
  )

# 2) Plot: two panels next to each other + participant colors + optimal line
p_17k_halves <- ci_17k_halves %>%
  ggplot(aes(x = reorder(Participant, mean), y = mean, color = Participant)) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15, alpha = 0.9) +
  geom_point(size = 1.8) +
  geom_hline(yintercept = 800, linetype = "dashed", linewidth = 0.8) +
  coord_flip() +
  facet_wrap(~ half, nrow = 1) +
  labs(
    title = "Investment by participant (95% CI) — Price = 17k",
    x = NULL, y = "Mean investment"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")  # legend would be massive; remove for aesthetics

p_17k_halves

# 1) Build participant-level CI data for 23k, split into first/second 10 rounds
ci_23k_halves <- df_228_clean %>%
  filter(electricity_price == 23000) %>%
  group_by(Participant) %>%
  arrange(round, .by_group = TRUE) %>%                       # order rounds within participant
  mutate(
    half = if_else(row_number() <= 10, "First 10", "Second 10")
  ) %>%
  group_by(Participant, half) %>%
  summarise(
    n = sum(!is.na(investment_level)),
    mean = mean(investment_level, na.rm = TRUE),
    se = sd(investment_level, na.rm = TRUE) / sqrt(n),
    ci_low  = mean - 1.96 * se,
    ci_high = mean + 1.96 * se,
    .groups = "drop"
  )

# 2) Plot: two panels next to each other + participant colors + optimal line
p_23k_halves <- ci_23k_halves %>%
  ggplot(aes(x = reorder(Participant, mean), y = mean, color = Participant)) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.15, alpha = 0.9) +
  geom_point(size = 1.8) +
  geom_hline(yintercept = 2400, linetype = "dashed", linewidth = 0.8) +
  coord_flip() +
  facet_wrap(~ half, nrow = 1) +
  labs(
    title = "Investment by participant (95% CI) — Price = 23k",
    x = NULL, y = "Mean investment"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none")  # legend would be massive; remove for aesthetics

p_23k_halves

Do subjects move closer to 800 (optimal investment level for 17K electricity price) over time, within the same price condition, within the same individuals?

opt <- 800

dist_17k <- df_228_clean %>%
  filter(electricity_price == 17000) %>%
  group_by(Participant) %>%
  arrange(round, .by_group = TRUE) %>%
  mutate(half = if_else(row_number() <= 10, "first", "second")) %>%
  group_by(Participant, half) %>%
  summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop") %>%
  mutate(dist = abs(mean_inv - opt)) %>%
  select(Participant, half, dist) %>%
  pivot_wider(names_from = half, values_from = dist) %>%
  mutate(improvement = first - second)  # >0 means closer in second half
t.test(dist_17k$second, dist_17k$first, paired = TRUE, alternative = "less")
## 
##  Paired t-test
## 
## data:  dist_17k$second and dist_17k$first
## t = 1.8105, df = 25, p-value = 0.9589
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
##      -Inf 215.2741
## sample estimates:
## mean difference 
##        110.7692

Rejected.

dist_17k %>%
  pivot_longer(c(first, second), names_to = "half", values_to = "dist") %>%
  mutate(half = factor(half, levels = c("first", "second"))) %>%
  ggplot(aes(x = half, y = dist)) +
  geom_boxplot(outlier.alpha = 0.2) +
  geom_line(aes(group = Participant), alpha = 0.2) +
  labs(
    title = "Distance to optimal investment (800) — Price 17k",
    x = NULL, y = "|Mean investment − 800|"
  ) +
  theme_minimal(base_size = 12)

Do subjects move closer to 2400 (optimal investment level for 23K electricity price) over time, within the same price condition, within the same individuals?

opt <- 2400

dist_23k <- df_228_clean %>%
  filter(electricity_price == 23000) %>%
  group_by(Participant) %>%
  arrange(round, .by_group = TRUE) %>%
  mutate(half = if_else(row_number() <= 10, "first", "second")) %>%
  group_by(Participant, half) %>%
  summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop") %>%
  mutate(dist = abs(mean_inv - opt)) %>%
  select(Participant, half, dist) %>%
  pivot_wider(names_from = half, values_from = dist) %>%
  mutate(improvement = first - second)  # >0 means closer in second half
t.test(dist_23k$second, dist_23k$first, paired = TRUE, alternative = "less")
## 
##  Paired t-test
## 
## data:  dist_23k$second and dist_23k$first
## t = 2.6093, df = 25, p-value = 0.9924
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
##      -Inf 244.3765
## sample estimates:
## mean difference 
##        147.6923

Rejected.

dist_23k %>%
  pivot_longer(c(first, second), names_to = "half", values_to = "dist") %>%
  mutate(half = factor(half, levels = c("first", "second"))) %>%
  ggplot(aes(x = half, y = dist)) +
  geom_boxplot(outlier.alpha = 0.2) +
  geom_line(aes(group = Participant), alpha = 0.2) +
  labs(
    title = "Distance to optimal investment (2400) — Price 23k",
    x = NULL, y = "|Mean investment − 2400|"
  ) +
  theme_minimal(base_size = 12)

# 1) Mean investment per participant within each price
pm <- df_228_clean %>%
  filter(electricity_price %in% c(17000, 23000)) %>%
  group_by(Participant, electricity_price) %>%
  summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop")

# 2) Average of those participant means (per price)
pm_summary <- pm %>%
  group_by(electricity_price) %>%
  summarise(
    n_participants = n(),
    avg_of_participant_means = mean(mean_inv),
    sd_of_participant_means  = sd(mean_inv),
    .groups = "drop"
  )

pm_summary
## # A tibble: 2 × 4
##   electricity_price n_participants avg_of_participant_m…¹ sd_of_participant_me…²
##               <dbl>          <int>                  <dbl>                  <dbl>
## 1             17000             26                  1231.                   709.
## 2             23000             26                  1625.                   790.
## # ℹ abbreviated names: ¹​avg_of_participant_means, ²​sd_of_participant_means
# 3) Test: are participant means different from the optimal level?
test_17k <- t.test(pm %>% filter(electricity_price == 17000) %>% pull(mean_inv),
                   mu = 800)

test_23k <- t.test(pm %>% filter(electricity_price == 23000) %>% pull(mean_inv),
                   mu = 2400)

test_17k
## 
##  One Sample t-test
## 
## data:  pm %>% filter(electricity_price == 17000) %>% pull(mean_inv)
## t = 3.0988, df = 25, p-value = 0.004757
## alternative hypothesis: true mean is not equal to 800
## 95 percent confidence interval:
##   944.4668 1517.0717
## sample estimates:
## mean of x 
##  1230.769
test_23k
## 
##  One Sample t-test
## 
## data:  pm %>% filter(electricity_price == 23000) %>% pull(mean_inv)
## t = -5.0076, df = 25, p-value = 3.654e-05
## alternative hypothesis: true mean is not equal to 2400
## 95 percent confidence interval:
##  1305.714 1943.517
## sample estimates:
## mean of x 
##  1624.615
# 1) Participant means within each price
pm <- df_228_clean %>%
  filter(electricity_price %in% c(17000, 23000)) %>%
  group_by(Participant, electricity_price) %>%
  summarise(mean_inv = mean(investment_level, na.rm = TRUE), .groups = "drop")

# 2) Report table: mean of participant means + direction + 95% CI + p-value
opt_tbl <- tibble(
  electricity_price = c(17000, 23000),
  optimal = c(800, 2400)
)

report_tbl <- opt_tbl %>%
  mutate(
    x = map(electricity_price, ~ pm %>% filter(electricity_price == .x) %>% pull(mean_inv)),
    test = map2(x, optimal, ~ t.test(.x, mu = .y))  # two-sided by default
  ) %>%
  transmute(
    electricity_price,
    optimal,
    n_participants = map_int(x, length),
    avg_of_participant_means = map_dbl(x, mean),
    diff_from_optimal = avg_of_participant_means - optimal,
    direction = case_when(
      diff_from_optimal > 0 ~ "Above optimal",
      diff_from_optimal < 0 ~ "Below optimal",
      TRUE ~ "Exactly optimal"
    ),
    ci_low = map_dbl(test, ~ .x$conf.int[1]),
    ci_high = map_dbl(test, ~ .x$conf.int[2]),
    p_value = map_dbl(test, ~ .x$p.value)
  )

report_tbl
## # A tibble: 2 × 9
##   electricity_price optimal n_participants avg_of_participant_means
##               <dbl>   <dbl>          <int>                    <dbl>
## 1             17000     800             26                    1231.
## 2             23000    2400             26                    1625.
## # ℹ 5 more variables: diff_from_optimal <dbl>, direction <chr>, ci_low <dbl>,
## #   ci_high <dbl>, p_value <dbl>
library(tidyverse)

# average investment per participant (within each price)
by_person_228 <- df_228_clean %>%
  filter(electricity_price %in% c(17000, 23000), !is.na(investment_level)) %>%
  group_by(electricity_price, Participant) %>%
  summarise(avg_investment = mean(investment_level), .groups = "drop")

# mean + SD of those participant averages (per price)
summary_tbl <- by_person_228 %>%
  group_by(electricity_price) %>%
  summarise(
    n_participants = n(),
    mean_avg_investment = mean(avg_investment),
    sd_avg_investment = sd(avg_investment),
    .groups = "drop"
  ) %>%
  mutate(electricity_price = factor(electricity_price, levels = c(17000, 23000))) %>%
  arrange(electricity_price)

summary_tbl
## # A tibble: 2 × 4
##   electricity_price n_participants mean_avg_investment sd_avg_investment
##   <fct>                      <int>               <dbl>             <dbl>
## 1 17000                         26               1231.              709.
## 2 23000                         26               1625.              790.
library(tidyverse)

# One mean per participant per price
by_person_228 <- df_228_clean %>%
  filter(electricity_price %in% c(17000, 23000), !is.na(investment_level)) %>%
  group_by(electricity_price, Participant) %>%
  summarise(mean_inv = mean(investment_level), .groups = "drop")

# CI of the mean of participant-means (one CI per price)
ci_two_228 <- by_person_228 %>%
  group_by(electricity_price) %>%
  summarise(
    n = n(),
    mean_of_means = mean(mean_inv),
    ci_low  = t.test(mean_inv)$conf.int[1],
    ci_high = t.test(mean_inv)$conf.int[2],
    .groups = "drop"
  ) %>%
  mutate(
    electricity_price = factor(electricity_price, levels = c(17000, 23000)),
    optimal = if_else(electricity_price == 17000, 800, 2400)
  )

ggplot(ci_two_228, aes(x = 1, y = mean_of_means)) +
  geom_hline(aes(yintercept = optimal, linetype = "Optimal"), linewidth = 0.8) +
  geom_pointrange(aes(ymin = ci_low, ymax = ci_high, shape = "Mean ± 95% CI"), size = 0.7) +
  facet_wrap(~ electricity_price, nrow = 1) +
  scale_x_continuous(breaks = NULL) +
  labs(
    x = NULL,
    y = "Mean of participant means ($)",
    title = "Average investment vs optimal (participant-level means)",
    linetype = NULL,
    shape = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    axis.text.x = element_blank(),
    strip.text = element_text(face = "bold")
  )