timing bandit simulation

Read data:

library(readr)
library(jsonlite)
library(tidyr)
library(tidyverse)
library(lubridate)
library(dplyr)
library(chron)
library(gridExtra)
read_dist <- function(file_path){
  data <- read_tsv(file_path, col_types = cols(.default = "c")) 
  return(data)
}

dist <- read_dist('participant_metadata.txt')
head(dist)

# A tibble: 6 × 9
  Identifier `Messages Start` `Messages End` `Prefer Early Mo…` `Prefer Late M…`
  <chr>      <chr>            <chr>          <chr>              <chr>           
1 00007721   10:00            20:00          0                  0               
2 00057327   08:00            20:00          1                  1               
3 00095506   09:00            22:00          1                  0               
4 00107901   08:00            22:00          1                  0               
5 00111806   07:00            22:00          1                  0               
6 00138231   05:00            21:00          0                  0               
# … with 4 more variables: `Prefer Afternoon (12pm to 3pm)` <chr>,
#   `Prefer Early Evening (3pm to 6pm)` <chr>,
#   `Prefer Evening (6pm to 9pm)` <chr>,
#   `Prefer Late Night (9pm to midnight)` <chr>

dist |> 
  pivot_longer(cols=`Prefer Early Morning (6am to 9am)`:`Prefer Late Night (9pm to midnight)`,
               names_to='time_slot',
               values_to='indicator') |>
  group_by(time_slot) |>
  summarize(distribution = sum(as.numeric(indicator))/length(indicator),
            number_of_yes = sum(as.numeric(indicator)),
            total_number = length(indicator))

# A tibble: 6 × 4
  time_slot                           distribution number_of_yes total_number
  <chr>                                      <dbl>         <dbl>        <int>
1 Prefer Afternoon (12pm to 3pm)             0.471          1678         3562
2 Prefer Early Evening (3pm to 6pm)          0.434          1547         3562
3 Prefer Early Morning (6am to 9am)          0.343          1220         3562
4 Prefer Evening (6pm to 9pm)                0.484          1725         3562
5 Prefer Late Morning (9am to 12pm)          0.516          1838         3562
6 Prefer Late Night (9pm to midnight)        0.277           985         3562

# Define a custom function to convert time strings
convert_to_time <- function(time_str) {
  strftime(strptime(time_str, "%H:%M"), "%H:%M:%S")
}

df <- dist %>%
  drop_na(`Messages Start`, `Messages End`) %>%
  mutate(across(c(`Messages Start`, `Messages End`), convert_to_time)) %>%
  pivot_longer(cols=`Prefer Early Morning (6am to 9am)`:`Prefer Late Night (9pm to midnight)`,
               names_to='time_slot',
               values_to='is_preferred')

time_slot_ranges <- tibble(
  time_slot = c('Prefer Early Morning (6am to 9am)',
                'Prefer Late Morning (9am to 12pm)',
                'Prefer Afternoon (12pm to 3pm)',
                'Prefer Early Evening (3pm to 6pm)',
                'Prefer Evening (6pm to 9pm)',
                'Prefer Late Night (9pm to midnight)'),
  start_time = map(c('06:00', '09:00', '12:00', '15:00', '18:00', '21:00'), convert_to_time),
  end_time = map(c('09:00', '12:00', '15:00', '18:00', '21:00', '24:00'), convert_to_time)
)

df <- left_join(df, time_slot_ranges, by="time_slot") # join the time ranges

df <- df %>%
  mutate(is_allowed = case_when(
    (`Messages Start` <= start_time & `Messages End` >= end_time) ~ 1,
    TRUE ~ 0
  ))

# Calculate the number of preferred and allowed responses for each time slot
df_summary <- df %>%
  mutate(is_preferred = as.numeric(is_preferred), 
         is_allowed = as.numeric(is_allowed)) %>%
  group_by(time_slot) %>%
  summarise(
    is_preferred = sum(is_preferred, na.rm = TRUE),
    is_allowed = sum(is_allowed, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = c(is_preferred, is_allowed),
               names_to = 'response_type',
               values_to = 'count')

# Calculate the total count for each time slot
total_count <- df_summary %>%
  group_by(time_slot) %>%
  summarise(total = sum(count))

# Join total count to summary data
df_summary <- left_join(df_summary, total_count)

# Calculate percentage
df_summary <- df_summary %>%
  mutate(percentage = count / total * 100)

# Set plot size
options(repr.plot.width = 20, repr.plot.height = 15)

# Calculate the maximum count to adjust y limits
max_count <- max(df_summary$count)

# Draw a bar plot
ggplot(df_summary, aes(x = time_slot, y = count, fill = response_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6) +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), 
            position = position_dodge(width = 0.6), vjust = -0.25, size = 4) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(limits = c(0, max_count * 1.1)) + # Add 10% extra space for labels
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
        axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 14, face = "bold"),
        plot.title = element_text(size = 16, face = "bold"),
        legend.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12)) +
  labs(x = "Time Slot", y = "Count", fill = "Response Type", 
       title = "Comparison of Preferred vs Allowed Responses by Time Slot")

df %>%
  group_by(time_slot) %>%
  summarize(distribution_preferred = sum(as.numeric(is_preferred)) / n(),
            distribution_allowed = sum(is_allowed)/n(),
            total_number = n(),
            .groups = "drop")

# A tibble: 6 × 4
  time_slot                       distribution_pr… distribution_al… total_number
  <chr>                                      <dbl>            <dbl>        <int>
1 Prefer Afternoon (12pm to 3pm)             0.471           0.908          3562
2 Prefer Early Evening (3pm to 6…            0.434           0.903          3562
3 Prefer Early Morning (6am to 9…            0.343           0.0999         3562
4 Prefer Evening (6pm to 9pm)                0.484           0.742          3562
5 Prefer Late Morning (9am to 12…            0.516           0.762          3562
6 Prefer Late Night (9pm to midn…            0.277           0.999          3562