Timing bandit simulation

Read data:

library(readr)
library(jsonlite)
library(tidyr)
library(tidyverse)
library(lubridate)
library(dplyr)
library(chron)
library(gridExtra)
# Read a tab-separated file with every column kept as character, so no value
# is reinterpreted by readr's column-type guessing.
#
# file_path: path to the TSV file.
# Returns the tibble produced by read_tsv().
read_dist <- function(file_path) {
  read_tsv(file_path, col_types = cols(.default = col_character()))
}

dist <- read_dist("participant_metadata.txt")
# Convert "HH:MM" clock strings to zero-padded "HH:MM:SS" strings.
#
# time_str: character vector of times of day in "%H:%M" form.
# Returns a character vector of the same length; missing or unparseable
# inputs yield NA.
#
# The output is meant to be compared lexicographically as a time of day, so
# "24:00" (end of day) is returned as "24:00:00" instead of being wrapped to
# "00:00:00", which would sort before every other time.
convert_to_time <- function(time_str) {
  time_str <- as.character(time_str)
  # Parse in UTC so the result never depends on the local zone's DST rules.
  parsed <- strptime(time_str, format = "%H:%M", tz = "UTC")
  formatted <- format(parsed, format = "%H:%M:%S")
  # strptime() accepts hour 24 by rolling over to 00:00 of the next day;
  # restore the end-of-day spelling so ordering is preserved.
  is_end_of_day <- grepl("^\\s*24:00(:00)?\\s*$", time_str)
  formatted[is_end_of_day] <- "24:00:00"
  formatted
}

# Keep only rows with both ends of the messaging window present, convert the
# window bounds with convert_to_time(), and reshape the preference columns to
# one row per input row and time slot.
df <- dist %>%
  filter(!is.na(`Messages Start`), !is.na(`Messages End`)) %>%
  mutate(
    `Messages Start` = convert_to_time(`Messages Start`),
    `Messages End` = convert_to_time(`Messages End`)
  ) %>%
  pivot_longer(
    cols = `Prefer Early Morning (6am to 9am)`:`Prefer Late Night (9pm to midnight)`,
    names_to = "time_slot",
    values_to = "is_preferred"
  )

# Clock-time boundaries of each preference slot, keyed by the survey column
# name. Bounds are "HH:MM:SS" strings so they compare lexicographically
# against the converted `Messages Start` / `Messages End` values.
#
# The bounds are written out literally as plain character columns:
#   * the last slot must end at "24:00:00"; a strptime()-based conversion of
#     "24:00" rolls over to "00:00:00", which sorts before every start time
#     and makes the late-night slot look allowed for almost everyone;
#   * character vectors avoid the list columns that map() would create.
time_slot_ranges <- tibble(
  time_slot = c(
    "Prefer Early Morning (6am to 9am)",
    "Prefer Late Morning (9am to 12pm)",
    "Prefer Afternoon (12pm to 3pm)",
    "Prefer Early Evening (3pm to 6pm)",
    "Prefer Evening (6pm to 9pm)",
    "Prefer Late Night (9pm to midnight)"
  ),
  start_time = c(
    "06:00:00", "09:00:00", "12:00:00", "15:00:00", "18:00:00", "21:00:00"
  ),
  end_time = c(
    "09:00:00", "12:00:00", "15:00:00", "18:00:00", "21:00:00", "24:00:00"
  )
)

# Attach each slot's start/end boundaries to the long-format rows.
df <- df %>%
  left_join(time_slot_ranges, by = "time_slot")

# Chronological order of the slots, used as factor levels.
time_slots_order <- c(
  "Prefer Early Morning (6am to 9am)",
  "Prefer Late Morning (9am to 12pm)",
  "Prefer Afternoon (12pm to 3pm)",
  "Prefer Early Evening (3pm to 6pm)",
  "Prefer Evening (6pm to 9pm)",
  "Prefer Late Night (9pm to midnight)"
)

# Order the slots chronologically and flag, per row, whether the messaging
# window touches the slot. A row is allowed (1) when the window starts inside
# the slot, ends inside the slot, or fully covers it; otherwise 0. Bounds are
# inclusive, and a comparison that evaluates to NA counts as "not allowed".
df <- df %>%
  mutate(
    time_slot = factor(time_slot, levels = time_slots_order),
    is_allowed = if_else(
      coalesce(
        (`Messages Start` >= start_time & `Messages Start` <= end_time) |
          (`Messages End` >= start_time & `Messages End` <= end_time) |
          (`Messages Start` <= start_time & `Messages End` >= end_time),
        FALSE
      ),
      1,
      0
    )
  )

# Per time slot, total the preferred and allowed flags (missing values are
# ignored), then stack the two totals into long form: one row per slot and
# response type, with the total in `count`.
df_summary <- df %>%
  mutate(across(c(is_preferred, is_allowed), as.numeric)) %>%
  group_by(time_slot) %>%
  summarise(
    across(c(is_preferred, is_allowed), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(is_preferred, is_allowed),
    names_to = "response_type",
    values_to = "count"
  )

# Combined count (preferred + allowed) for each time slot.
total_count <- df_summary %>%
  group_by(time_slot) %>%
  summarise(total = sum(count), .groups = "drop")

# Attach the per-slot total and express each count as a share of it.
# The join key is named explicitly so the join cannot silently widen to any
# other column the two tables happen to share.
# Note: `percentage` is the share of the slot's combined preferred + allowed
# count, so the two response types of a slot always sum to 100.
df_summary <- df_summary %>%
  left_join(total_count, by = "time_slot") %>%
  mutate(percentage = count / total * 100)

# Plot dimensions for rendered output
# (presumably read by the notebook's repr package; confirm).
options(repr.plot.width = 20, repr.plot.height = 15)

# Largest count over all slot / response-type rows.
max_count <- max(df_summary[["count"]])

# Short axis labels for each survey time-slot column name.
time_slot_abbreviations <- c(
  "Prefer Early Morning (6am to 9am)" = "6-9am",
  "Prefer Late Morning (9am to 12pm)" = "9am-12pm",
  "Prefer Afternoon (12pm to 3pm)" = "12-3pm",
  "Prefer Early Evening (3pm to 6pm)" = "3-6pm",
  "Prefer Evening (6pm to 9pm)" = "6-9pm",
  "Prefer Late Night (9pm to midnight)" = "9pm-12am"
)

# Replace the long slot names with their abbreviations.
# Look up by label via as.character(): indexing a named vector with a factor
# uses the factor's integer codes, which is only right while the factor
# levels and the mapping happen to be in the same order. unname() stops the
# long names from riding along as element names.
df_summary$time_slot <- unname(
  time_slot_abbreviations[as.character(df_summary$time_slot)]
)

# Chronological order of the abbreviated labels (same order as the mapping).
time_slot_levels <- unname(time_slot_abbreviations)

# Store the slot as a factor so plots and tables keep chronological order.
df_summary$time_slot <- factor(df_summary$time_slot, levels = time_slot_levels)

# Dodged bar chart of the preferred vs allowed share per time slot, with the
# percentage printed above each bar.
ggplot(df_summary, aes(x = time_slot, y = percentage, fill = response_type)) +
  # geom_col() plots the y values as given (no counting), one bar per row.
  geom_col(position = "dodge", width = 0.6) +
  # Dodge width matches the bar width so each label sits over its own bar.
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_dodge(width = 0.6), vjust = -0.5, size = 4
  ) +
  scale_fill_brewer(palette = "Set2") +
  # Fixed 0-100 percentage axis; no extra headroom is added above 100.
  scale_y_continuous(limits = c(0, 100)) +
  theme_minimal() +
  theme(
    # Slant the x-axis labels so neighbouring slot names do not collide.
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(size = 16, face = "bold"),
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 12)
  ) +
  labs(
    x = "Time Slot", y = "Percentage", fill = "Response Type",
    title = "Comparison of Preferred vs Allowed Responses by Time Slot"
  )

Also look at the distribution table:

# Print the summary table (time slot, response type, count, total, percentage).
df_summary 
# A tibble: 12 × 5
   time_slot response_type count total percentage
   <fct>     <chr>         <dbl> <dbl>      <dbl>
 1 6-9am     is_preferred   1220  4038       30.2
 2 6-9am     is_allowed     2818  4038       69.8
 3 9am-12pm  is_preferred   1838  5309       34.6
 4 9am-12pm  is_allowed     3471  5309       65.4
 5 12-3pm    is_preferred   1678  5029       33.4
 6 12-3pm    is_allowed     3351  5029       66.6
 7 3-6pm     is_preferred   1547  4808       32.2
 8 3-6pm     is_allowed     3261  4808       67.8
 9 6-9pm     is_preferred   1725  4950       34.8
10 6-9pm     is_allowed     3225  4950       65.2
11 9pm-12am  is_preferred    985  4543       21.7
12 9pm-12am  is_allowed     3558  4543       78.3