library(readr)
library(jsonlite)
library(tidyr)
library(tidyverse)
library(lubridate)
library(dplyr)
library(chron)
library(gridExtra)
read_dist <- function(file_path){
data <- read_tsv(file_path, col_types = cols(.default = "c"))
return(data)
}timing bandit simulation
Read data:
dist <- read_dist('participant_metadata.txt')
head(dist)# A tibble: 6 × 9
Identifier `Messages Start` `Messages End` `Prefer Early Mo…` `Prefer Late M…`
<chr> <chr> <chr> <chr> <chr>
1 00007721 10:00 20:00 0 0
2 00057327 08:00 20:00 1 1
3 00095506 09:00 22:00 1 0
4 00107901 08:00 22:00 1 0
5 00111806 07:00 22:00 1 0
6 00138231 05:00 21:00 0 0
# … with 4 more variables: `Prefer Afternoon (12pm to 3pm)` <chr>,
# `Prefer Early Evening (3pm to 6pm)` <chr>,
# `Prefer Evening (6pm to 9pm)` <chr>,
# `Prefer Late Night (9pm to midnight)` <chr>
dist |>
pivot_longer(cols=`Prefer Early Morning (6am to 9am)`:`Prefer Late Night (9pm to midnight)`,
names_to='time_slot',
values_to='indicator') |>
group_by(time_slot) |>
summarize(distribution = sum(as.numeric(indicator))/length(indicator),
number_of_yes = sum(as.numeric(indicator)),
total_number = length(indicator))# A tibble: 6 × 4
time_slot distribution number_of_yes total_number
<chr> <dbl> <dbl> <int>
1 Prefer Afternoon (12pm to 3pm) 0.471 1678 3562
2 Prefer Early Evening (3pm to 6pm) 0.434 1547 3562
3 Prefer Early Morning (6am to 9am) 0.343 1220 3562
4 Prefer Evening (6pm to 9pm) 0.484 1725 3562
5 Prefer Late Morning (9am to 12pm) 0.516 1838 3562
6 Prefer Late Night (9pm to midnight) 0.277 985 3562
# Define a custom function to convert time strings
convert_to_time <- function(time_str) {
strftime(strptime(time_str, "%H:%M"), "%H:%M:%S")
}
df <- dist %>%
drop_na(`Messages Start`, `Messages End`) %>%
mutate(across(c(`Messages Start`, `Messages End`), convert_to_time)) %>%
pivot_longer(cols=`Prefer Early Morning (6am to 9am)`:`Prefer Late Night (9pm to midnight)`,
names_to='time_slot',
values_to='is_preferred')
time_slot_ranges <- tibble(
time_slot = c('Prefer Early Morning (6am to 9am)',
'Prefer Late Morning (9am to 12pm)',
'Prefer Afternoon (12pm to 3pm)',
'Prefer Early Evening (3pm to 6pm)',
'Prefer Evening (6pm to 9pm)',
'Prefer Late Night (9pm to midnight)'),
start_time = map(c('06:00', '09:00', '12:00', '15:00', '18:00', '21:00'), convert_to_time),
end_time = map(c('09:00', '12:00', '15:00', '18:00', '21:00', '24:00'), convert_to_time)
)
df <- left_join(df, time_slot_ranges, by="time_slot") # join the time ranges
# Define the order of the time slots
time_slots_order <- c('Prefer Early Morning (6am to 9am)',
'Prefer Late Morning (9am to 12pm)',
'Prefer Afternoon (12pm to 3pm)',
'Prefer Early Evening (3pm to 6pm)',
'Prefer Evening (6pm to 9pm)',
'Prefer Late Night (9pm to midnight)')
df <- df %>%
mutate(time_slot = factor(time_slot, levels = time_slots_order),
is_allowed = case_when(
(`Messages Start` >= start_time & `Messages Start` <= end_time) ~ 1,
(`Messages End` >= start_time & `Messages End` <= end_time) ~ 1,
(`Messages Start` <= start_time & `Messages End` >= end_time) ~ 1,
TRUE ~ 0
))# Calculate the number of preferred and allowed responses for each time slot
df_summary <- df %>%
mutate(is_preferred = as.numeric(is_preferred),
is_allowed = as.numeric(is_allowed)) %>%
group_by(time_slot) %>%
summarise(
is_preferred = sum(is_preferred, na.rm = TRUE),
is_allowed = sum(is_allowed, na.rm = TRUE),
.groups = "drop"
) %>%
pivot_longer(cols = c(is_preferred, is_allowed),
names_to = 'response_type',
values_to = 'count')
# Calculate the total count for each time slot
total_count <- df_summary %>%
group_by(time_slot) %>%
summarise(total = sum(count))
# Join total count to summary data
df_summary <- left_join(df_summary, total_count)
# Calculate percentage
df_summary <- df_summary %>%
mutate(percentage = count / total * 100)
# Set plot size
options(repr.plot.width = 20, repr.plot.height = 15)
# Calculate the maximum count to adjust y limits
max_count <- max(df_summary$count)
# Create a mapping of time slot names to abbreviations
time_slot_abbreviations <- c(
'Prefer Early Morning (6am to 9am)' = '6-9am',
'Prefer Late Morning (9am to 12pm)' = '9am-12pm',
'Prefer Afternoon (12pm to 3pm)' = '12-3pm',
'Prefer Early Evening (3pm to 6pm)' = '3-6pm',
'Prefer Evening (6pm to 9pm)' = '6-9pm',
'Prefer Late Night (9pm to midnight)' = '9pm-12am'
)
# Update the time_slot column in the df_summary dataframe with the abbreviations
df_summary$time_slot <- time_slot_abbreviations[df_summary$time_slot]
# Define the order of the levels
time_slot_levels <- c('6-9am', '9am-12pm', '12-3pm', '3-6pm', '6-9pm', '9pm-12am')
# Convert the time_slot column to a factor and specify the order of the levels
df_summary$time_slot <- factor(df_summary$time_slot, levels = time_slot_levels)
# Now you can draw your bar plot
ggplot(df_summary, aes(x = time_slot, y = percentage, fill = response_type)) +
geom_bar(stat = "identity", position = "dodge", width = 0.6) +
geom_text(aes(label = paste0(round(percentage, 1), "%")),
position = position_dodge(width = 0.6), vjust = -0.5, size = 4) +
scale_fill_brewer(palette = "Set2") +
scale_y_continuous(limits = c(0, 100)) + # Add 10% extra space for labels
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), # 45 degree angle for x-axis labels
axis.text.y = element_text(size = 12),
axis.title = element_text(size = 14, face = "bold"),
plot.title = element_text(size = 16, face = "bold"),
legend.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 12)) +
labs(x = "Time Slot", y = "Percentage", fill = "Response Type",
title = "Comparison of PreferredvsAllowed Responses by TimeSlot")#
#
# # Draw a bar plot
# ggplot(df_summary, aes(x = time_slot, y = percentage, fill = response_type)) +
# geom_bar(stat = "identity", position = "dodge", width = 0.6) +
# geom_text(aes(label = paste0(round(percentage, 1), "%")),
# position = position_dodge(width = 0.6), vjust = -0.5, size = 4) +
# scale_fill_brewer(palette = "Set2") +
# scale_y_continuous(limits = c(0, max_percentage * 1.1)) + # Add 10% extra space for labels
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), # 45 degree angle for x-axis labels
# axis.text.y = element_text(size = 12),
# axis.title = element_text(size = 14, face = "bold"),
# plot.title = element_text(size = 16, face = "bold"),
# legend.title = element_text(size = 14, face = "bold"),
# legend.text = element_text(size = 12)) +
# labs(x = "Time Slot", y = "Percentage", fill = "Response Type",
# title = "Comparison of Preferred vs Allowed Responses by Time Slot")
# # Draw a bar plot
# ggplot(df_summary, aes(x = time_slot, y = count, fill = response_type)) +
# geom_bar(stat = "identity", position = "dodge", width = 0.6) +
# geom_text(aes(label = paste0(round(percentage, 1), "%")),
# position = position_dodge(width = 0.6), vjust = -0.25, size = 4) +
# scale_fill_brewer(palette = "Set2") +
# scale_y_continuous(limits = c(0, max_count * 1.1)) + # Add 10% extra space for labels
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
# axis.text.y = element_text(size = 12),
# axis.title = element_text(size = 14, face = "bold"),
# plot.title = element_text(size = 16, face = "bold"),
# legend.title = element_text(size = 14, face = "bold"),
# legend.text = element_text(size = 12)) +
# labs(x = "Time Slot", y = "Count", fill = "Response Type",
# title = "Comparison of Preferred vs Allowed Responses by Time Slot")
# df %>%
# mutate(is_preferred = as.numeric(is_preferred),
# is_allowed = as.numeric(is_allowed)) %>%
# group_by(time_slot) %>%
# summarise(
# is_preferred = sum(is_preferred, na.rm = TRUE)/n(),
# is_allowed = sum(is_allowed, na.rm = TRUE)/n(),
# total_numer = n(),
# .groups = "drop"
# )
# # df %>%
# # group_by(time_slot) %>%
# # summarize(distribution_preferred = sum(as.numeric(is_preferred)) / n(),
# # distribution_allowed = sum(is_allowed)/n(),
# # total_number = n(),
# # .groups = "drop")# # Calculate the percentage of preferred and allowed responses for each time slot
# df_summary <- df %>%
# mutate(is_preferred = as.numeric(is_preferred),
# is_allowed = as.numeric(is_allowed)) %>%
# group_by(time_slot) %>%
# summarise(
# percentage_preferred = sum(is_preferred, na.rm = TRUE) / n() * 100,
# percentage_allowed = sum(is_allowed, na.rm = TRUE) / n() * 100,
# .groups = "drop"
# ) %>%
# pivot_longer(cols = c(percentage_preferred, percentage_allowed),
# names_to = 'response_type',
# values_to = 'percentage')
#
# # Set plot size
# options(repr.plot.width = 20, repr.plot.height = 15)
#
# # Get maximum percentage value
# max_percentage <- max(df_summary$percentage, na.rm = TRUE)
#
# # Create a mapping of time slot names to abbreviations
# time_slot_abbreviations <- c(
# 'Prefer Early Morning (6am to 9am)' = '6-9am',
# 'Prefer Late Morning (9am to 12pm)' = '9am-12pm',
# 'Prefer Afternoon (12pm to 3pm)' = '12-3pm',
# 'Prefer Early Evening (3pm to 6pm)' = '3-6pm',
# 'Prefer Evening (6pm to 9pm)' = '6-9pm',
# 'Prefer Late Night (9pm to midnight)' = '9pm-12am'
# )
#
# # Update the time_slot column in the df_summary dataframe with the abbreviations
# df_summary$time_slot <- time_slot_abbreviations[df_summary$time_slot]
#
# # Draw a bar plot
# ggplot(df_summary, aes(x = time_slot, y = percentage, fill = response_type)) +
# geom_bar(stat = "identity", position = "dodge", width = 0.6) +
# geom_text(aes(label = paste0(round(percentage, 1), "%")),
# position = position_dodge(width = 0.6), vjust = -0.5, size = 4) +
# scale_fill_brewer(palette = "Set2") +
# scale_y_continuous(limits = c(0, max_percentage * 1.1)) + # Add 10% extra space for labels
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), # 45 degree angle for x-axis labels
# axis.text.y = element_text(size = 12),
# axis.title = element_text(size = 14, face = "bold"),
# plot.title = element_text(size = 16, face = "bold"),
# legend.title = element_text(size = 14, face = "bold"),
# legend.text = element_text(size = 12)) +
# labs(x = "Time Slot", y = "Percentage", fill = "Response Type",
# title = "Comparison of Preferred vs Allowed Responses by Time Slot")
# # df_summary_table <- df %>%
# group_by(time_slot) %>%
# summarise(
# distribution_preferred = sum(as.numeric(is_preferred), na.rm = TRUE)/n(),
# distribution_allowed = sum(as.numeric(is_allowed), na.rm = TRUE)/n(),
# total_number = n(),
# .groups = "drop"
# )
#
# df_summary_tabledf %>%
mutate(is_preferred = as.numeric(is_preferred),
is_allowed = as.numeric(is_allowed)) %>%
group_by(time_slot) %>%
summarise(
preferred_dist = sum(is_preferred, na.rm = TRUE)/length(is_preferred),
allowed_dist = sum(is_allowed, na.rm = TRUE)/length(is_allowed),
.groups = "drop"
)# A tibble: 6 × 3
time_slot preferred_dist allowed_dist
<fct> <dbl> <dbl>
1 Prefer Early Morning (6am to 9am) 0.343 0.791
2 Prefer Late Morning (9am to 12pm) 0.516 0.974
3 Prefer Afternoon (12pm to 3pm) 0.471 0.941
4 Prefer Early Evening (3pm to 6pm) 0.434 0.915
5 Prefer Evening (6pm to 9pm) 0.484 0.905
6 Prefer Late Night (9pm to midnight) 0.277 0.999