personalized timing distribution

Modular Timing Prototype 2

Let’s read in the old data:

# Read the raw CSV export into `df`; the plots and tables below all start from it.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
df <- read.csv(file = '/Users/haochensong/Desktop/MHA Mooclet/data_new/Modular Timing Prototype 2.csv')
library(tidyverse)

Keeping only the rows assigned by the contextual Thompson sampling (TS) policy:

library(dplyr)

# Bar chart of the number of rows per arm under the contextual Thompson
# sampling policy, with arms relabelled "offset 0" ... "offset 23".
df %>%
  filter(policy == 'thompson_sampling_contextual') %>%
  # Shorten "Modular Timing Prototype 2 <k>Offset Arm" to "offset <k>".
  mutate(
    arm = gsub("Modular Timing Prototype 2 ", "offset ", arm),
    arm = sub("Offset Arm", "", arm)
  ) %>%
  count(arm, name = "total") %>%
  # Explicit factor levels keep the bars in numeric rather than alphabetical order.
  mutate(arm = factor(arm, levels = paste0("offset ", 0:23))) %>%
  ggplot(aes(x = arm, y = total)) +
  geom_col() +
  labs(title = "Distribution of Arms", x = "Arm", y = "Total Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Let’s also look at the same distribution as a table of counts per arm:

# Count rows per arm under the contextual Thompson sampling policy and list
# them in numeric offset order.
distribution_table <- df %>%
  filter(policy == 'thompson_sampling_contextual') %>%
  # Extract the integer offset from "Modular Timing Prototype 2 <k>Offset Arm".
  mutate(arm_number = as.integer(gsub("Modular Timing Prototype 2 ([0-9]+)Offset Arm", "\\1", arm))) %>%
  count(arm_number, name = "frequency") %>%
  # Sort on the integer so "offset 10" follows "offset 9", then build the label
  # and keep only the two reported columns.
  arrange(arm_number) %>%
  transmute(arm = paste("offset", arm_number), frequency)

# Limit printed output to 50 entries and show the result as a plain data frame.
options(max.print = 50)
print(as.data.frame(distribution_table))
         arm frequency
1   offset 0       149
2   offset 1       179
3   offset 2       428
4   offset 3       177
5   offset 4       174
6   offset 5       171
7   offset 6       177
8   offset 7       390
9   offset 8       187
10  offset 9       180
11 offset 10       116
12 offset 11       220
13 offset 12       125
14 offset 13        82
15 offset 14        75
16 offset 15        76
17 offset 16        85
18 offset 17        91
19 offset 18        80
20 offset 19       109
21 offset 20        99
22 offset 21        77
23 offset 22        84
24 offset 23        71

Now let’s look at where reward is 1:

# Bar chart of the number of rows per arm, restricted to rows with reward == 1
# under the contextual Thompson sampling policy.
# The title names the subset so this figure is not confused with the
# all-assignments plot, which previously carried the identical title.
df %>%
  filter(reward == 1, policy == 'thompson_sampling_contextual') %>%
  # Shorten "Modular Timing Prototype 2 <k>Offset Arm" to "offset <k>".
  mutate(
    arm = gsub("Modular Timing Prototype 2 ", "offset ", arm),
    arm = sub("Offset Arm", "", arm)
  ) %>%
  count(arm, name = "total") %>%
  # Explicit factor levels keep the bars in numeric rather than alphabetical order.
  mutate(arm = factor(arm, levels = paste0("offset ", 0:23))) %>%
  ggplot(aes(x = arm, y = total)) +
  geom_col() +
  labs(title = "Distribution of Arms (reward = 1)",
       x = "Arm",
       y = "Total Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Count rows per arm among rows with reward == 1 under the contextual Thompson
# sampling policy, listed in numeric offset order.
distribution_table <- df %>%
  filter(reward == 1, policy == 'thompson_sampling_contextual') %>%
  # Extract the integer offset from "Modular Timing Prototype 2 <k>Offset Arm".
  mutate(arm_number = as.integer(gsub("Modular Timing Prototype 2 ([0-9]+)Offset Arm", "\\1", arm))) %>%
  count(arm_number, name = "frequency") %>%
  # Sort on the integer so "offset 10" follows "offset 9", then build the label
  # and keep only the two reported columns.
  arrange(arm_number) %>%
  transmute(arm = paste("offset", arm_number), frequency)

# Limit printed output to 50 entries and show the result as a plain data frame.
options(max.print = 50)
print(as.data.frame(distribution_table))
         arm frequency
1   offset 0        28
2   offset 1        53
3   offset 2       131
4   offset 3        34
5   offset 4        37
6   offset 5        41
7   offset 6        53
8   offset 7       123
9   offset 8        45
10  offset 9        41
11 offset 10        35
12 offset 11        92
13 offset 12        36
14 offset 13        18
15 offset 14        20
16 offset 15        36
17 offset 16        21
18 offset 17        33
19 offset 18        28
20 offset 19        50
21 offset 20        44
22 offset 21        20
23 offset 22        24
24 offset 23        20

For comparison, here are the rows where reward is not NA:

# Bar chart of the number of rows per arm, restricted to rows whose reward is
# not NA under the contextual Thompson sampling policy.
# The title names the subset so this figure is not confused with the
# all-assignments plot, which previously carried the identical title.
df %>%
  filter(!is.na(reward), policy == 'thompson_sampling_contextual') %>%
  # Shorten "Modular Timing Prototype 2 <k>Offset Arm" to "offset <k>".
  mutate(
    arm = gsub("Modular Timing Prototype 2 ", "offset ", arm),
    arm = sub("Offset Arm", "", arm)
  ) %>%
  count(arm, name = "total") %>%
  # Explicit factor levels keep the bars in numeric rather than alphabetical order.
  mutate(arm = factor(arm, levels = paste0("offset ", 0:23))) %>%
  ggplot(aes(x = arm, y = total)) +
  geom_col() +
  labs(title = "Distribution of Arms (reward not NA)",
       x = "Arm",
       y = "Total Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Count rows per arm among rows whose reward is not NA under the contextual
# Thompson sampling policy, listed in numeric offset order.
distribution_table <- df %>%
  filter(!is.na(reward), policy == 'thompson_sampling_contextual') %>%
  # Extract the integer offset from "Modular Timing Prototype 2 <k>Offset Arm".
  mutate(arm_number = as.integer(gsub("Modular Timing Prototype 2 ([0-9]+)Offset Arm", "\\1", arm))) %>%
  count(arm_number, name = "frequency") %>%
  # Sort on the integer so "offset 10" follows "offset 9", then build the label
  # and keep only the two reported columns.
  arrange(arm_number) %>%
  transmute(arm = paste("offset", arm_number), frequency)

# Limit printed output to 50 entries and show the result as a plain data frame.
options(max.print = 50)
print(as.data.frame(distribution_table))
         arm frequency
1   offset 0       122
2   offset 1       140
3   offset 2       314
4   offset 3       139
5   offset 4       149
6   offset 5       137
7   offset 6       143
8   offset 7       279
9   offset 8       152
10  offset 9       139
11 offset 10        96
12 offset 11       160
13 offset 12        98
14 offset 13        62
15 offset 14        55
16 offset 15        60
17 offset 16        57
18 offset 17        68
19 offset 18        58
20 offset 19        88
21 offset 20        71
22 offset 21        53
23 offset 22        57
24 offset 23        48