library(readr)
library(jsonlite)
library(tidyr)
library(tidyverse)
library(lubridate)
library(dplyr)
library(chron)
library(gridExtra)

#' Read a tab-separated file, keeping every column as text.
#'
#' No type guessing is performed: the column specification defaults every
#' column to character, so callers convert fields themselves.
#'
#' @param file_path File location handed to `read_tsv()`.
#' @return The tibble produced by `read_tsv()`, with all columns character.
read_dist <- function(file_path) {
  all_text <- cols(.default = "c")
  read_tsv(file_path, col_types = all_text)
}
# Load participant metadata ----
# read_dist() returns every column as character, so time and numeric fields
# are converted explicitly below.
dist <- read_dist("participant_metadata.txt")

#' Normalise "HH:MM" clock strings to zero-padded "HH:MM:SS" text.
#'
#' The result stays character on purpose: zero-padded clock strings sort
#' chronologically, so they can be compared with `<=` / `>=` directly.
#'
#' @param time_str Character vector of times in "%H:%M" form.
#' @return Character vector the same length as `time_str`; entries that
#'   `strptime()` cannot parse become `NA`.
convert_to_time <- function(time_str) {
  # Only the time of day matters, so pin the zone rather than inheriting the
  # session's local one.
  parsed <- format(strptime(time_str, "%H:%M", tz = "UTC"), "%H:%M:%S")
  # strptime() accepts "24:00" but rolls it over to 00:00 of the next day,
  # which formats as "00:00:00" and would sort *before* every other time.
  # Keep end-of-day as "24:00:00" so it compares as the latest time.
  is_end_of_day <- !is.na(time_str) & grepl("^24:00", time_str)
  parsed[is_end_of_day] <- "24:00:00"
  parsed
}

# Time-slot definitions ----
# Single source of truth for the six preference columns: full column name ->
# short axis label, listed in chronological order.
time_slot_abbreviations <- c(
  "Prefer Early Morning (6am to 9am)" = "6-9am",
  "Prefer Late Morning (9am to 12pm)" = "9am-12pm",
  "Prefer Afternoon (12pm to 3pm)" = "12-3pm",
  "Prefer Early Evening (3pm to 6pm)" = "3-6pm",
  "Prefer Evening (6pm to 9pm)" = "6-9pm",
  "Prefer Late Night (9pm to midnight)" = "9pm-12am"
)
time_slots_order <- names(time_slot_abbreviations)
time_slot_levels <- unname(time_slot_abbreviations)

# Clock range covered by each slot, as plain character columns
# (convert_to_time() is vectorised, so no per-element map() is needed).
time_slot_ranges <- tibble(
  time_slot = time_slots_order,
  start_time = convert_to_time(
    c("06:00", "09:00", "12:00", "15:00", "18:00", "21:00")
  ),
  end_time = convert_to_time(
    c("09:00", "12:00", "15:00", "18:00", "21:00", "24:00")
  )
)

# One row per participant x time slot ----
df <- dist %>%
  drop_na(`Messages Start`, `Messages End`) %>%
  mutate(across(c(`Messages Start`, `Messages End`), convert_to_time)) %>%
  # Select the slot columns by name instead of a positional a:b range, so
  # unrelated columns sitting between them can never be pulled in.
  pivot_longer(
    cols = all_of(time_slots_order),
    names_to = "time_slot",
    values_to = "is_preferred"
  ) %>%
  left_join(time_slot_ranges, by = "time_slot") %>%
  mutate(
    time_slot = factor(time_slot, levels = time_slots_order),
    # A slot is "allowed" when the participant's messaging window starts
    # inside it, ends inside it, or spans it completely.
    # NOTE(review): the bounds are inclusive, so a window that merely touches
    # a slot boundary (e.g. ends at exactly 09:00) also counts for the
    # neighbouring slot -- confirm this is intended.
    is_allowed = case_when(
      (`Messages Start` >= start_time & `Messages Start` <= end_time) ~ 1,
      (`Messages End` >= start_time & `Messages End` <= end_time) ~ 1,
      (`Messages Start` <= start_time & `Messages End` >= end_time) ~ 1,
      TRUE ~ 0
    )
  )

# Preferred / allowed counts per slot ----
# assumes the preference columns hold numeric text such as "0"/"1"; anything
# else becomes NA in as.numeric() and is dropped by na.rm -- TODO confirm
df_summary <- df %>%
  mutate(
    is_preferred = as.numeric(is_preferred),
    is_allowed = as.numeric(is_allowed)
  ) %>%
  group_by(time_slot) %>%
  summarise(
    is_preferred = sum(is_preferred, na.rm = TRUE),
    is_allowed = sum(is_allowed, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(is_preferred, is_allowed),
    names_to = "response_type",
    values_to = "count"
  )

# Combined (preferred + allowed) count for each slot.
total_count <- df_summary %>%
  group_by(time_slot) %>%
  summarise(total = sum(count))

# Explicit key: avoids the "Joining with `by = ...`" guess and message.
df_summary <- left_join(df_summary, total_count, by = "time_slot")

# Each response type as a share of its slot's combined count.
df_summary <- df_summary %>%
  mutate(percentage = count / total * 100)

# Plot ----
# Figure size used by the notebook display.
options(repr.plot.width = 20, repr.plot.height = 15)

# Largest raw count across slots and response types (not used by the plot
# below; kept because it is defined at top level).
max_count <- max(df_summary$count)

# Swap the long column names for short axis labels. The lookup goes through
# as.character() because indexing a named vector with a factor would use the
# factor's integer codes, not its labels.
df_summary$time_slot <- factor(
  unname(time_slot_abbreviations[as.character(df_summary$time_slot)]),
  levels = time_slot_levels
)

ggplot(
  df_summary,
  aes(x = time_slot, y = percentage, fill = response_type)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.6) +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_dodge(width = 0.6),
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_brewer(palette = "Set2") +
  # Fixed percentage axis from 0 to 100.
  scale_y_continuous(limits = c(0, 100)) +
  theme_minimal() +
  theme(
    # Slanted x labels so neighbouring slot names do not collide.
    axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    plot.title = element_text(size = 16, face = "bold"),
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 12)
  ) +
  labs(
    x = "Time Slot",
    y = "Percentage",
    fill = "Response Type",
    title = "Comparison of Preferred vs Allowed Responses by Time Slot"
  )