# Load the Gmail export (mbox file) line by line, then lower-case every line
# so the header patterns used further down can be written in lower case only.
mbox <- readLines(paste0(path, "leider.mbox"), warn = FALSE)
mbox <- tolower(mbox)
# Pull the timestamp out of every "date:" header found in the lines of an
# .mbox file.
#
# mbox_content: character vector holding the (lower-cased) lines of the file.
# Returns: a data frame with one column, Date_time, containing one parsed
#          date-time per "date:" line.
extract_email_data <- function(mbox_content) {
  # Keep only the four header fields of interest
  header_pattern <- "^(date:|subject:|from:|content-type:)"
  header_lines <- grep(header_pattern, mbox_content, value = TRUE)

  # Of those, the date headers
  date_headers <- grep("^date:", header_lines, value = TRUE)

  # Timestamp layout: day, month name, year, HH:mm:ss
  # (whatever follows the seconds in the header is not captured)
  stamp_pattern <- "\\d{1,2} [a-z]+ \\d{4} \\d{2}:\\d{2}:\\d{2}"
  parsed <- dmy_hms(str_extract(date_headers, stamp_pattern))

  data.frame(Date_time = parsed, stringsAsFactors = FALSE)
}
# Data frame with one Date_time entry per "date:" header line of the mailbox
emails <- extract_email_data(mbox)
Overview
# Link table for the Sankey diagram: one flow from each case to its
# position_full, one from each position_full to its fb, followed by six
# hand-entered follow-up flows.
links <- data.frame(
  source = c(
    dt[["case"]],
    dt[["position_full"]],
    rep("invitation", 5),
    "1.R. invitation"
  ),
  target = c(
    dt[["position_full"]],
    dt[["fb"]],
    rep(c("1.R. no response", "1.R. rejection"), each = 2),
    "1.R. invitation",
    "2.R. rejection"
  ),
  # Every flow carries the same weight
  value = rep(1, 2 * nrow(dt) + 6)
)

# Node table: every distinct name that occurs as a source or a target
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)

# networkD3 addresses nodes by zero-based position in the node table rather
# than by name, so translate both ends of each link into such an index.
links[["IDsource"]] <- match(links$source, nodes$name) - 1
links[["IDtarget"]] <- match(links$target, nodes$name) - 1

# Draw the diagram
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  nodeWidth = 10,
  fontSize = 15,
  height = 1000,
  width = 700,
  sinksRight = FALSE
)
Position
# Row count per position_full; groups with a missing value are dropped
position_data <- dt[, list(n = .N), by = "position_full"]
position_data <- na.omit(position_data)

# Pie chart of those counts, one slice per position
highchart() %>%
  hc_chart(type = "pie") %>%
  hc_title(text = "") %>%
  hc_add_series(name = "", data = list_parse2(position_data)) %>%
  hc_tooltip(
    pointFormat = "<b>{point.percentage:.1f}%</b> (<b>{point.y}</b> occurrences)"
  ) %>%
  hc_add_theme(hc_theme_538())
Time
duration
# Waiting time (duration) per kind of feedback (fb), bucketed for plotting.
duration_data <- dt[, .(duration, fb)]

# Bucket the waiting time. With right = FALSE each interval is closed on the
# left, so a duration of exactly 3 falls into "3-7 days". cut() already
# returns a factor whose levels follow the order of `labels`, so no separate
# re-levelling step is needed.
duration_data[, duration := cut(
  duration,
  breaks = c(-Inf, 3, 7, 14, 30, Inf),
  labels = c("<3 days", "3-7 days", "7-14 days", "14-30 days", ">30 days"),
  right = FALSE
)]

# Fix the order of the series; any fb value outside these levels becomes NA
duration_data[, fb := factor(fb, levels = c("rejection", "no response", "invitation"))]

# Count per (bucket, feedback), drop groups with a missing key, sort by bucket
duration_data <- na.omit(duration_data[, .(n = .N), by = .(duration, fb)])
duration_data <- duration_data[order(duration)]

duration_data %>%
  hchart("column", hcaes(x = duration, y = n, group = fb)) %>%
  hc_title(text = "How long have I waited for a feedback?") %>%
  hc_yAxis(title = list(text = "Frequency")) %>%
  # The bucket label already ends in "days", so the unit is not repeated here
  hc_tooltip(pointFormat = "<b>{point.y}</b> occurrences for {point.duration}") %>%
  hc_add_theme(hc_theme_538())
time of day
# Number of e-mails per hour of the day (hour taken from emails$Date_time)
hours_data <- data.table(hour(emails$Date_time))

# Count per hour. Timestamps that could not be parsed yield an NA hour; that
# group is dropped so it is not sent to the chart as a point without an x
# value. Rows are then sorted by hour, because Highcharts expects the points
# of a series with numeric x values in ascending order.
hours_data <- na.omit(hours_data[, .(n = .N), by = V1])
hours_data <- hours_data[order(V1)]

hours_data %>%
  hchart("column", hcaes(x = V1, y = n)) %>%
  hc_xAxis(title = list(text = "Hour of Day"), categories = 0:23) %>%
  hc_yAxis(title = list(text = "Count")) %>%
  hc_title(text = "At what time did I receive a 'Leider' or a 'Sorry'?") %>%
  hc_tooltip(pointFormat = "<b>{point.y}</b> records at {point.x}:00") %>%
  hc_add_theme(hc_theme_538())