Searching for a Job

Meng

06.09.2024

# Read the Gmail export line by line and lower-case everything so that
# header names can be matched case-insensitively further down.
# `path` is expected to be defined earlier and to end with a path separator.
mbox <- tolower(
  readLines(paste0(path, "leider.mbox"), warn = FALSE)
)
# Collect the timestamp of every message found in a lower-cased .mbox dump.
#
# mbox_content: character vector with one element per line of the mailbox
#   file. Header names are matched in lower case ("date:").
# Returns a data.frame with one column, `Date_time`, holding one parsed
#   timestamp per "date:" header line.
extract_email_data <- function(mbox_content) {
  # Only the "date:" header lines are needed for the result
  date_headers <- grep("^date:", mbox_content, value = TRUE)

  # Timestamp layout inside the header: dd monthname yyyy HH:mm:ss
  stamp_pattern <- "\\d{1,2} [a-z]+ \\d{4} \\d{2}:\\d{2}:\\d{2}"
  parsed_stamps <- dmy_hms(str_extract(date_headers, stamp_pattern))

  data.frame(Date_time = parsed_stamps, stringsAsFactors = FALSE)
}

# Parse the loaded mailbox lines into a data frame with one `Date_time`
# entry per "date:" header line.
emails <- extract_email_data(mbox)

Overview

# Flows of the Sankey diagram, each with weight 1:
#   case -> position and position -> feedback (one pair per row of `dt`),
#   followed by six hand-entered flows for the interview rounds.
links <- data.frame(
  source = c(
    dt[, case],
    dt[, position_full],
    rep("invitation", 5),
    "1.R. invitation"
  ),
  target = c(
    dt[, position_full],
    dt[, fb],
    rep("1.R. no response", 2),
    rep("1.R. rejection", 2),
    "1.R. invitation",
    "2.R. rejection"
  ),
  value = rep(1, 2 * nrow(dt) + 6)
)

# Node table: every distinct entity that appears as a source or a target.
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)

# networkD3 expects links to reference nodes by zero-based index rather than
# by name, so translate the names into positions in `nodes`.
links[["IDsource"]] <- match(links$source, nodes$name) - 1
links[["IDtarget"]] <- match(links$target, nodes$name) - 1

# Draw the diagram
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  nodeWidth = 10,
  fontSize = 15,
  height = 1000,
  width = 700,
  sinksRight = FALSE
)

Position

# Number of applications per position; rows containing NA are dropped.
position_data <- dt[, .(n = .N), by = position_full]
position_data <- na.omit(position_data)

# Pie chart of the share of each position
highchart() %>%
  hc_chart(type = "pie") %>%
  hc_title(text = "") %>%
  hc_add_series(name = "", data = list_parse2(position_data)) %>%
  hc_tooltip(
    pointFormat = "<b>{point.percentage:.1f}%</b> (<b>{point.y}</b> occurrences)"
  ) %>%
  hc_add_theme(hc_theme_538())

Time

Duration

# Waiting time until feedback, bucketed into ranges and counted per feedback
# type. NOTE(review): assumes `dt$duration` is numeric (days) - confirm.
duration_data <- dt[, .(duration, fb)]

# Bins are left-closed (right = FALSE): a value of 3 falls into "3-7 days",
# a value of 30 into ">30 days". cut() already returns a factor whose levels
# follow the order of `labels`, so no further factor() call is needed.
duration_data[, duration := cut(
  duration,
  breaks = c(-Inf, 3, 7, 14, 30, Inf),
  labels = c("<3 days", "3-7 days", "7-14 days", "14-30 days", ">30 days"),
  right = FALSE
)]

# Fix the order of the feedback groups; any other value becomes NA
duration_data[, fb := factor(fb, levels = c("rejection", "no response", "invitation"))]

# Count per (bucket, feedback) and drop combinations with a missing value
duration_data <- na.omit(duration_data[, .(n = .N), by = .(duration, fb)])
duration_data <- duration_data[order(duration)]

duration_data %>%
  hchart("column", hcaes(x = duration, y = n, group = fb)) %>%
  hc_title(text = "How long have I waited for a feedback?") %>%
  hc_yAxis(title = list(text = "Frequency")) %>%
  # The bucket label already ends in "days"; do not append the unit again
  hc_tooltip(pointFormat = "<b>{point.y}</b> occurrences for {point.duration}") %>%
  hc_add_theme(hc_theme_538())

Time of day

# Number of e-mails per hour of day, taken from the parsed `Date_time` column.
hours_data <- data.table(hour(emails$Date_time))

# Skip timestamps that could not be parsed (NA hour) and sort by hour:
# the chart expects x values in ascending order, while grouped results come
# back in order of first appearance.
hours_data <- hours_data[!is.na(V1), .(n = .N), by = V1][order(V1)]

hours_data %>%
  hchart("column", hcaes(x = V1, y = n)) %>%
  hc_xAxis(title = list(text = "Hour of Day"), categories = 0:23) %>%
  hc_yAxis(title = list(text = "Count")) %>%
  hc_title(text = "At what time did I receive a 'Leider' or a 'Sorry'?") %>%
  hc_tooltip(pointFormat = "<b>{point.y}</b> records at {point.x}:00") %>%
  hc_add_theme(hc_theme_538())