# Load the helper script (presumably the one that defines addChunkTitles()).
source("~/R/Scripts/addChunkTitles.R")
# Capture the active document's path once, so the file that is read and the
# file that is overwritten are the same even if the editor focus changes while
# addChunkTitles() is running.
.rmd_path <- rstudioapi::getSourceEditorContext()$path
rmd <- addChunkTitles(path = .rmd_path)
# Overwrite the document with the value returned by addChunkTitles().
write(rmd, file = .rmd_path)
# Root folders that are scanned for R / R Markdown sources.
paths <- c("~\\R","~\\Veginvesting","~\\Northeastern","~\\Graduate Studies Resumes,Transcripts & Applications")

# List candidate files under every root. `simplify = FALSE` guarantees a named
# list with one character vector per root; the default simplification would
# return a vector or matrix whenever every root yields the same number of hits.
all.rmd <- sapply(
  paths, list.files,
  pattern = "[A-Za-z0-9\\_\\.\\s\\-]+\\.[Rr][^d|p]?m?[^x|s|r]?d?$",
  all.files = TRUE, full.names = TRUE, recursive = TRUE,
  ignore.case = TRUE, include.dirs = TRUE,
  simplify = FALSE
)

# Drop paths that match any of the unwanted fragments (library folders,
# binaries, other file types that slip through the listing pattern).
all.rmd <- lapply(all.rmd, function(l) {
  case_sensitive <- c("win-library", "AlpacaforR", "min.js")
  case_insensitive <- c(
    "pdf", "png", "zip", "rnw", "rhistory", "rsconnect", "rtf", "rxproj"
  )
  unwanted <- grepl(paste(case_sensitive, collapse = "|"), l) |
    grepl(paste(case_insensitive, collapse = "|"), l, ignore.case = TRUE)
  l[!unwanted]
})

# Keep only the roots that still have at least one file.
all.rmd <- all.rmd[lengths(all.rmd) > 0]
# Rewrite every "/" in the paths using the backslash replacement string.
all.rmd <- lapply(all.rmd, gsub, pattern = "\\/", replacement = "\\\\\\")
# Collect per-file attributes for every path in `all.rmd` (a list of character
# vectors, one per root folder). For each file the result is one of:
#   * NULL                         - the HDA::go() check on the file text failed
#   * list(begin, end, fn)         - chunk fences could not be paired
#   * list(Lines, Fns, Pkgs)       - counted lines, function names, package names
rmd.attr <- purrr::map_depth(all.rmd, .depth = 2, .f = function(.x) {
  # An identifier (optionally namespace-qualified) directly followed by "(".
  .call_rx <- "[A-Za-z\\_\\`][A-Za-z0-9\\.\\_\\:]+(?=\\()"

  # Try to read the file; on failure .file_text holds a "try-error" object.
  .file_text <- try({readLines(.x)})
  # NOTE(review): HDA::go() is defined outside this file; presumably it checks
  # that the named object is usable -- confirm against the HDA package.
  if (!HDA::go(".file_text")) return(NULL)

  if (stringr::str_detect(.x, "[Dd]$")) {
    # Path ends in "d"/"D" (e.g. .Rmd): only code inside ```{r ...} chunks counts.
    .begin <- stringr::str_which(.file_text, "^```\\{r")
    .end <- stringr::str_which(.file_text, "^```\\s{0,2}$")
    # data.frame() errors when the two vectors cannot be recycled to a common
    # length, i.e. when opening and closing fences do not pair up.
    # NOTE(review): lengths that divide evenly are recycled silently.
    .Chunks <- try({data.frame(begin = .begin, end = .end)})
    if (is.null(.Chunks) || inherits(.Chunks, "try-error")) {
      # Fall back to reporting the raw fence positions for this file.
      .Chunks <- list()
      .Chunks$begin <- print(.begin)
      .Chunks$end <- print(.end)
      .Chunks$fn <- .x
      return(.Chunks)
    }

    # Remove rows from the chunks table that start one of the repetitive
    # boilerplate chunk types.
    .skip_rx <- c(
      "```\\{r\\ssetup",         # setup chunk
      "```\\{r\\s\\'As",         # Assignment
      "```\\{r\\sLib",           # Libraries
      "```\\{r\\s\\'R\\sSel",    # Selenium
      "```\\{r\\s\\'R\\sChunk",  # name Chunks
      "```\\{r\\s\\'R\\sAttach"  # Dependencies
    )
    .skip <- unlist(lapply(.skip_rx, function(.rx) {
      stringr::str_which(.file_text, .rx)
    }))
    .Chunks <- .Chunks[!.Chunks$begin %in% .skip, ]

    # Lines of code = sum of (closing fence - opening fence) over kept chunks.
    .total_lines <- sum(.Chunks$end - .Chunks$begin)

    # Function calls found inside each kept chunk.
    .fns <- purrr::map2(.Chunks$begin, .Chunks$end, function(.b, .e) {
      trimws(unlist(stringr::str_extract_all(.file_text[.b:.e], .call_rx)))
    }) %>% unlist %>% unique
  } else {
    # Any other file: every line counts and is searched for function calls.
    .fns <- unique(trimws(unlist(stringr::str_extract_all(.file_text, .call_rx))))
    .total_lines <- length(.file_text)
  }

  # Packages referenced through a pkg:: prefix.
  .pkgs <- stringr::str_extract(.fns, "[[:alnum:]\\_]+(?=\\:\\:)")
  .pkgs <- .pkgs[!is.na(.pkgs)]

  # Strip the pkg:: prefix from function names; names ending in ":" become "".
  .fns <- purrr::map_chr(.fns, function(.fn) {
    if (stringr::str_detect(.fn, "\\:{2}")) {
      stringr::str_extract(.fn, "(?<=\\:\\:)[A-Za-z0-9\\_\\.]+")
    } else if (stringr::str_detect(.fn, "\\:$")) {
      ""
    } else {
      .fn
    }
  })
  # Drop empty names, and NA results from a prefix with nothing after it.
  .fns <- .fns[!is.na(.fns) & nzchar(.fns)]

  # Packages attached with library(pkg). The look-behind must not be anchored
  # with "^": nothing can precede the start of the string, so an anchored
  # look-behind never matches.
  .lpkgs <- stringr::str_extract(.file_text, "(?<=library\\()[[:alnum:].]+")
  .lpkgs <- .lpkgs[!is.na(.lpkgs)]

  # Packages named as quoted strings inside a startPkgs(...) call.
  .spkgs <- stringr::str_extract(.file_text, "(?<=startPkgs\\(c?\\(?)[^\\)]+")
  .spkgs <- unlist(stringr::str_extract_all(.spkgs[!is.na(.spkgs)], "(?<=\\\")[:alnum:]+"))

  list(Lines = .total_lines, Fns = .fns, Pkgs = c(.pkgs, .lpkgs, .spkgs))
})
# Function to extract functions
# purrr::map(rmd.attr, ~ purrr::keep(.x = .x, ~ length(.) > 2))

 
# Sum of the "Lines" entry over every scanned file.
total.lines <- sum(unlist(
  purrr::map_depth(rmd.attr, .depth = 2, .f = function(.attr) purrr::pluck(.attr, "Lines"))
))

# "Fns" and "Pkgs" entries of every scanned file, flattened into one vector each.
all.fns <- unlist(
  purrr::map_depth(rmd.attr, .depth = 2, .f = function(.attr) purrr::pluck(.attr, "Fns"))
)
all.pkgs <- unlist(
  purrr::map_depth(rmd.attr, .depth = 2, .f = function(.attr) purrr::pluck(.attr, "Pkgs"))
)

Lines of Code

Total lines of R code written:
40983

Breadth of R Experience

Table of packages used

# ip <- installed.packages(fields = 'Package') ip <- ip[,'Package'] %>%
# .[!grepl('^dd$',.)] %>% .[!grepl('^translations$',.)] ip.fns <- lapply(ip,
# function(p)try(getNamespaceExports(p))) fnNames <- all.fns %>% unlist %>%
# unique Fns <- fnNames[!grepl('^\\d|\\:', fnNames)] %>% unique %>%
# .[nchar(.) > 1] %>% data.frame(Function = ., stringsAsFactors = F) %>%
# mutate(Package = str_extract(Function,
# '([[:alnum:]\\.\\_]+)(?=\\:\\:)')) %>% mutate_at(vars(Function),
# funs(gsub('[[:alnum:]\\.\\_]+\\:\\:', '', .))) fn_pkgs <-
# purrr::pmap(.l = list(fn = Fns$Function, pkg = Fns$Package), ip = ip.fns,
# function(fn, pkg, ip){ if (is.na(pkg)) { out <-
# paste(names(ip[which(sapply(ip,function(i)any(grepl(paste0('^',fn,'$'),
# i))))]), collapse = ', ') } else { out <- pkg } return(out) }) Fns$Package <-
# unlist(fn_pkgs) Fns$Package %>% unique %>% .[nchar(.) > 0] %>% as.data.frame
# %>% setNames('Packages') %>% DT::datatable()
# Frequency table of package names, most used first, as an interactive table.
DT::datatable(
  dplyr::arrange(
    setNames(as.data.frame(table(all.pkgs)), c("Package", "# of Uses")),
    desc(`# of Uses`)
  )
)

Table of all functions used to date

The (nearly) complete searchable table of all functions used to date in R.
All Functions Table

# Frequency table of function names, most used first, as an interactive table.
DT::datatable(
  dplyr::arrange(
    setNames(as.data.frame(table(all.fns)), c("Function", "# of Uses")),
    desc(`# of Uses`)
  )
)

Total Coursework Hours

2017-2018 Academic Year

Read CSV from Plus for Trello

# Read the 2017-2018 Plus for Trello export.
PlusforTrello <- read.csv(file = "~//R//Quantified Self//PlusforTrello2017-2018.csv")
# Remove the weird symbols in the column names.
names(PlusforTrello) <- gsub("[!@#$%^&*?.????]", "", names(PlusforTrello), perl = TRUE)
# Parse Date as a date-time (POSIXct).
PlusforTrello$Date <- lubridate::ymd_hm(PlusforTrello$Date)
# Format "DA 5030" to "DA5030".
PlusforTrello$Board <- gsub("DA\\s5030", "DA5030", PlusforTrello$Board)

# Change board names from the prior semester back to the actual class call
# number: entries dated before 2017-12-31 on PPUA5302 / DA5030 are relabelled
# PPUA5301 / DA5020.
PlusforTrello$Board <- local({
  board <- as.character(PlusforTrello$Board)
  # Date is POSIXct, so the cut-off is built as POSIXct too; `<` between a
  # POSIXct and a Date has no common comparison method.
  cutoff <- lubridate::ymd("2017-12-31", tz = "UTC")
  # Rows with a missing date are left unchanged instead of raising an error.
  prior <- !is.na(PlusforTrello$Date) & PlusforTrello$Date < cutoff
  board[prior & board %in% "PPUA5302"] <- "PPUA5301"
  board[prior & board %in% "DA5030"] <- "DA5020"
  as.factor(board)
})
# Hours per board and week for boards matching PPUA / DA. The summary is kept
# in PfTSummary because later statements reuse it.
PfTSummary <- summarise(
  group_by(
    PlusforTrello[stringr::str_detect(PlusforTrello$Board, "(?:PPUA)|(?:DA5?)"), ],
    Board, Week
  ),
  TotalHrsPerWeek = sum(S)
)

# One bar per week, filled by board, drawn with flipped coordinates.
ggplot(data = PfTSummary, mapping = aes(x = Week, y = TotalHrsPerWeek)) +
  geom_bar(stat = "identity", aes(fill = Board)) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Total hours per week",
    subtitle = "Fill color by course",
    caption = "Note: 2nd semester showed a decrease in hours spent during finals week \n because taxes were due during this time window.",
    x = "Week",
    y = "Hours"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  ) +
  scale_fill_discrete(breaks = c("DA5030", "PPUA5302", "DA5020", "PPUA5301"))

# Total hours per board; CourseHrs is reused by the per-course table.
CourseHrs <- summarise(
  group_by(PfTSummary, Board),
  TotalCourseHours = sum(TotalHrsPerWeek)
)
# Paragraph with the grand total over all boards.
htmltools::tagList(
  htmltools::tags$p(paste("Total hours:", sum(CourseHrs$TotalCourseHours)))
)

Total hours: 734.44

# Level-3 heading for the per-course totals.
htmltools::tagList(htmltools::h3("Total hours per course"))

Total hours per course

# Centered HTML table of total hours per board.
kableExtra::kable_styling(
  kableExtra::kable(as.data.frame(CourseHrs), "html"),
  position = "center"
)
Board TotalCourseHours
DA5020 204.26
DA5030 202.65
PPUA5301 146.90
PPUA5302 180.63
# Caption paragraph for the weekly-average table.
htmltools::p("Average Hours spent per week per Course")

Average Hours spent per week per Course

# Mean weekly hours per board, rendered as a centered HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    summarize(group_by(PfTSummary, Board), Avg = mean(TotalHrsPerWeek)),
    "html"
  ),
  position = "center"
)
Board Avg
DA5020 12.76625
DA5030 11.92059
PPUA5301 11.30000
PPUA5302 11.28937

2018-2019 Academic Year

Fall 2018

Read CSV from Plus for Trello Fall18

# Read the Fall 2018 Plus for Trello export.
PlusforTrello <- read.csv(file = "~//R//Quantified Self//PlusforTrelloFall2018.csv")
# Remove the weird symbols in the column names.
names(PlusforTrello) <- gsub("[!@#$%^&*?.????]", "", names(PlusforTrello), perl = TRUE)
# Make Date a date-time.
PlusforTrello$Date <- lubridate::ymd_hm(PlusforTrello$Date)
# Colours 5, 6, 8 and 10 of the 12-colour "Paired" palette, named after the
# 6th, 7th, 3rd and 5th distinct board (in order of first appearance).
cols <- RColorBrewer::brewer.pal(12, name = "Paired")[c(5, 6, 8, 10)]
names(cols) <- as.character(unique(PlusforTrello$Board)[c(6, 7, 3, 5)])
# Hours per board and week for boards matching HINF / CAEP. `grp` is the
# leading course code (4 word characters + 4 digits) with whitespace removed.
# Assigned directly, not via assign() inside the pipe, and the deprecated
# mutate_at(..., funs()) is replaced by a plain mutate().
PfTSummary <- PlusforTrello %>%
  filter(str_detect(Board, "(?:HINF)|(?:CAEP)")) %>%
  mutate(grp = as.factor(gsub("\\s", "", str_extract(Board, "^\\w{4}\\s?\\d{4}")))) %>%
  group_by(Board, grp, Week) %>%
  summarise(TotalHrsPerWeek = sum(S))

# Dodged bars per week, filled by board; the caption lists the mean weekly
# hours of the first four boards returned by tapply().
ggplot(data = PfTSummary, mapping = aes(x = Week, y = TotalHrsPerWeek, group = grp)) +
  geom_bar(stat = "identity", aes(fill = Board), position = "dodge") +
  theme_minimal() +
  labs(
    title = "Total hours per week",
    subtitle = "Fill color by course",
    caption = local({
      # Compute the averages once so labels and values share one source.
      avg <- with(PfTSummary, tapply(TotalHrsPerWeek, INDEX = Board, FUN = mean))[1:4]
      paste(
        "Average Hours per Week:",
        paste0(names(avg), ": ", round(avg, 2), collapse = "\n"),
        sep = "\n"
      )
    }),
    x = "Week",
    y = "Hours"
  ) +
  scale_fill_manual(values = cols) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 2),
    minor_breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 1)
  ) +
  theme_grey() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )

# Save the most recent plot as a 12 x 9 PNG using the cairo device type.
ggsave(
  filename = "Fall2018Hours.png",
  plot = last_plot(),
  device = "png",
  width = 12,
  height = 9,
  type = "cairo"
)

# Total hours per board; CourseHrs is reused by the per-course table.
CourseHrs <- summarise(
  group_by(PfTSummary, Board),
  TotalCourseHours = sum(TotalHrsPerWeek)
)
# Paragraph with the grand total over all boards.
htmltools::tagList(
  htmltools::tags$p(paste("Total hours:", sum(CourseHrs$TotalCourseHours)))
)

Total hours: 241.33

# Level-3 heading for the per-course totals.
htmltools::tagList(htmltools::h3("Total hours per course"))

Total hours per course

# Centered HTML table of total hours per board, largest total first.
kableExtra::kable_styling(
  kableExtra::kable(
    arrange(as.data.frame(CourseHrs), desc(TotalCourseHours)),
    "html"
  ),
  position = "center"
)
Board TotalCourseHours
CAEP7712 79.45
HINF6400 Incentivized Insurance 64.27
HINF 6400 57.73
HINF5512 39.88
# Caption paragraph for the weekly-average table.
htmltools::p("Average Hours spent per week per Course")

Average Hours spent per week per Course

# Mean weekly hours per board, highest first, as a centered HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    arrange(
      summarize(group_by(PfTSummary, Board), Avg = mean(TotalHrsPerWeek)),
      desc(Avg)
    ),
    "html"
  ),
  position = "center"
)
Board Avg
CAEP7712 6.620833
HINF6400 Incentivized Insurance 5.355833
HINF 6400 4.810833
HINF5512 3.988000

Spring 2019

# Read the Spring 2019 Plus for Trello export.
PlusforTrello <- read.csv(file = "~//R//Quantified Self//PlusforTrelloSpring2019.csv")
# Remove the weird symbols in the column names.
names(PlusforTrello) <- gsub("[!@#$%^&*?.????]", "", names(PlusforTrello), perl = TRUE)
# Make Date a date-time.
PlusforTrello$Date <- lubridate::ymd_hm(PlusforTrello$Date)
# Five colours from HDA::ggColor(), named after the 3rd, 4th, 5th, 7th and
# 10th distinct board (in order of first appearance).
cols <- HDA::ggColor(5)
names(cols) <- as.character(unique(PlusforTrello$Board)[c(3, 4, 5, 7, 10)])
# Hours per board and week for boards matching HINF / Mariy / Capstone. `grp`
# is the leading course code (4 word characters + 4 digits) with whitespace
# removed; boards without such a code get NA.
# Assigned directly, not via assign() inside the pipe, and the deprecated
# mutate_at(..., funs()) is replaced by a plain mutate().
PfTSummary <- PlusforTrello %>%
  filter(str_detect(Board, "(?:HINF)|(?:Mariy)|(?:Capstone)")) %>%
  mutate(grp = as.factor(gsub("\\s", "", str_extract(Board, "^\\w{4}\\s?\\d{4}")))) %>%
  group_by(Board, grp, Week) %>%
  summarise(TotalHrsPerWeek = sum(S))

# Dodged bars per week, filled by board; the caption lists the mean weekly
# hours of up to five boards.
ggplot(data = PfTSummary, mapping = aes(x = Week, y = TotalHrsPerWeek, group = grp)) +
  geom_bar(stat = "identity", aes(fill = Board), position = "dodge") +
  theme_minimal() +
  labs(
    title = "Total hours per week",
    subtitle = "Fill color by course",
    caption = local({
      avg <- with(PfTSummary, tapply(TotalHrsPerWeek, INDEX = Board, FUN = mean))
      # Take labels and values from the same subset so they always line up
      # (at most five non-missing averages, no NA padding or recycling).
      avg <- avg[!is.na(avg)]
      avg <- avg[seq_len(min(5, length(avg)))]
      paste(
        "Average Hours per Week:",
        paste0(names(avg), ": ", round(avg, 2), collapse = "\n"),
        sep = "\n"
      )
    }),
    x = "Week",
    y = "Hours"
  ) +
  scale_fill_manual(values = cols) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 2),
    minor_breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 1)
  ) +
  theme_grey() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )

# Save the most recent plot as a 12 x 9 PNG using the cairo device type.
ggsave(
  filename = "Spring2019Hours.png",
  plot = last_plot(),
  device = "png",
  width = 12,
  height = 9,
  type = "cairo"
)

# Total hours per board; CourseHrs is reused by the per-course table.
CourseHrs <- summarise(
  group_by(PfTSummary, Board),
  TotalCourseHours = sum(TotalHrsPerWeek)
)
# Paragraph with the grand total over all boards.
htmltools::tagList(
  htmltools::tags$p(paste("Total hours:", sum(CourseHrs$TotalCourseHours)))
)

Total hours: 148.24

# Level-3 heading for the per-course totals.
htmltools::tagList(htmltools::h3("Total hours per course"))

Total hours per course

# Centered HTML table of total hours per board, largest total first.
kableExtra::kable_styling(
  kableExtra::kable(
    arrange(as.data.frame(CourseHrs), desc(TotalCourseHours)),
    "html"
  ),
  position = "center"
)
Board TotalCourseHours
HINF6500 Predictive Analytics 48.52
HINF 5105 American Healthcare 37.35
Capstone 35.04
Mariyashiyko.love 17.22
HINF5105 Group Project 10.11
# Caption paragraph for the weekly-average table.
htmltools::p("Average Hours spent per week per Course")

Average Hours spent per week per Course

# Mean weekly hours per board, highest first, as a centered HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    arrange(
      summarize(group_by(PfTSummary, Board), Avg = mean(TotalHrsPerWeek)),
      desc(Avg)
    ),
    "html"
  ),
  position = "center"
)
Board Avg
Capstone 7.008000
HINF6500 Predictive Analytics 6.931429
Mariyashiyko.love 5.740000
HINF 5105 American Healthcare 3.395455
HINF5105 Group Project 3.370000