# Load the addChunkTitles() helper, run it on the file currently open in the
# RStudio source editor, and write the returned text back over that same file.
source("~/R/Scripts/addChunkTitles.R")
rmd <- addChunkTitles(
  path = rstudioapi::getSourceEditorContext()$path
)
write(x = rmd, file = rstudioapi::getSourceEditorContext()$path)
# Root folders that are searched recursively for R / R Markdown sources.
paths <- c("~\\R","~\\Veginvesting","~\\Northeastern","~\\Graduate Studies Resumes,Transcripts & Applications")
# One character vector of matching paths per root folder, named by the root.
# simplify = FALSE keeps the result a list even when every root returns the
# same number of files (plain sapply() would collapse that case to a vector or
# matrix and break the depth-2 mapping done later).
all.rmd <- sapply(
  paths, list.files,
  pattern = "[A-Za-z0-9\\_\\.\\s\\-]+\\.[Rr][^d|p]?m?[^x|s|r]?d?$",
  all.files = TRUE, full.names = TRUE, recursive = TRUE,
  ignore.case = TRUE, include.dirs = TRUE,
  simplify = FALSE
)
# Drop paths containing any unwanted substring. The first pattern is matched
# case-sensitively and the second case-insensitively, exactly as the original
# one-filter-per-line version did.
all.rmd <- lapply(all.rmd, function(l) {
  drop_exact <- grepl("win-library|AlpacaforR|min.js", l)
  drop_any_case <- grepl(
    "pdf|png|zip|rnw|rhistory|rsconnect|rtf|rxproj", l,
    ignore.case = TRUE
  )
  l[!drop_exact & !drop_any_case]
})
# Remove root folders that ended up with no files at all.
all.rmd <- all.rmd[lengths(all.rmd) > 0]
# Rewrite forward slashes in every path using the replacement string below.
all.rmd <- lapply(all.rmd, gsub, pattern = "\\/", replacement = "\\\\\\")
# Scan every file listed in `all.rmd` and build, per file, a list with:
#   Lines - summed (end - begin) over the kept chunks for files whose name ends
#           in "d"/"D"; the number of lines in the file otherwise,
#   Fns   - unique names found immediately before "(" with any `pkg::` prefix
#           removed,
#   Pkgs  - package names taken from `pkg::` prefixes, library() calls and
#           startPkgs() calls.
# A file for which HDA::go() reports FALSE yields NULL. A file whose chunk
# fences cannot be combined into a data frame yields a diagnostic list
# (begin, end, fn) instead.
rmd.attr <- purrr::map_depth(all.rmd, .depth = 2, .f = function(.x) {
  # Try to read the file; on failure `.file_text` holds the "try-error" object.
  .file_text <- try({
    readLines(.x)
  })
  # HDA::go() is given the variable *name*, so `.file_text` must keep this name.
  # NOTE(review): presumably it returns FALSE when the read failed - confirm.
  if (!HDA::go(".file_text")) {
    return(NULL)
  }
  if (stringr::str_detect(.x, "[Dd]$")) {
    # Line numbers of chunk openers and of bare closing fences.
    .Chunks <- try({
      data.frame(
        begin = stringr::str_which(.file_text, "^```\\{r"),
        end = stringr::str_which(.file_text, "^```\\s{0,2}$")
      )
    })
    if (is.null(.Chunks) || inherits(.Chunks, "try-error")) {
      # Could not build the table: print and return the raw fence positions
      # together with the file name for inspection.
      .Chunks <- list()
      .Chunks$begin <- stringr::str_which(.file_text, "^```\\{r") %>% print
      .Chunks$end <- stringr::str_which(.file_text, "^```\\s{0,2}$") %>% print
      .Chunks$fn <- .x
      return(.Chunks)
    }
    # Remove rows from the chunks table indicating the line number of the
    # following repetitive chunk types
    .Chunks <- .Chunks[!.Chunks$begin %in% c(
      stringr::str_which(.file_text, "```\\{r\\ssetup"), # setup chunk
      stringr::str_which(.file_text, "```\\{r\\s\\'As"), # Assignment
      stringr::str_which(.file_text, "```\\{r\\sLib"), # Libraries
      stringr::str_which(.file_text, "```\\{r\\s\\'R\\sSel"), # Selenium
      stringr::str_which(.file_text, "```\\{r\\s\\'R\\sChunk"), # name Chunks
      stringr::str_which(.file_text, "```\\{r\\s\\'R\\sAttach") # Dependencies
    ), ]
    # Plain arithmetic: no reliance on an attached dplyr, and nothing that
    # needs to be wrapped in try().
    .total_lines <- sum(.Chunks$end - .Chunks$begin)
    # Collect every name that is directly followed by "(" inside each chunk.
    .fns <- purrr::pmap(
      list(.b = .Chunks$begin, .e = .Chunks$end),
      .f = function(.b, .e, .ft) {
        stringr::str_extract_all(.ft[.b:.e], "[A-Za-z\\_\\`][A-Za-z0-9\\.\\_\\:]+(?=\\()") %>% unlist %>% trimws
      },
      .ft = .file_text
    ) %>% unlist %>% unique
  } else {
    # Not treated as R Markdown: scan the whole file and count every line.
    .fns <- stringr::str_extract_all(.file_text, "[A-Za-z\\_\\`][A-Za-z0-9\\.\\_\\:]+(?=\\()") %>% unlist %>% trimws %>% unique
    .total_lines <- length(.file_text)
  }
  # Package part of `pkg::fun`. Dots are allowed so `data.table::` is recorded
  # as "data.table" rather than "table".
  .pkgs <- stringr::str_extract(.fns, "[[:alnum:]\\.\\_]+(?=\\:\\:)") %>% purrr::keep(~ !is.na(.))
  # Strip the `pkg::` prefix; names ending in a single ":" become "" and are
  # dropped by the keep() below.
  .fns %<>% purrr::map_chr(function(.x) {
    if (stringr::str_detect(.x, "\\:{2}")) {
      stringr::str_extract(.x, "(?<=\\:\\:)[A-Za-z0-9\\_\\.]+")
    } else if (stringr::str_detect(.x, "\\:$")) {
      ""
    } else {
      .x
    }
  }) %>% purrr::keep(~ nchar(.x) > 0)
  # Packages attached with library() at the start of a line. The previous
  # pattern put a lookbehind after "^", which can never match, so this was
  # always empty; a capture group is used instead. Optional quotes and dotted
  # package names are accepted.
  .lpkgs <- stringr::str_match(.file_text, "^\\s*library\\([\"']?([[:alnum:]\\.]+)")[, 2] %>% purrr::keep(~ !is.na(.))
  # Quoted package names passed to startPkgs().
  .spkgs <- stringr::str_extract(.file_text, "(?<=startPkgs\\(c?\\(?)[^\\)]+") %>% purrr::keep(~ !is.na(.)) %>% stringr::str_extract_all("(?<=\\\")[:alnum:]+") %>% unlist
  list(Lines = .total_lines, Fns = .fns, Pkgs = c(.pkgs, .lpkgs, .spkgs))
})
# Combine the per-file results into overall figures: the summed line count,
# every function name, and every package reference across all scanned files.
# purrr::map(rmd.attr, ~ purrr::keep(.x = .x, ~ length(.) > 2))
total.lines <- sum(unlist(
  purrr::map_depth(rmd.attr, .depth = 2, .f = ~ purrr::pluck(.,"Lines"))
))
all.fns <- unlist(
  purrr::map_depth(rmd.attr, .depth = 2, .f = ~ purrr::pluck(.,"Fns"))
)
all.pkgs <- unlist(
  purrr::map_depth(rmd.attr, .depth = 2, .f = ~ purrr::pluck(.,"Pkgs"))
)
# ip <- installed.packages(fields = 'Package') ip <- ip[,'Package'] %>%
# .[!grepl('^dd$',.)] %>% .[!grepl('^translations$',.)] ip.fns <- lapply(ip,
# function(p)try(getNamespaceExports(p))) fnNames <- all.fns %>% unlist %>%
# unique Fns <- fnNames[!grepl('^\\d|\\:', fnNames)] %>% unique %>%
# .[nchar(.) > 1] %>% data.frame(Function = ., stringsAsFactors = F) %>%
# mutate(Package = str_extract(Function,
# '([[:alnum:]\\.\\_]+)(?=\\:\\:)')) %>% mutate_at(vars(Function),
# funs(gsub('[[:alnum:]\\.\\_]+\\:\\:', '', .))) fn_pkgs <-
# purrr::pmap(.l = list(fn = Fns$Function, pkg = Fns$Package), ip = ip.fns,
# function(fn, pkg, ip){ if (is.na(pkg)) { out <-
# paste(names(ip[which(sapply(ip,function(i)any(grepl(paste0('^',fn,'$'),
# i))))]), collapse = ', ') } else { out <- pkg } return(out) }) Fns$Package <-
# unlist(fn_pkgs) Fns$Package %>% unique %>% .[nchar(.) > 0] %>% as.data.frame
# %>% setNames('Packages') %>% DT::datatable()
# Count how often each package name occurs, sort by descending count, and show
# the result as an interactive table.
DT::datatable(
  dplyr::arrange(
    setNames(as.data.frame(table(all.pkgs)), c("Package", "# of Uses")),
    desc(`# of Uses`)
  )
)
The (nearly) complete searchable table of all functions used to date in R.
All Functions Table
Read CSV from Plus for Trello
# Read the 2017-2018 Plus for Trello export and clean it up.
PlusforTrello <- read.csv(file = "~//R//Quantified Self//PlusforTrello2017-2018.csv")
# Remove the weird symbols in the names
names(PlusforTrello) <- gsub("[!@#$%^&*?.????]", "", names(PlusforTrello), perl = TRUE)
# Make Date a date-time (lubridate::ymd_hm() returns POSIXct)
PlusforTrello$Date <- lubridate::ymd_hm(PlusforTrello$Date)
# Change board names from the prior semester back to the actual class call
# number. This is done on the whole column at once rather than row by row, so
# rows with a missing Date or Board are left unchanged instead of aborting the
# script with "missing value where TRUE/FALSE needed".
PlusforTrello$Board <- local({
  # Format DA 5030 to DA5030
  board <- gsub("DA\\s5030", "DA5030", PlusforTrello$Board)
  # The cutoff is built as POSIXct so both sides of `<` have the same class
  # (comparing a POSIXct column with a Date is not reliable in base R).
  cutoff <- lubridate::ymd("2017-12-31", tz = "UTC")
  prior <- !is.na(PlusforTrello$Date) & PlusforTrello$Date < cutoff
  board[prior & board %in% "PPUA5302"] <- "PPUA5301"
  board[prior & board %in% "DA5030"] <- "DA5020"
  as.factor(board)
})
# Total hours (column S) per board and week for the PPUA / DA course boards.
PfTSummary <- PlusforTrello[stringr::str_detect(PlusforTrello$Board, "(?:PPUA)|(?:DA5?)"), ] %>%
  group_by(Board, Week) %>%
  summarise(TotalHrsPerWeek = sum(S))
# Horizontal stacked bars: one bar per week, filled by course board.
ggplot(data = PfTSummary, mapping = aes(x = Week, y = TotalHrsPerWeek)) +
  geom_bar(stat = "identity", aes(fill = Board)) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Total hours per week",
    subtitle = "Fill color by course",
    caption = "Note: 2nd semester showed a decrease in hours spent during finals week \n because taxes were due during this time window.",
    x = "Week",
    y = "Hours"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  ) +
  scale_fill_discrete(breaks = c("DA5030", "PPUA5302", "DA5020", "PPUA5301"))
# Hours per board summed over all weeks; kept in CourseHrs for the table that
# follows.
CourseHrs <- PfTSummary %>%
  group_by(Board) %>%
  summarise(TotalCourseHours = sum(TotalHrsPerWeek))
# Emit the grand total as an HTML paragraph.
htmltools::tagList(
  htmltools::tags$p(paste("Total hours:", sum(CourseHrs$TotalCourseHours)))
)
Total hours: 734.44
# Render the per-course totals as a centred HTML table.
kableExtra::kable_styling(
  kableExtra::kable(as.data.frame(CourseHrs), "html"),
  position = "center"
)
Board | TotalCourseHours |
---|---|
DA5020 | 204.26 |
DA5030 | 202.65 |
PPUA5301 | 146.90 |
PPUA5302 | 180.63 |
Average Hours spent per week per Course
# Mean weekly hours per board, rendered as a centred HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    PfTSummary %>%
      group_by(Board) %>%
      summarize(Avg = mean(TotalHrsPerWeek)),
    "html"
  ),
  position = "center"
)
Board | Avg |
---|---|
DA5020 | 12.76625 |
DA5030 | 11.92059 |
PPUA5301 | 11.30000 |
PPUA5302 | 11.28937 |
Read CSV from Plus for Trello Fall18
# Read the Fall 2018 Plus for Trello export.
PlusforTrello <- read.csv(file = "~//R//Quantified Self//PlusforTrelloFall2018.csv")
# Remove the weird symbols in the names
names(PlusforTrello) <- gsub("[!@#$%^&*?.????]", "", names(PlusforTrello), perl = TRUE)
# Make Date a date
PlusforTrello$Date <- lubridate::ymd_hm(PlusforTrello$Date)
# Four colours from the "Paired" palette, named after the boards found at
# positions 6, 7, 3 and 5 of the unique board list.
# NOTE(review): these positions depend on the row order of the CSV - confirm
# they still point at the intended boards if the export changes.
cols <- RColorBrewer::brewer.pal(12, name = "Paired")[c(5, 6, 8, 10)]
names(cols) <- as.character(unique(PlusforTrello$Board)[c(6, 7, 3, 5)])
# Total hours (column S) per board and week for the HINF / CAEP boards.
# `grp` is the leading course code of the board name with whitespace removed.
# It is converted to a factor directly in mutate(), replacing the deprecated
# mutate_at(vars(), funs()) form, and the summary is assigned with `<-` rather
# than through assign() in the middle of a pipe.
PfTSummary <- PlusforTrello %>%
  filter(str_detect(Board, "(?:HINF)|(?:CAEP)")) %>%
  mutate(grp = as.factor(gsub("\\s", "", str_extract(Board, "^\\w{4}\\s?\\d{4}")))) %>%
  group_by(Board, grp, Week) %>%
  summarise(TotalHrsPerWeek = sum(S))
# Dodged horizontal bars per week, filled by board, with the mean weekly hours
# per board listed in the caption.
ggplot(data = PfTSummary, mapping = aes(x = Week, y = TotalHrsPerWeek, group = grp)) +
  geom_bar(stat = "identity", aes(fill = Board), position = "dodge") +
  labs(
    title = "Total hours per week",
    subtitle = "Fill color by course",
    caption = local({
      # Compute the per-board means once. Boards with no rows (unused factor
      # levels give NA) are dropped before taking at most the first four, so
      # names and values always line up.
      avg <- with(PfTSummary, tapply(TotalHrsPerWeek, INDEX = Board, FUN = mean))
      avg <- avg[!is.na(avg)]
      avg <- avg[seq_len(min(4L, length(avg)))]
      paste(
        "Average Hours per Week:",
        paste0(names(avg), ": ", round(avg, 2), collapse = "\n"),
        sep = "\n"
      )
    }),
    x = "Week",
    y = "Hours"
  ) +
  scale_fill_manual(values = cols) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 2),
    minor_breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 1)
  ) +
  # theme_grey() is a complete theme, so the theme_minimal() that used to
  # precede it had no effect and has been removed.
  theme_grey() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )
# Save the plot built above.
ggsave("Fall2018Hours.png", device = "png", type = "cairo", width = 12, height = 9)
# Hours per board summed over all weeks; kept in CourseHrs for the table that
# follows.
CourseHrs <- PfTSummary %>%
  group_by(Board) %>%
  summarise(TotalCourseHours = sum(TotalHrsPerWeek))
# Emit the grand total as an HTML paragraph.
htmltools::tagList(
  htmltools::tags$p(paste("Total hours:", sum(CourseHrs$TotalCourseHours)))
)
Total hours: 241.33
# Per-course totals, largest first, rendered as a centred HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    arrange(as.data.frame(CourseHrs), desc(TotalCourseHours)),
    "html"
  ),
  position = "center"
)
Board | TotalCourseHours |
---|---|
CAEP7712 | 79.45 |
HINF6400 Incentivized Insurance | 64.27 |
HINF 6400 | 57.73 |
HINF5512 | 39.88 |
Average Hours spent per week per Course
# Mean weekly hours per board, largest first, rendered as a centred HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    PfTSummary %>%
      group_by(Board) %>%
      summarize(Avg = mean(TotalHrsPerWeek)) %>%
      arrange(desc(Avg)),
    "html"
  ),
  position = "center"
)
Board | Avg |
---|---|
CAEP7712 | 6.620833 |
HINF6400 Incentivized Insurance | 5.355833 |
HINF 6400 | 4.810833 |
HINF5512 | 3.988000 |
# Read the Spring 2019 Plus for Trello export.
PlusforTrello <- read.csv(file = "~//R//Quantified Self//PlusforTrelloSpring2019.csv")
# Remove the weird symbols in the names
names(PlusforTrello) <- gsub("[!@#$%^&*?.????]", "", names(PlusforTrello), perl = TRUE)
# Make Date a date
PlusforTrello$Date <- lubridate::ymd_hm(PlusforTrello$Date)
# Five colours from HDA::ggColor(), named after the boards found at positions
# 3, 4, 5, 7 and 10 of the unique board list.
# NOTE(review): these positions depend on the row order of the CSV - confirm
# they still point at the intended boards if the export changes.
cols <- HDA::ggColor(5)
names(cols) <- as.character(unique(PlusforTrello$Board)[c(3, 4, 5, 7, 10)])
# Total hours (column S) per board and week for the HINF, Mariy* and Capstone
# boards. `grp` is the leading course code of the board name with whitespace
# removed (NA for boards that do not start with a four-character code followed
# by four digits). It is converted to a factor directly in mutate(), replacing
# the deprecated mutate_at(vars(), funs()) form, and the summary is assigned
# with `<-` rather than through assign() in the middle of a pipe.
PfTSummary <- PlusforTrello %>%
  filter(str_detect(Board, "(?:HINF)|(?:Mariy)|(?:Capstone)")) %>%
  mutate(grp = as.factor(gsub("\\s", "", str_extract(Board, "^\\w{4}\\s?\\d{4}")))) %>%
  group_by(Board, grp, Week) %>%
  summarise(TotalHrsPerWeek = sum(S))
# Dodged horizontal bars per week, filled by board, with the mean weekly hours
# per board listed in the caption.
ggplot(data = PfTSummary, mapping = aes(x = Week, y = TotalHrsPerWeek, group = grp)) +
  geom_bar(stat = "identity", aes(fill = Board), position = "dodge") +
  labs(
    title = "Total hours per week",
    subtitle = "Fill color by course",
    caption = local({
      # Compute the per-board means once. The same subset (non-NA, at most the
      # first five) supplies both names and values; previously the names were
      # not limited to five while the values were, so the two could fall out
      # of step.
      avg <- with(PfTSummary, tapply(TotalHrsPerWeek, INDEX = Board, FUN = mean))
      avg <- avg[!is.na(avg)]
      avg <- avg[seq_len(min(5L, length(avg)))]
      paste(
        "Average Hours per Week:",
        paste0(names(avg), ": ", round(avg, 2), collapse = "\n"),
        sep = "\n"
      )
    }),
    x = "Week",
    y = "Hours"
  ) +
  scale_fill_manual(values = cols) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 2),
    minor_breaks = seq(0, max(PfTSummary$TotalHrsPerWeek), 1)
  ) +
  # theme_grey() is a complete theme, so the theme_minimal() that used to
  # precede it had no effect and has been removed.
  theme_grey() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )
# Save the plot built above.
ggsave("Spring2019Hours.png", device = "png", type = "cairo", width = 12, height = 9)
# Hours per board summed over all weeks; kept in CourseHrs for the table that
# follows.
CourseHrs <- PfTSummary %>%
  group_by(Board) %>%
  summarise(TotalCourseHours = sum(TotalHrsPerWeek))
# Emit the grand total as an HTML paragraph.
htmltools::tagList(
  htmltools::tags$p(paste("Total hours:", sum(CourseHrs$TotalCourseHours)))
)
Total hours: 148.24
# Per-course totals, largest first, rendered as a centred HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    arrange(as.data.frame(CourseHrs), desc(TotalCourseHours)),
    "html"
  ),
  position = "center"
)
Board | TotalCourseHours |
---|---|
HINF6500 Predictive Analytics | 48.52 |
HINF 5105 American Healthcare | 37.35 |
Capstone | 35.04 |
Mariyashiyko.love | 17.22 |
HINF5105 Group Project | 10.11 |
Average Hours spent per week per Course
# Mean weekly hours per board, largest first, rendered as a centred HTML table.
kableExtra::kable_styling(
  kableExtra::kable(
    PfTSummary %>%
      group_by(Board) %>%
      summarize(Avg = mean(TotalHrsPerWeek)) %>%
      arrange(desc(Avg)),
    "html"
  ),
  position = "center"
)
Board | Avg |
---|---|
Capstone | 7.008000 |
HINF6500 Predictive Analytics | 6.931429 |
Mariyashiyko.love | 5.740000 |
HINF 5105 American Healthcare | 3.395455 |
HINF5105 Group Project | 3.370000 |