# Start from an empty workspace so objects left over from a previous session
# cannot leak into this analysis.
rm(list = ls())

# Fix the random number generator seed so repeated runs are reproducible.
seed <- 1
set.seed(seed)

# Attach dependencies with library() rather than require(): library() stops
# immediately when a package is missing, whereas require() only warns and
# returns FALSE, letting the script fail later with an unrelated error.
library(kaiaulu)
library(visNetwork)
library(reactable)
library(data.table)
library(stringi)
library(igraph)
library(yaml)
library(jsonlite)
library(gt)
Load the tool and project configuration files.
# Read the tool configuration and the project configuration from YAML.
tool <- yaml::read_yaml("../tools.yml")
conf <- yaml::read_yaml("../conf/kaiaulu.yml")

# Walk down to the GitHub settings of the first project key, then pick the
# entry holding the location of the issue / pull request comment files.
github_conf <- conf[["issue_tracker"]][["github"]][["project_key_1"]]
save_path_issue_or_pr_comments <- github_conf[["issue_or_pr_comment"]]
Since GitHub issue creators and issue descriptions, as well as comment creators and comment bodies, can be understood as e-mail replies, we can format the issue data into the same reply format used for mailing lists:
# List every file stored under the configured comments path and read each one
# as JSON.
comment_files <- list.files(save_path_issue_or_pr_comments, full.names = TRUE)
raw_comments <- lapply(comment_files, read_json)

# Parse each JSON document with kaiaulu's comment parser, then stack the
# results into one table; fill = TRUE pads columns missing from some pieces.
parsed_comments <- lapply(raw_comments,
                          github_parse_project_issue_or_pr_comments)
all_issue_or_pr_comments <- rbindlist(parsed_comments, fill = TRUE)
project_github <- all_issue_or_pr_comments

# Preview the first two comments as a gt table.
gt(head(all_issue_or_pr_comments, 2), auto_align = FALSE)
| comment_id | html_url | issue_url | created_at | updated_at | comment_user_login | author_association | body |
|---|---|---|---|---|---|---|---|
| 615692483 | https://github.com/sailuh/kaiaulu/issues/2#issuecomment-615692483 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | 2020-04-18T07:42:11Z | 2020-04-18T08:48:14Z | carlosparadis | MEMBER | Quick search for Ctags on Codeface to see where it is used, in hoping to find how functions can be extracted efficiently across an entire git log: https://github.com/siemens/codeface/search?q=ctags&unscoped_q=ctags |
| 615702209 | https://github.com/sailuh/kaiaulu/issues/2#issuecomment-615702209 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | 2020-04-18T07:49:34Z | 2020-04-18T07:49:34Z | carlosparadis | MEMBER | Code logic on how Codeface parse Exuberant Ctags to identify functions on source code using the `python ctags` package linked above: https://github.com/siemens/codeface/blob/e6640c931f76e82719982318a5cd6facf1f3df48/codeface/VCS.py#L1370-L1421 The limitation of Java and C# seems to be a consequence of how the tags are written |
# Identity matching
# identity_match() takes a named list of tables, so wrap the comments table,
# match on the commenter login using exact name matching only, and unwrap the
# table again afterwards.
project_log <- identity_match(
  list(project_github = project_github),
  name_column = "comment_user_login",
  assign_identity_function = assign_exact_identity,
  use_name_only = TRUE,
  label = "raw_name"
)
project_github <- project_log[["project_github"]]
# Parse the creation timestamps (e.g. 2020-04-18T07:42:11Z) as UTC date-times.
project_github$reply_datetimetz <- as.POSIXct(project_github$created_at,
                                              format = "%Y-%m-%dT%H:%M:%SZ",
                                              tz = "UTC")

# The login of the comment author is used as the sender of the reply.
project_github$reply_from <- project_github$comment_user_login

# The reply subject is the last path segment of the issue URL, e.g. the 2 in
# https://api.github.com/repos/sailuh/kaiaulu/issues/2.
# The segment is taken per URL instead of reusing the segment count of the
# first URL, so an empty table or URLs of differing depth no longer break the
# extraction. vapply() guarantees a character vector whatever the input size.
issue_split_urls <- stringi::stri_split(project_github$issue_url, regex = "/")
issue_numbers <- vapply(
  issue_split_urls,
  function(url_segments) {
    if (length(url_segments) == 0L) {
      NA_character_
    } else {
      url_segments[[length(url_segments)]]
    }
  },
  character(1)
)
project_github$reply_subject <- as.numeric(issue_numbers)
# Turn the reply table into a bipartite network (a list with a node table and
# an edge list).
reply_network <- transform_reply_to_bipartite_network(project_github)

# Recolor the nodes whose `type` flag is FALSE.
network_nodes <- reply_network[["nodes"]]
network_nodes[type == FALSE]$color <- "#3895D3"

# Build a directed igraph object from the edge list and the recolored nodes,
# then render it interactively with a fixed layout seed.
project_github_network <- igraph::graph_from_data_frame(
  d = reply_network[["edgelist"]],
  vertices = network_nodes,
  directed = TRUE
)
visIgraph(project_github_network, randomSeed = 1)