# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this notebook.
rm(list = ls())
# Fix the RNG seed so any randomized step is reproducible across runs.
seed <- 1
set.seed(seed)
# Attach dependencies with library(): unlike require(), it stops with an error
# as soon as a package is missing instead of returning FALSE and letting the
# notebook fail later at the first call into that package.
# The attach order is kept unchanged so function masking stays the same.
library(kaiaulu)
library(visNetwork)
library(reactable)
library(data.table)
library(stringi)
library(igraph)
library(yaml)
library(jsonlite)
library(gt)

Load config file.

# Read the two YAML configuration files used by this notebook.
tool <- yaml::read_yaml("../tools.yml")
conf <- yaml::read_yaml("../conf/kaiaulu.yml")

# Walk down to the GitHub section of the project configuration, then pick the
# folder path configured for issue / pull request comments.
github_conf <- conf[["issue_tracker"]][["github"]][["project_key_1"]]
save_path_issue_or_pr_comments <- github_conf[["issue_or_pr_comment"]]

Since GitHub issue creators and issue descriptions, as well as comment creators and comment bodies, can be understood as e-mail replies, we can format the issue data into the same reply format as mailing lists:

# Every file in the configured folder is read as JSON, parsed into a table of
# comments, and the per-file tables are stacked into a single table.
# fill = TRUE lets files with differing columns be combined.
comment_files <- list.files(save_path_issue_or_pr_comments, full.names = TRUE)
all_issue_or_pr_comments <- rbindlist(
  lapply(comment_files, function(comment_file) {
    github_parse_project_issue_or_pr_comments(read_json(comment_file))
  }),
  fill = TRUE
)

# Working copy that the later steps extend with the reply columns.
project_github <- all_issue_or_pr_comments

# Preview the first two parsed comments as a table.
gt(head(all_issue_or_pr_comments, 2), auto_align = FALSE)
comment_id html_url issue_url created_at updated_at comment_user_login author_association body
615692483 https://github.com/sailuh/kaiaulu/issues/2#issuecomment-615692483 https://api.github.com/repos/sailuh/kaiaulu/issues/2 2020-04-18T07:42:11Z 2020-04-18T08:48:14Z carlosparadis MEMBER Quick search for Ctags on Codeface to see where it is used, in hoping to find how functions can be extracted efficiently across an entire git log: https://github.com/siemens/codeface/search?q=ctags&unscoped_q=ctags
615702209 https://github.com/sailuh/kaiaulu/issues/2#issuecomment-615702209 https://api.github.com/repos/sailuh/kaiaulu/issues/2 2020-04-18T07:49:34Z 2020-04-18T07:49:34Z carlosparadis MEMBER Code logic on how Codeface parse Exuberant Ctags to identify functions on source code using the `python ctags` package linked above: https://github.com/siemens/codeface/blob/e6640c931f76e82719982318a5cd6facf1f3df48/codeface/VCS.py#L1370-L1421 The limitation of Java and C# seems to be a consequence of how the tags are written

1 Identity Matching

# Identity matching: run kaiaulu's identity_match() over the comment table,
# using only the GitHub login column as the name (use_name_only = TRUE) and
# assign_exact_identity as the matching function.
project_log <- identity_match(
  list(project_github = project_github),
  name_column = "comment_user_login",
  assign_identity_function = assign_exact_identity,
  use_name_only = TRUE,
  label = "raw_name"
)

# Pull the identity-matched table back out of the returned list.
project_github <- project_log[["project_github"]]

2 Datetime Parsing

# Parse the `created_at` timestamps (e.g. 2020-04-18T07:42:11Z) as UTC.
project_github$reply_datetimetz <- as.POSIXct(project_github$created_at,
                                        format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")

# The comment author plays the role of the reply sender.
project_github$reply_from <- project_github$comment_user_login

# The issue number is the trailing numeric segment of each issue URL
# (e.g. ".../issues/2" -> 2). It is extracted from every URL independently
# rather than from a segment position computed on the first row only, so the
# result does not depend on all URLs having the same number of segments and
# the step does not fail when the table has no rows.
# URLs that do not end in digits yield NA.
project_github$reply_subject <- as.numeric(
  stringi::stri_extract_last_regex(project_github$issue_url, "[0-9]+$")
)

3 Construct GitHub Communication Network

# Turn the reply table into a bipartite network; the result is indexed below
# by its "nodes" and "edgelist" elements.
project_github_network <- transform_reply_to_bipartite_network(project_github)

# Recolor the nodes whose `type` is FALSE.
# NOTE(review): presumably `type` separates the two node classes of the
# bipartite network -- confirm against transform_reply_to_bipartite_network.
project_github_network[["nodes"]][type == FALSE]$color <- '#3895D3'

# Build a directed igraph object from the edge list and node table. The same
# variable name is reused, so from here on it holds the igraph object rather
# than the nodes/edgelist list.
project_github_network <- igraph::graph_from_data_frame(d=project_github_network[["edgelist"]], 
                      directed = TRUE, 
                      vertices = project_github_network[["nodes"]])

# Render the graph interactively; a fixed randomSeed is passed, presumably so
# the layout is the same on every run (confirm in the visNetwork docs).
visIgraph(project_github_network,randomSeed = 1)