# Start from a clean workspace and fix the RNG seed for this notebook.
rm(list = ls())
seed <- 1
set.seed(seed)
# Attach dependencies with library(): unlike require(), it stops with an error
# when a package is not installed instead of returning FALSE with a warning
# and letting the notebook fail later at the first call into that package.
library(kaiaulu)
library(visNetwork)
library(data.table)
library(igraph)
library(yaml)
library(stringi)
library(knitr)
This notebook demonstrates the first end-to-end pipeline from raw GitHub issue, pull request, and comment data to DV8 DR Space clustering applied to an author communication network in Kaiaulu. Prior use of DV8 in Kaiaulu focused on source code dependency structure. Here, the same clustering infrastructure is applied to a social network of developers derived from GitHub reply data.
DR Space (Design Rule Space) is a hierarchical clustering algorithm implemented in DV8. It groups nodes based on the density and balance of their connections. In a code dependency context, this identifies architectural modules. Applied to an author communication network, it identifies cohesive communication clusters among developers.
The pipeline proceeds as follows:
# Load the tool locations and the project configuration.
tool <- parse_config("../tools.yml")
conf <- parse_config("../conf/kaiaulu.yml")
dv8_path <- get_tool_project("dv8", tool)
# GitHub JSON folders configured under "project_key_1".
github_issue_path <- get_github_issue_path(conf, "project_key_1")
github_pull_request_path <- get_github_pull_request_path(conf, "project_key_1")
github_reply_path <- get_github_issue_or_pr_comment_path(conf, "project_key_1")
github_commit_path <- get_github_commit_path(conf, "project_key_1")
github_pr_comments_path <- get_github_pr_comments_path(conf, "project_key_1")
# DV8 parameters
project_path <- get_dv8_folder_path(conf)
# Use the last path component as the project name. basename() removes trailing
# path separators first, so a folder path ending in "/" no longer yields an
# empty name (which produced file names such as "-author-hdsm.json").
project_name <- basename(project_path)
Parse all GitHub issues, pull requests, and comments into a single reply table.
# Build one reply table from the configured GitHub JSON folders
# (issues, pull requests, issue/PR comments, commits, PR review comments).
project_github_replies <- parse_github_replies(
  issues_json_folder_path        = github_issue_path,
  pull_requests_json_folder_path = github_pull_request_path,
  comments_json_folder_path      = github_reply_path,
  commit_json_folder_path        = github_commit_path,
  pr_comments_json_folder_path   = github_pr_comments_path
)
# Report how many rows were parsed.
nrow(project_github_replies)
## [1] 1648
# Preview the first rows, keeping only the author and subject columns.
reply_preview <- head(project_github_replies[, .(reply_from, reply_subject)])
kable(reply_preview)
| reply_from | reply_subject |
|---|---|
| BenjyNStrauss | 306 |
| BenjyNStrauss | 306 |
| BenjyNStrauss | 306 |
| BenjyNStrauss | 306 |
| BenjyNStrauss | 306 |
| CorneJB | 128 |
Export the author-author graph as a DSM JSON file and convert it to the DV8 binary format required for clustering.
# Make sure the DV8 output folder exists (no warning if it is already there).
dir.create(project_path, showWarnings = FALSE, recursive = TRUE)
# Base name shared by the DSM JSON file and the DSM name stored inside it.
author_dsm_name <- paste0(project_name, "-author-hdsm")
adsmj_path <- file.path(project_path, paste0(author_dsm_name, ".json"))
# NOTE(review): author_graph is not defined in this section; it is presumably
# built from project_github_replies in an earlier chunk. Confirm.
graph_to_dsmj(
  author_graph,
  dsmj_path = adsmj_path,
  dsmj_name = author_dsm_name
)
## [1] "../../analysis/junit/dv8//-author-hdsm.json"
# Target path for the DV8 binary DSM, then convert the DSM JSON into it.
adsmb_file <- paste0(project_name, "-author-hdsm.dv8-dsm")
adsmb_path <- file.path(project_path, adsmb_file)
dv8_dsmj_to_dsmb(
  dv8_path = dv8_path,
  dsmj_path = adsmj_path,
  dsmb_path = adsmb_path
)
## [1] "../../analysis/junit/dv8//-author-hdsm.dv8-dsm"
Run DV8’s DR Space hierarchical clustering on the author-author DSM binary.
# Target path for the binary cluster file produced from the author DSM binary.
clsx_file <- paste0(project_name, "-author-clsx.dv8-clsx")
clsx_path <- file.path(project_path, clsx_file)
dv8_mdsmb_to_hierclsxb(
  dv8_path = dv8_path,
  mdsmb_path = adsmb_path,
  hierclsxb_path = clsx_path,
  recursive = TRUE
)
## [1] "../../analysis/junit/dv8//-author-clsx.dv8-clsx"
Convert the binary cluster file to JSON and parse the cluster assignments into a data table.
# Target path for the JSON form of the cluster file, then convert to it.
clsxj_file <- paste0(project_name, "-author-clsx.json")
clsxj_path <- file.path(project_path, clsxj_file)
dv8_clsxb_to_clsxj(
  dv8_path = dv8_path,
  clsxb_path = clsx_path,
  clsxj_path = clsxj_path
)
## [1] "../../analysis/junit/dv8//-author-clsx.json"
# Read the cluster assignments from the JSON cluster file.
cluster_table <- parse_dv8_clusters(clsxj_path)
# Keep only rows whose layer label contains no "/".
# NOTE(review): presumably "/" marks nested sub-layers, so this keeps the
# top-level layers only. Confirm against parse_dv8_clusters().
is_top_level <- !grepl("/", cluster_table[["layer"]])
top_cluster_table <- cluster_table[is_top_level]
kable(head(top_cluster_table))
| file_path | module | layer |
|---|---|---|
| haotian1028 | M0 | L0 |
| splimon | M0 | L0 |
| jseto808 | M0 | L0 |
| usradam | M0 | L0 |
| Sean Sunoo 97641529+Ssunoo2@users.noreply.github.com | M0 | L0 |
| RavenMarQ 143663502+RavenMarQ@users.noreply.github.com | M0 | L0 |
# Count rows per module, then sort by module name with "Isolated" listed last.
module_sizes <- top_cluster_table[, .N, by = module]
module_sizes <- module_sizes[order(module == "Isolated", module)]
kable(module_sizes)
| module | N |
|---|---|
| M0 | 19 |
| M1 | 1 |
| M2 | 1 |
| M3 | 1 |
| M4 | 1 |
| Isolated | 13 |