# Notebook setup: reset the workspace, fix the random seed, attach packages.

# Removes every object from the current environment before the analysis runs.
# NOTE(review): when this file is sourced interactively this also wipes the
# caller's own objects; consider dropping it if that is not intended.
rm(list = ls())

# Single seed for the whole notebook so results are reproducible.
seed <- 1
set.seed(seed)

# library() is used instead of require(): require() only warns and returns
# FALSE when a package is missing, which lets the script continue and fail
# later with an unrelated "could not find function" error.
library(kaiaulu)
library(visNetwork)
library(data.table)
library(stringi)
library(igraph)
library(gh)
library(yaml)
library(magrittr)
library(knitr)
# Load tool settings and the project configuration from YAML files.
# Both paths are relative, so they resolve against the current working directory.
tool <- yaml::read_yaml("../tools.yml")
# Alternative project configuration, currently disabled:
#conf <- yaml::read_yaml("../conf/apr.yml")
conf <- yaml::read_yaml("../conf/vcs_shark.yml")
# Entry version_control -> log of the project configuration; passed to
# parse_dependencies() as the repository path later in this notebook.
git_repo_path <- conf[["version_control"]][["log"]]

# Depends parameters
# Entry "depends" of tools.yml (presumably the path to the Depends jar - confirm).
depends_jar_path <- tool[["depends"]]
# Language passed to parse_dependencies(language = ...).
language <- conf[["tool"]][["depends"]][["code_language"]]
# Dependency types passed as weight_types to transform_dependencies_to_network().
keep_dependencies_type <- conf[["tool"]][["depends"]][["keep_dependencies_type"]]

# Filters
# Used by the filter_by_file_extension() calls on nodes and edges.
file_extensions <- conf[["filter"]][["keep_filepaths_ending_with"]]
# Used by the filter_by_filepath_substring() calls on nodes and edges.
substring_filepath <- conf[["filter"]][["remove_filepaths_containing"]]

1 Construct File Network

1.1 Project Language

We can use the gh package with GitHub's languages endpoint to find out which languages a project uses. Paste the project URL below and run the code block.

# Query GitHub for the share of each programming language in a repository.
# NOTE(review): this URL points at apache/apr, while the project configuration
# loaded earlier in this notebook is vcs_shark.yml - confirm the URL matches
# the project being analyzed.
project_github_url <- "https://github.com/apache/apr"

# Normalize the pasted URL: without this, a trailing slash
# (".../apache/apr/") makes the last path component empty and the request is
# sent with an empty repository name.
project_github_url <- sub("/+$", "", project_github_url)

# Split on "/" and take the last two components as owner and repository.
project_github_url <- stri_split_regex(project_github_url, pattern = "/")[[1]]
owner <- project_github_url[length(project_github_url) - 1]
repo <- project_github_url[length(project_github_url)]
# Clone URLs end in ".git", which is not part of the repository name.
repo <- sub("\\.git$", "", repo)

# The endpoint returns one byte count per language; convert to proportions.
language_distribution_byte <- unlist(
  gh("GET /repos/:owner/:repo/languages", owner = owner, repo = repo)
)
language_distribution_byte <- language_distribution_byte / sum(language_distribution_byte)

# Show the proportions rounded to two decimals.
format(round(language_distribution_byte, 2), nsmall = 2)

1.2 Parse Dependencies

Based on the most used language in the project, set the code_language parameter for Depends in the project configuration file. Accepted languages are: cpp, java, ruby, python, pom.

# Run Depends on the repository for the configured language; the result is a
# list whose "nodes" and "edgelist" entries are used in the next sections.
project_dependencies <- parse_dependencies(
  depends_jar_path,
  git_repo_path,
  language = language
)

2 Filter files

# Nodes: filter by the configured file extensions, then by the configured
# file path substrings.
project_dependencies[["nodes"]] <- filter_by_file_extension(
  project_dependencies[["nodes"]], file_extensions, "filepath"
)
project_dependencies[["nodes"]] <- filter_by_filepath_substring(
  project_dependencies[["nodes"]], substring_filepath, "filepath"
)

# Edges: apply the same two filters to both endpoints, extension filters
# first (source, then destination), substring filters second.
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "src_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "dest_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "src_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "dest_filepath"
)

2.1 Example network visualization with igraph

# Build the file network from the filtered dependencies, passing the
# configured dependency types as weight_types.
project_file_network <- transform_dependencies_to_network(
  project_dependencies,
  weight_types = keep_dependencies_type
)

# Preview the first 10 rows of the filtered edge list as a table.
project_dependencies[["edgelist"]] %>%
  head(10) %>%
  kable()
src_filepath dest_filepath Call Import Extend Use Contain Create
pyvcsshark/datastores/mongostore.py pyvcsshark/datastores/__init__.py 1 0 0 0 0 0
pyvcsshark/datastores/mongostore.py pyvcsshark/datastores/basestore.py 0 1 1 3 0 0
pyvcsshark/parser/baseparser.py pyvcsshark/parser/svnparser.py 0 0 0 0 1 0
pyvcsshark/parser/baseparser.py pyvcsshark/utils.py 1 1 0 2 0 0
pyvcsshark/datastores/mysqlstore.py pyvcsshark/datastores/basestore.py 0 1 1 2 0 0
pyvcsshark/__init__.py pyvcsshark/config.py 0 0 0 12 1 1
vcsshark.py pyvcsshark/main.py 0 1 0 0 0 0
pyvcsshark/main.py pyvcsshark/parser/svnparser.py 0 0 0 0 2 0
pyvcsshark/main.py pyvcsshark/parser/gitparser.py 0 0 0 0 2 0
pyvcsshark/__init__.py pyvcsshark/main.py 1 1 0 8 0 0
# Convert the network (edge list plus node table) into a directed igraph
# object. Note that project_file_network is rebound from the list returned by
# transform_dependencies_to_network() to the igraph object.
project_file_network <- igraph::graph_from_data_frame(
  d = project_file_network[["edgelist"]],
  directed = TRUE,
  vertices = project_file_network[["nodes"]]
)

# Render the graph interactively. The layout seed reuses the notebook-wide
# `seed` (currently 1) instead of a second hard-coded value, so changing the
# seed at the top of the notebook keeps the layout consistent with it.
visIgraph(project_file_network, randomSeed = seed)