# Remove every visible object from the current environment and fix the RNG
# seed so that repeated runs of this notebook start from the same state.
rm(list = ls())
seed <- 1
set.seed(seed)

# Attach dependencies with library() rather than require(): library() stops
# immediately when a package is missing, whereas require() only returns FALSE
# and lets the script fail later with a less obvious error.
library(kaiaulu)
library(visNetwork)
library(data.table)
library(stringi)
library(igraph)
library(gh)
library(yaml)
library(magrittr)
library(knitr)

# Tool and project configuration files, resolved relative to the working
# directory. Swap the commented line to analyse a different project.
tool <- yaml::read_yaml("../tools.yml")
#conf <- yaml::read_yaml("../conf/apr.yml")
conf <- yaml::read_yaml("../conf/perceval.yml")
# Value of version_control -> log in the project configuration
# (presumably the path to the project's git log -- confirm against the yml).
git_repo_path <- conf[["version_control"]][["log"]]

# Depends parameters
depends_jar_path <- tool[["depends"]]
language <- conf[["tool"]][["depends"]][["code_language"]]
keep_dependencies_type <- conf[["tool"]][["depends"]][["keep_dependencies_type"]]

# Filters
file_extensions <- conf[["filter"]][["keep_filepaths_ending_with"]]
substring_filepath <- conf[["filter"]][["remove_filepaths_containing"]]

1 Construct File Network

1.1 Project Language

We can use the gh package with GitHub's languages endpoint to find out which languages a project is written in. Paste the project URL below and run the code block.

# GitHub URL of the project whose language breakdown is queried.
# NOTE(review): this URL points at apache/apr while the configuration loaded
# earlier in this file is perceval.yml -- confirm they are meant to differ.
project_github_url <- "https://github.com/apache/apr"


# Split the URL on "/" and drop empty pieces, so that a trailing slash does
# not leave an empty string in the repo position. The split result goes into
# its own variable; the URL string itself is left intact so this step can be
# re-run without first re-running the assignment above.
github_url_parts <- stri_split_regex(project_github_url,
                                     pattern = "/",
                                     omit_empty = TRUE)[[1]]
owner <- github_url_parts[length(github_url_parts) - 1]
repo <- github_url_parts[length(github_url_parts)]

# Query the repository's languages endpoint and flatten the response into a
# named vector (one entry per language).
language_distribution_byte <- unlist(
  gh("GET /repos/:owner/:repo/languages", owner = owner, repo = repo)
)
# Rescale the values so they sum to 1, then print them with two decimals.
language_distribution_byte <- language_distribution_byte / sum(language_distribution_byte)
format(round(language_distribution_byte, 2), nsmall = 2)

1.2 Parse Dependencies

Based on the most used language in the project, set the code_language parameter for Depends in the project configuration file (it is read into `language` above). Accepted languages are: cpp, java, ruby, python, pom.

# Run Depends (via its jar) on the configured repository for the configured
# language and keep the parsed result for the filtering steps that follow.
project_dependencies <- parse_dependencies(
  depends_jar_path,
  git_repo_path,
  language = language
)

2 Filter files

# Node table: apply the configured file-extension filter first, then the
# configured filepath-substring filter, both on the "filepath" column.
project_dependencies[["nodes"]] <- filter_by_file_extension(
  project_dependencies[["nodes"]], file_extensions, "filepath"
)
project_dependencies[["nodes"]] <- filter_by_filepath_substring(
  project_dependencies[["nodes"]], substring_filepath, "filepath"
)

# Edge list: apply the same two filters to both endpoints of every edge.
# The call order matches the node filtering: extensions first (source, then
# destination), substrings second (source, then destination).
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "src_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_file_extension(
  project_dependencies[["edgelist"]], file_extensions, "dest_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "src_filepath"
)
project_dependencies[["edgelist"]] <- filter_by_filepath_substring(
  project_dependencies[["edgelist"]], substring_filepath, "dest_filepath"
)

2.1 Example network visualization with igraph

# Turn the filtered dependencies into a network, passing the dependency
# types selected in the configuration as `weight_types`.
project_file_network <- transform_dependencies_to_network(
  project_dependencies,
  weight_types = keep_dependencies_type
)

# Show the first ten rows of the filtered edge list as a table.
knitr::kable(head(project_dependencies[["edgelist"]], n = 10))
src_filepath dest_filepath Use ImplLink Contain Call Import Return Extend Throw
perceval/backends/core/mbox.py perceval/backends/core/googlehits.py 17 0 0 0 0 0 0 0
perceval/backends/core/askbot.py perceval/backend.py 10 1 1 5 1 1 2 0
perceval/backends/core/supybot.py perceval/backend.py 13 5 1 5 1 1 2 0
perceval/backends/core/mbox.py perceval/backends/core/supybot.py 5 0 0 0 0 0 0 0
perceval/backends/core/rss.py perceval/client.py 2 0 0 3 1 0 1 0
perceval/backends/core/gitter.py perceval/errors.py 1 0 0 0 0 0 0 1
perceval/backends/core/bugzilla.py perceval/backends/core/nntp.py 0 0 1 0 0 0 0 0
perceval/backends/core/bugzilla.py perceval/backends/core/googlehits.py 12 0 0 0 0 0 0 0
perceval/backends/core/gitlab.py perceval/backends/core/githubql.py 0 12 0 0 0 0 0 0
perceval/backends/core/phabricator.py perceval/backend.py 10 1 1 6 1 1 2 0
# Build a directed igraph object from the network's edge list and node table,
# then draw it with visNetwork's visIgraph(). The layout seed reuses `seed`
# (set at the top of the file) instead of repeating the literal 1, so the
# RNG seed and the layout seed cannot drift apart.
project_file_network <- igraph::graph_from_data_frame(
  d = project_file_network[["edgelist"]],
  directed = TRUE,
  vertices = project_file_network[["nodes"]]
)
visIgraph(project_file_network, randomSeed = seed)