# Fix the RNG seed so the notebook is reproducible; the same value is reused
# below as the layout seed (randomSeed) of the visNetwork graphs.
# The former rm(list = ls()) was removed: it deletes the caller's workspace
# (e.g. when the notebook is rendered from an interactive session) while not
# resetting attached packages or options, so it never gave a clean session.
seed <- 1
set.seed(seed)
# Attach dependencies with library() so a missing package stops the notebook
# immediately; require() only warns and returns FALSE, which defers the
# failure to the first call into the missing package.
library(kaiaulu)
library(igraph)
library(visNetwork)
library(data.table)
library(yaml)
library(stringi)
library(knitr)

1 Introduction

This notebook demonstrates how to assign custom numeric values as edge weights when transforming raw data into a graph object. Specifically, it uses sentiment polarity scores from GitHub issue comments as edge weights in a bipartite author-to-issue network, and visualizes the resulting network with edges colored by sentiment.

Polarity integers (0 = neutral, 1 = positive, 2 = negative) are remapped to (0, 1, -1) before being passed to transform_reply_to_bipartite_network() via the weight parameter. The weight_agg parameter controls how multiple comments between the same author and issue are aggregated — defaulting to mean.

2 Load Data

The input is a CSV of Kaiaulu GitHub issue comments with predicted sentiment polarity labels.

# Some columns of this CSV are 64-bit integers (comment_id among them, judging
# by the rendered preview). Read them as character so the ids stay exact and
# readable without the optional bit64 package; read as integer64 without
# bit64 they print as denormal doubles such as 3.04e-315.
# NOTE(review): the table rendered below predates this change; re-knit to
# refresh its comment_id column.
dt <- fread(
  "https://github.com/user-attachments/files/27417102/kaiaulu_predictions.csv",
  integer64 = "character"
)
# Preview the columns used in the rest of the notebook.
kable(head(dt[, .(comment_id, issue_url, comment_user_login, polarity)]))
comment_id issue_url comment_user_login polarity
3.041925e-315 https://api.github.com/repos/sailuh/kaiaulu/issues/2 carlosparadis 0
3.041973e-315 https://api.github.com/repos/sailuh/kaiaulu/issues/2 carlosparadis 0
3.042337e-315 https://api.github.com/repos/sailuh/kaiaulu/issues/2 carlosparadis 0
3.042382e-315 https://api.github.com/repos/sailuh/kaiaulu/issues/2 rnkazman 0
3.042538e-315 https://api.github.com/repos/sailuh/kaiaulu/issues/2 carlosparadis 0
3.044116e-315 https://api.github.com/repos/sailuh/kaiaulu/issues/3 carlosparadis 0

3 Prepare Data

Rename columns to match the expected reply_from and reply_subject format, and remap polarity integers (0 = neutral, 1 = positive, 2 = negative) to signed weights (0, 1, -1).

# Add, by reference, the two columns the reply-network transform expects plus
# a signed sentiment weight:
#   reply_from    - the commenting user
#   reply_subject - "#<issue number>", taken from the tail of the issue URL
#   polarity_int  - 0 stays 0, 1 stays 1, 2 becomes -1 (any other value is NA)
dt[, `:=`(
  reply_from    = comment_user_login,
  reply_subject = sub(".*/issues/", "#", issue_url),
  polarity_int  = fcase(
    polarity == 0L,  0L,
    polarity == 1L,  1L,
    polarity == 2L, -1L
  )
)]
# Preview the derived columns.
dt[, .SD, .SDcols = c("reply_from", "reply_subject", "polarity_int")] |>
  head() |>
  kable()
reply_from reply_subject polarity_int
carlosparadis #2 0
carlosparadis #2 0
carlosparadis #2 0
rnkazman #2 0
carlosparadis #2 0
carlosparadis #3 0

4 Build Weighted Bipartite Network

Transform the reply table into a bipartite graph using sentiment polarity as the edge weight, aggregated by mean across multiple comments between the same author and issue.

# Build the author-to-issue graph; polarity_int is the edge weight and
# repeated author/issue pairs are collapsed with mean().
reply_graph <- transform_reply_to_bipartite_network(
  dt,
  weight = "polarity_int",
  weight_agg = mean
)
# Preview the node table.
reply_graph[["nodes"]] |>
  head() |>
  kable()
name type color
carlosparadis TRUE black
rnkazman TRUE black
massihonda TRUE black
CorneJB TRUE black
tuejari TRUE black
valentina-lenarduzzi TRUE black
# Preview the edge table (from, to, aggregated weight, direction).
reply_graph[["edgelist"]] |>
  head() |>
  kable()
from to weight direction
carlosparadis #2 -0.40 directed
rnkazman #2 0.00 directed
carlosparadis #3 0.00 directed
carlosparadis #1 -0.25 directed
carlosparadis #4 0.00 directed
carlosparadis #11 -0.50 directed

5 Visualize Network

Edge color reflects the mean sentiment between an author and an issue: green for positive, black for neutral, and red for negative.

# Pull the node and edge tables out of the sentiment graph.
nodes <- reply_graph[["nodes"]]
edges <- reply_graph[["edgelist"]]

# Color every edge by the sign of its aggregated polarity. No default is
# given, so an edge whose weight is NA gets an NA color.
edges[, color := fcase(
  weight > 0,  "green",
  weight == 0, "black",
  weight < 0,  "red"
)]

# Treat the graph as directed if any edge is flagged as such.
is_directed <- any(edges[["direction"]] == "directed")
plot_reply_graph <- igraph::graph_from_data_frame(d = edges,
                                                  directed = is_directed,
                                                  vertices = nodes)
# Use the notebook-wide seed, as the emotion graphs do, instead of a
# hard-coded literal so that changing `seed` affects every layout.
visIgraph(plot_reply_graph, randomSeed = seed)

6 Emotion Graph

This section demonstrates using binary emotion labels as edge weights in a bipartite author-to-issue network. The dataset contains JIRA comments from the Apache Harmony project, each annotated with six binary emotion labels: love, joy, sadness, anger, surprise, and fear.

Each emotion is visualized as a separate graph. The weight parameter is set to the emotion column, and weight_agg = mean aggregates multiple comments between the same author and issue into a proportion (0 = no comments with that emotion, 1 = all comments with that emotion). Only edges with weight > 0 are shown. Each emotion's edges are drawn in a single fixed color, and edge labels show the weight rounded to two decimals.

6.1 Load Emotion Data

# Download the annotated Harmony comments with base read.csv() and convert
# the resulting data.frame to a data.table.
emotion_dt <- read.csv(
  "https://github.com/user-attachments/files/27313854/harmony_emotion_comments_joined.csv"
) |>
  as.data.table()
# Preview the identifier columns and the six emotion columns.
emotion_dt[, .SD, .SDcols = c(
  "comment_id", "issue_key", "author_login", "author_name",
  "love", "joy", "sadness", "anger", "surprise", "fear"
)] |>
  head() |>
  kable()
comment_id issue_key author_login author_name love joy sadness anger surprise fear
12449632 HARMONY-2181 varlax Alexey Varlamov 1 0.0000000 0 NA NA NA
12446909 HARMONY-2041 rusanov Anton Rusanov 1 0.0000000 0 NA NA NA
12562381 HARMONY-5224 smm Stepan Mishura 1 0.0000000 0 NA NA NA
12466111 HARMONY-1767 vmz Vasily Zakharov 0 0.0000000 1 NA NA NA
12438066 HARMONY-1325 ilya.okomin Ilya Okomin 1 0.3333333 0 NA NA NA
12519687 HARMONY-3741 mcfirst Pavel Pervov 0 0.0000000 1 NA NA NA

6.2 Prepare Data

# Add, by reference, the reply_from / reply_subject columns used by the
# reply-network transform: the author login and the issue key.
emotion_dt[, `:=`(
  reply_from    = author_login,
  reply_subject = issue_key
)]
# Preview the network columns next to the six emotion columns.
emotion_dt[, .SD, .SDcols = c(
  "reply_from", "reply_subject",
  "love", "joy", "sadness", "anger", "surprise", "fear"
)] |>
  head() |>
  kable()
reply_from reply_subject love joy sadness anger surprise fear
varlax HARMONY-2181 1 0.0000000 0 NA NA NA
rusanov HARMONY-2041 1 0.0000000 0 NA NA NA
smm HARMONY-5224 1 0.0000000 0 NA NA NA
vmz HARMONY-1767 0 0.0000000 1 NA NA NA
ilya.okomin HARMONY-1325 1 0.3333333 0 NA NA NA
mcfirst HARMONY-3741 0 0.0000000 1 NA NA NA

6.3 Love

# Author-to-issue network weighted by the mean "love" score per pair.
love_graph <- transform_reply_to_bipartite_network(emotion_dt, weight = "love", weight_agg = mean)
nodes <- love_graph[["nodes"]]
# Keep only edges on which the emotion was observed (weight > 0), as the
# section introduction states; this also drops edges whose weight is NA.
edges <- love_graph[["edgelist"]][!is.na(weight) & weight > 0]
# Skip rendering when no edge carries this emotion.
if (nrow(edges) > 0) {
  edges[, color := "hotpink"]
  # Show the aggregated weight, rounded to two decimals, on each edge.
  edges[, label := as.character(round(weight, 2))]
  is_directed <- any(love_graph[["edgelist"]][["direction"]] == "directed")
  visIgraph(igraph::graph_from_data_frame(d = edges, directed = is_directed, vertices = nodes), randomSeed = seed) |>
    visEdges(font = list(align = "horizontal", background = "white", strokeWidth = 0))
}

6.4 Joy

# Author-to-issue network weighted by the mean "joy" score per pair.
joy_graph <- transform_reply_to_bipartite_network(emotion_dt, weight = "joy", weight_agg = mean)
nodes <- joy_graph[["nodes"]]
# Keep only edges on which the emotion was observed (weight > 0), as the
# section introduction states; this also drops edges whose weight is NA.
edges <- joy_graph[["edgelist"]][!is.na(weight) & weight > 0]
# Skip rendering when no edge carries this emotion.
if (nrow(edges) > 0) {
  edges[, color := "darkorange"]
  # Show the aggregated weight, rounded to two decimals, on each edge.
  edges[, label := as.character(round(weight, 2))]
  is_directed <- any(joy_graph[["edgelist"]][["direction"]] == "directed")
  visIgraph(igraph::graph_from_data_frame(d = edges, directed = is_directed, vertices = nodes), randomSeed = seed) |>
    visEdges(font = list(align = "horizontal", background = "white", strokeWidth = 0))
}

6.5 Sadness

# Author-to-issue network weighted by the mean "sadness" score per pair.
sadness_graph <- transform_reply_to_bipartite_network(emotion_dt, weight = "sadness", weight_agg = mean)
nodes <- sadness_graph[["nodes"]]
# Keep only edges on which the emotion was observed (weight > 0), as the
# section introduction states; this also drops edges whose weight is NA.
edges <- sadness_graph[["edgelist"]][!is.na(weight) & weight > 0]
# Skip rendering when no edge carries this emotion.
if (nrow(edges) > 0) {
  edges[, color := "steelblue"]
  # Show the aggregated weight, rounded to two decimals, on each edge.
  edges[, label := as.character(round(weight, 2))]
  is_directed <- any(sadness_graph[["edgelist"]][["direction"]] == "directed")
  visIgraph(igraph::graph_from_data_frame(d = edges, directed = is_directed, vertices = nodes), randomSeed = seed) |>
    visEdges(font = list(align = "horizontal", background = "white", strokeWidth = 0))
}

6.6 Anger

# Author-to-issue network weighted by the mean "anger" score per pair.
anger_graph <- transform_reply_to_bipartite_network(emotion_dt, weight = "anger", weight_agg = mean)
nodes <- anger_graph[["nodes"]]
# Keep only edges on which the emotion was observed (weight > 0), as the
# section introduction states; this also drops edges whose weight is NA
# (the anger column contains NA values in the loaded data).
edges <- anger_graph[["edgelist"]][!is.na(weight) & weight > 0]
# Skip rendering when no edge carries this emotion.
if (nrow(edges) > 0) {
  edges[, color := "darkred"]
  # Show the aggregated weight, rounded to two decimals, on each edge.
  edges[, label := as.character(round(weight, 2))]
  is_directed <- any(anger_graph[["edgelist"]][["direction"]] == "directed")
  visIgraph(igraph::graph_from_data_frame(d = edges, directed = is_directed, vertices = nodes), randomSeed = seed) |>
    visEdges(font = list(align = "horizontal", background = "white", strokeWidth = 0))
}

6.7 Surprise

# Author-to-issue network weighted by the mean "surprise" score per pair.
surprise_graph <- transform_reply_to_bipartite_network(emotion_dt, weight = "surprise", weight_agg = mean)
nodes <- surprise_graph[["nodes"]]
# Keep only edges on which the emotion was observed (weight > 0), as the
# section introduction states; this also drops edges whose weight is NA
# (the surprise column contains NA values in the loaded data).
edges <- surprise_graph[["edgelist"]][!is.na(weight) & weight > 0]
# Skip rendering when no edge carries this emotion.
if (nrow(edges) > 0) {
  edges[, color := "darkcyan"]
  # Show the aggregated weight, rounded to two decimals, on each edge.
  edges[, label := as.character(round(weight, 2))]
  is_directed <- any(surprise_graph[["edgelist"]][["direction"]] == "directed")
  visIgraph(igraph::graph_from_data_frame(d = edges, directed = is_directed, vertices = nodes), randomSeed = seed) |>
    visEdges(font = list(align = "horizontal", background = "white", strokeWidth = 0))
}

6.8 Fear

# Author-to-issue network weighted by the mean "fear" score per pair.
fear_graph <- transform_reply_to_bipartite_network(emotion_dt, weight = "fear", weight_agg = mean)
nodes <- fear_graph[["nodes"]]
# Keep only edges on which the emotion was observed (weight > 0), as the
# section introduction states; this also drops edges whose weight is NA
# (the fear column contains NA values in the loaded data).
edges <- fear_graph[["edgelist"]][!is.na(weight) & weight > 0]
# Skip rendering when no edge carries this emotion.
if (nrow(edges) > 0) {
  edges[, color := "mediumpurple"]
  # Show the aggregated weight, rounded to two decimals, on each edge.
  edges[, label := as.character(round(weight, 2))]
  is_directed <- any(fear_graph[["edgelist"]][["direction"]] == "directed")
  visIgraph(igraph::graph_from_data_frame(d = edges, directed = is_directed, vertices = nodes), randomSeed = seed) |>
    visEdges(font = list(align = "horizontal", background = "white", strokeWidth = 0))
}