# Start from a clean workspace so stale objects cannot leak into the results.
rm(list = ls())

# Single seed shared by the RNG and by the graph layouts drawn further down.
seed <- 1
set.seed(seed)

# Attach dependencies with library() rather than require(): library() stops
# immediately when a package is missing, whereas require() only warns and
# returns FALSE, deferring the failure to the first call into that package.
library(kaiaulu)
library(igraph)
library(visNetwork)
library(data.table)
library(yaml)
library(stringi)
library(knitr)
This notebook demonstrates how to assign custom numeric values as edge weights when transforming raw data into a graph object. Specifically, it uses sentiment polarity scores from GitHub issue comments as edge weights in a bipartite author-to-issue network, and visualizes the resulting network with edges colored by sentiment.
Polarity integers (0 = neutral, 1 =
positive, 2 = negative) are remapped to (0,
1, -1) before being passed to
transform_reply_to_bipartite_network() via the
weight parameter. The weight_agg parameter
controls how multiple comments between the same author and issue are
aggregated — defaulting to mean.
The input is a CSV of Kaiaulu GitHub issue comments with predicted sentiment polarity labels.
# Read the GitHub issue comments together with their predicted polarity labels.
# integer64 = "character" keeps 64-bit id columns (e.g. comment_id) as exact
# strings, so they display correctly instead of as meaningless tiny doubles
# when the bit64 package is not installed.
dt <- fread(
  "https://github.com/user-attachments/files/27417102/kaiaulu_predictions.csv",
  integer64 = "character"
)
## Warning in require_bit64_if_needed(ans): Some columns are type 'integer64' but
## package bit64 is not installed. Those columns will print as strange looking
## floating point data. There is no need to reload the data. Simply
## install.packages('bit64') to obtain the integer64 print method and print the
## data again.
# Show the first rows of the columns used in the rest of the notebook.
dt[, .(comment_id, issue_url, comment_user_login, polarity)] |>
  head() |>
  kable()
| comment_id | issue_url | comment_user_login | polarity |
|---|---|---|---|
| 3.041925e-315 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | carlosparadis | 0 |
| 3.041973e-315 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | carlosparadis | 0 |
| 3.042337e-315 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | carlosparadis | 0 |
| 3.042382e-315 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | rnkazman | 0 |
| 3.042538e-315 | https://api.github.com/repos/sailuh/kaiaulu/issues/2 | carlosparadis | 0 |
| 3.044116e-315 | https://api.github.com/repos/sailuh/kaiaulu/issues/3 | carlosparadis | 0 |
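Add the reply_from and reply_subject columns expected by
transform_reply_to_bipartite_network(), derived from the comment author and the issue URL
(shortened to the issue number, e.g. #2), and remap polarity integers (0 =
neutral, 1 = positive, 2 = negative) to signed weights (0, 1, -1).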
# Add the author and issue columns in one pass: the author login is copied
# as-is, and everything up to "/issues/" in the URL is replaced by "#".
dt[, `:=`(
  reply_from = comment_user_login,
  reply_subject = sub(".*/issues/", "#", issue_url)
)]

# Map the polarity codes onto signed weights:
# 0 (neutral) -> 0, 1 (positive) -> 1, 2 (negative) -> -1.
dt[, polarity_int := fcase(
  polarity == 0L, 0L,
  polarity == 1L, 1L,
  polarity == 2L, -1L
)]

# Preview the derived columns.
dt[, .(reply_from, reply_subject, polarity_int)] |>
  head() |>
  kable()
| reply_from | reply_subject | polarity_int |
|---|---|---|
| carlosparadis | #2 | 0 |
| carlosparadis | #2 | 0 |
| carlosparadis | #2 | 0 |
| rnkazman | #2 | 0 |
| carlosparadis | #2 | 0 |
| carlosparadis | #3 | 0 |
Transform the reply table into a bipartite graph using sentiment polarity as the edge weight, aggregated by mean across multiple comments between the same author and issue.
# Build the author-to-issue bipartite network, using the signed polarity as
# the edge weight and mean as the aggregation across repeated comments.
reply_graph <- transform_reply_to_bipartite_network(
  dt,
  weight = "polarity_int",
  weight_agg = mean
)

# Preview the node table.
reply_graph[["nodes"]] |>
  head() |>
  kable()
| name | type | color |
|---|---|---|
| carlosparadis | TRUE | black |
| rnkazman | TRUE | black |
| massihonda | TRUE | black |
| CorneJB | TRUE | black |
| tuejari | TRUE | black |
| valentina-lenarduzzi | TRUE | black |
# Preview the edge table, including the aggregated weight of each edge.
reply_graph[["edgelist"]] |>
  head() |>
  kable()
| from | to | weight | direction |
|---|---|---|---|
| carlosparadis | #2 | -0.40 | directed |
| rnkazman | #2 | 0.00 | directed |
| carlosparadis | #3 | 0.00 | directed |
| carlosparadis | #1 | -0.25 | directed |
| carlosparadis | #4 | 0.00 | directed |
| carlosparadis | #11 | -0.50 | directed |
Edge color reflects the mean sentiment between an author and an issue: green for positive, black for neutral, and red for negative.
# Pull the node and edge tables out of the bipartite reply graph.
nodes <- reply_graph[["nodes"]]
edges <- reply_graph[["edgelist"]]

# Color each edge by the sign of its aggregated polarity:
# green = positive, black = neutral, red = negative.
edges[, color := fcase(
  weight > 0, "green",
  weight == 0, "black",
  weight < 0, "red"
)]

# Treat the graph as directed when any edge is flagged as directed.
is_directed <- any(edges[["direction"]] == "directed")
plot_reply_graph <- igraph::graph_from_data_frame(
  d = edges,
  directed = is_directed,
  vertices = nodes
)

# Use the notebook-wide seed rather than a hard-coded 1, so this layout stays
# in sync with set.seed() and with the other graphs if the seed ever changes.
visIgraph(plot_reply_graph, randomSeed = seed)
This section demonstrates using binary emotion labels as edge weights
in a bipartite author-to-issue network. The dataset contains JIRA
comments from the Apache Harmony project, each annotated with six binary
emotion labels: love, joy,
sadness, anger, surprise, and
fear.
Each emotion is visualized as a separate graph. The
weight parameter is set to the emotion column, and
weight_agg = mean aggregates multiple comments between the
same author and issue into a proportion (0 = no comments with that
emotion, 1 = all comments with that emotion). Only edges with weight
> 0 are shown. Each emotion's edges are drawn in a single
emotion-specific color, and edge labels show the weight rounded to two decimals.
# Load the Apache Harmony JIRA comments with their emotion annotations and
# convert the resulting data.frame into a data.table.
emotion_dt <- read.csv(
  "https://github.com/user-attachments/files/27313854/harmony_emotion_comments_joined.csv"
) |>
  as.data.table()

# Preview the identifier, author, and emotion columns.
emotion_dt[, .(
  comment_id, issue_key, author_login, author_name,
  love, joy, sadness, anger, surprise, fear
)] |>
  head() |>
  kable()
| comment_id | issue_key | author_login | author_name | love | joy | sadness | anger | surprise | fear |
|---|---|---|---|---|---|---|---|---|---|
| 12449632 | HARMONY-2181 | varlax | Alexey Varlamov | 1 | 0.0000000 | 0 | NA | NA | NA |
| 12446909 | HARMONY-2041 | rusanov | Anton Rusanov | 1 | 0.0000000 | 0 | NA | NA | NA |
| 12562381 | HARMONY-5224 | smm | Stepan Mishura | 1 | 0.0000000 | 0 | NA | NA | NA |
| 12466111 | HARMONY-1767 | vmz | Vasily Zakharov | 0 | 0.0000000 | 1 | NA | NA | NA |
| 12438066 | HARMONY-1325 | ilya.okomin | Ilya Okomin | 1 | 0.3333333 | 0 | NA | NA | NA |
| 12519687 | HARMONY-3741 | mcfirst | Pavel Pervov | 0 | 0.0000000 | 1 | NA | NA | NA |
# Copy the author login and the issue key into the reply_from / reply_subject
# columns in a single assignment.
emotion_dt[, `:=`(
  reply_from = author_login,
  reply_subject = issue_key
)]

# Preview the network columns next to the six emotion columns.
emotion_dt[, .(
  reply_from, reply_subject,
  love, joy, sadness, anger, surprise, fear
)] |>
  head() |>
  kable()
| reply_from | reply_subject | love | joy | sadness | anger | surprise | fear |
|---|---|---|---|---|---|---|---|
| varlax | HARMONY-2181 | 1 | 0.0000000 | 0 | NA | NA | NA |
| rusanov | HARMONY-2041 | 1 | 0.0000000 | 0 | NA | NA | NA |
| smm | HARMONY-5224 | 1 | 0.0000000 | 0 | NA | NA | NA |
| vmz | HARMONY-1767 | 0 | 0.0000000 | 1 | NA | NA | NA |
| ilya.okomin | HARMONY-1325 | 1 | 0.3333333 | 0 | NA | NA | NA |
| mcfirst | HARMONY-3741 | 0 | 0.0000000 | 1 | NA | NA | NA |
# Draw one emotion's author-to-issue network with visNetwork.
#
# emotion_graph: list returned by transform_reply_to_bipartite_network(); its
#   "nodes" and "edgelist" tables are read here.
# edge_color: color applied to every displayed edge.
# layout_seed: seed forwarded to visIgraph() for a reproducible layout.
#
# Returns the visNetwork widget, or invisible NULL (nothing is drawn) when no
# edge has a weight above zero.
plot_emotion_graph <- function(emotion_graph, edge_color, layout_seed) {
  nodes <- emotion_graph[["nodes"]]

  # Keep only edges where the emotion actually occurs; edges whose aggregated
  # weight is NA or 0 are dropped. Subsetting also returns a new table, so the
  # columns added below do not modify emotion_graph's own edgelist.
  edges <- emotion_graph[["edgelist"]][!is.na(weight) & weight > 0]
  if (nrow(edges) == 0) {
    return(invisible(NULL))
  }

  edges[, color := edge_color]
  # Label each edge with its weight rounded to two decimals.
  edges[, label := as.character(round(weight, 2))]

  is_directed <- any(edges[["direction"]] == "directed")
  igraph::graph_from_data_frame(
    d = edges,
    directed = is_directed,
    vertices = nodes
  ) |>
    visIgraph(randomSeed = layout_seed) |>
    visEdges(
      font = list(align = "horizontal", background = "white", strokeWidth = 0)
    )
}

# One graph per emotion: the emotion column is the edge weight, aggregated by
# mean across comments sharing the same author and issue.
love_graph <- transform_reply_to_bipartite_network(
  emotion_dt,
  weight = "love",
  weight_agg = mean
)
plot_emotion_graph(love_graph, "hotpink", seed)

joy_graph <- transform_reply_to_bipartite_network(
  emotion_dt,
  weight = "joy",
  weight_agg = mean
)
plot_emotion_graph(joy_graph, "darkorange", seed)

sadness_graph <- transform_reply_to_bipartite_network(
  emotion_dt,
  weight = "sadness",
  weight_agg = mean
)
plot_emotion_graph(sadness_graph, "steelblue", seed)

anger_graph <- transform_reply_to_bipartite_network(
  emotion_dt,
  weight = "anger",
  weight_agg = mean
)
plot_emotion_graph(anger_graph, "darkred", seed)

surprise_graph <- transform_reply_to_bipartite_network(
  emotion_dt,
  weight = "surprise",
  weight_agg = mean
)
plot_emotion_graph(surprise_graph, "darkcyan", seed)

fear_graph <- transform_reply_to_bipartite_network(
  emotion_dt,
  weight = "fear",
  weight_agg = mean
)
plot_emotion_graph(fear_graph, "mediumpurple", seed)