#### 1990s: two-mode (id x keyword-theme feature) network ####
# Reads the incidence table, drops the "pol" column, prints summary
# statistics, removes rows without any theme, builds the bipartite graph,
# styles its nodes and plots it.
# NOTE(review): the igraph functions used below assume library(igraph) was
# attached earlier in the file -- confirm.
df_two_mode_90 <- read.csv("two_mode_90.csv", row.names = 1)
# drop = FALSE keeps a data frame even if only one column were left
df_two_mode_90 <- df_two_mode_90[, !colnames(df_two_mode_90) %in% "pol", drop = FALSE]
print(paste("There are", nrow(df_two_mode_90), "items."))
## [1] "There are 133 items."
#### summary statistics ####
mean(rowSums(df_two_mode_90))
## [1] 1.684211
paste("On average, an article contains", round(mean(rowSums(df_two_mode_90)), 2), "keyword themes")
## [1] "On average, an article contains 1.68 keyword themes"
# share of articles w/o themes, calculated from row sum
# (mean of a logical vector; unlike table(...)[1] this stays correct when no
# row sums to zero)
paste(
  round(mean(rowSums(df_two_mode_90) == 0), 4),
  "articles do not contain any keyword themes."
)
## [1] "0.0977 articles do not contain any keyword themes."
# keep only rows that carry at least one theme
df_two_mode_90_simple <- df_two_mode_90[rowSums(df_two_mode_90) > 0, , drop = FALSE]
print(paste(nrow(df_two_mode_90_simple), "are kept for network visualization."))
## [1] "120 are kept for network visualization."
# How many rows in difference?
print(nrow(df_two_mode_90) - nrow(df_two_mode_90_simple))
## [1] 13
# NOTE(review): recent igraph releases rename this constructor to
# graph_from_biadjacency_matrix(); kept as-is for compatibility.
two_mode_90_graph <- graph_from_incidence_matrix(as.matrix(df_two_mode_90_simple))
# How many nodes?
V_count <- length(V(two_mode_90_graph))
#### Validation ####
# Confirms that column names (features) are among the node names
colnames(df_two_mode_90) %in% V(two_mode_90_graph)$name
## [1] TRUE TRUE TRUE TRUE TRUE
# Lists the row names (ids) that are NOT among the node names, i.e. the rows
# removed above for having no themes
rownames(df_two_mode_90)[!rownames(df_two_mode_90) %in% V(two_mode_90_graph)$name]
## [1] "91_1" "92_2" "95_6" "96_5" "96_10" "97_9" "99_12" "00_4" "00_5"
## [10] "00_11" "00_14" "00_18" "00_29"
# V()$type returns FALSE or TRUE, FALSE are the row names
table(V(two_mode_90_graph)$type)
##
## FALSE TRUE
## 120 5
#### LABELS ####
# default no node labels
V(two_mode_90_graph)$label <- ""
# show feature labels
V(two_mode_90_graph)[V(two_mode_90_graph)$type]$label <- V(two_mode_90_graph)[V(two_mode_90_graph)$type]$name
#### COLOR ####
# type + 1 indexes the palette: FALSE (ids) -> "orange", TRUE (features) -> "lightsteelblue"
V(two_mode_90_graph)$color <- c("orange", "lightsteelblue")[V(two_mode_90_graph)$type + 1]
#### DEGREE (could impact SIZE) ####
feature_degrees_two_mode_90_graph <- degree(two_mode_90_graph, v = V(two_mode_90_graph)[V(two_mode_90_graph)$type])
# the 0.1 offset presumably guards against log(0) for an unconnected feature
log_feature_degrees_two_mode_90_graph <- log(feature_degrees_two_mode_90_graph + 0.1)
feature_degree_range <- max(log_feature_degrees_two_mode_90_graph) - min(log_feature_degrees_two_mode_90_graph)
print(paste("The range of log-transformed degree of features is:", round(feature_degree_range, 2)))
## [1] "The range of log-transformed degree of features is: 1.63"
#### SIZE ####
## set initial sizes for ids and features
V(two_mode_90_graph)$size <- c(2, 20)[V(two_mode_90_graph)$type + 1]
## overwrite node size of features, depending on their degree
V(two_mode_90_graph)$size[V(two_mode_90_graph)$type] <- 15 + log_feature_degrees_two_mode_90_graph
# check feature size
V(two_mode_90_graph)$size[V(two_mode_90_graph)$type]
## [1] 18.04927 19.63570 18.74005 18.64021 18.00072
#### Visualization ####
# margins are given as bottom, left, top, right
par(mar = c(1, 1, 3, 1))
# fixed seed so that a randomised layout is reproducible between runs
set.seed(1234)
plot(two_mode_90_graph, vertex.frame.color = NA, vertex.label.cex = 1, vertex.label.font = 2, layout = layout_nicely)
#### 2000s: two-mode (id x keyword-theme feature) network ####
# Reads the incidence table, drops the "pol" column, prints summary
# statistics, removes rows without any theme, builds the bipartite graph,
# styles its nodes and plots it.
# NOTE(review): the igraph functions used below assume library(igraph) was
# attached earlier in the file -- confirm.
df_two_mode_00 <- read.csv("two_mode_00.csv", row.names = 1)
# drop = FALSE keeps a data frame even if only one column were left
df_two_mode_00 <- df_two_mode_00[, !colnames(df_two_mode_00) %in% "pol", drop = FALSE]
print(paste("There are", nrow(df_two_mode_00), "items."))
## [1] "There are 494 items."
#### summary statistics ####
mean(rowSums(df_two_mode_00))
## [1] 1.65587
paste("On average, an article contains", round(mean(rowSums(df_two_mode_00)), 2), "keyword themes")
## [1] "On average, an article contains 1.66 keyword themes"
# share of articles w/o themes, calculated from row sum
# (mean of a logical vector; unlike table(...)[1] this stays correct when no
# row sums to zero)
paste(
  round(mean(rowSums(df_two_mode_00) == 0), 4),
  "articles do not contain any keyword themes."
)
## [1] "0.1215 articles do not contain any keyword themes."
# keep only rows that carry at least one theme
df_two_mode_00_simple <- df_two_mode_00[rowSums(df_two_mode_00) > 0, , drop = FALSE]
print(paste(nrow(df_two_mode_00_simple), "are kept for network visualization."))
## [1] "434 are kept for network visualization."
# How many rows in difference?
print(nrow(df_two_mode_00) - nrow(df_two_mode_00_simple))
## [1] 60
# NOTE(review): recent igraph releases rename this constructor to
# graph_from_biadjacency_matrix(); kept as-is for compatibility.
two_mode_00_graph <- graph_from_incidence_matrix(as.matrix(df_two_mode_00_simple))
# How many nodes?
V_count <- length(V(two_mode_00_graph))
# Confirms that column names (features) are among the node names
colnames(df_two_mode_00) %in% V(two_mode_00_graph)$name
## [1] TRUE TRUE TRUE TRUE TRUE
# Lists the row names (ids) that are NOT among the node names, i.e. the rows
# removed above for having no themes
rownames(df_two_mode_00)[!rownames(df_two_mode_00) %in% V(two_mode_00_graph)$name]
## [1] "01_7" "01_16" "01_22" "02_4" "02_12" "02_14" "02_16" "03_12"
## [9] "03_14" "04_10" "05_1" "05_2" "05_3" "05_22" "05_26" "06_1"
## [17] "06_2" "06_5" "06_15" "06_21" "06_23" "06_36" "06_37" "07_25"
## [25] "07_35" "07_51" "08_18" "08_22" "08_27" "08_32" "08_33" "08_40"
## [33] "08_51" "08_54" "08_61" "09_4" "09_9" "09_13" "09_45" "09_55"
## [41] "09_59" "10_6" "10_9" "10_10" "10_15" "10_21" "10_23" "10_33"
## [49] "10_35" "10_36" "10_48" "10_61" "10_63" "10_64" "10_68" "10_69"
## [57] "10_70" "10_93" "10_100" "10_103"
# How many row names (ids) ARE among the node names? sum() counts the TRUE
# matches; length() of the %in% result would always equal nrow() (494).
sum(rownames(df_two_mode_00) %in% V(two_mode_00_graph)$name)
## [1] 434
# V()$type returns FALSE or TRUE, FALSE are the row names
table(V(two_mode_00_graph)$type)
##
## FALSE TRUE
## 434 5
#### LABELS ####
# default no node labels
V(two_mode_00_graph)$label <- ""
# show feature labels
V(two_mode_00_graph)[V(two_mode_00_graph)$type]$label <- V(two_mode_00_graph)[V(two_mode_00_graph)$type]$name
#### COLOR ####
# type + 1 indexes the palette: FALSE (ids) -> "orange", TRUE (features) -> "lightsteelblue"
V(two_mode_00_graph)$color <- c("orange", "lightsteelblue")[V(two_mode_00_graph)$type + 1]
#### DEGREE (could impact SIZE) ####
feature_degrees_two_mode_00_graph <- degree(two_mode_00_graph, v = V(two_mode_00_graph)[V(two_mode_00_graph)$type])
# the 0.1 offset presumably guards against log(0) for an unconnected feature
log_feature_degrees_two_mode_00_graph <- log(feature_degrees_two_mode_00_graph + 0.1)
feature_degree_range <- max(log_feature_degrees_two_mode_00_graph) - min(log_feature_degrees_two_mode_00_graph)
print(paste("The range of log-transformed degree of features is:", round(feature_degree_range, 2)))
## [1] "The range of log-transformed degree of features is: 2.08"
#### SIZE ####
## set initial sizes for ids and features
V(two_mode_00_graph)$size <- c(2, 20)[V(two_mode_00_graph)$type + 1]
## overwrite node size of features, depending on their degree
V(two_mode_00_graph)$size[V(two_mode_00_graph)$type] <- 15 + log_feature_degrees_two_mode_00_graph
# check feature size
V(two_mode_00_graph)$size[V(two_mode_00_graph)$type]
## [1] 19.83708 20.79332 19.99111 20.17105 18.71601
# margins are given as bottom, left, top, right
par(mar = c(1, 1, 3, 1))
# fixed seed so that a randomised layout is reproducible between runs
set.seed(1234)
#### Visualize ####
plot(two_mode_00_graph, vertex.frame.color = NA, vertex.label.cex = 1, vertex.label.font = 2, layout = layout_nicely)
#### 2010s: two-mode (id x keyword-theme feature) network ####
# Reads the incidence table, drops the "pol" column, prints summary
# statistics, removes rows without any theme, builds the bipartite graph,
# styles its nodes and plots it.
# NOTE(review): the igraph functions used below assume library(igraph) was
# attached earlier in the file -- confirm.
df_two_mode_10 <- read.csv("two_mode_10.csv", row.names = 1)
# drop = FALSE keeps a data frame even if only one column were left
df_two_mode_10 <- df_two_mode_10[, !colnames(df_two_mode_10) %in% "pol", drop = FALSE]
print(paste("There are", nrow(df_two_mode_10), "items."))
## [1] "There are 1519 items."
#### summary statistics ####
mean(rowSums(df_two_mode_10))
## [1] 1.545095
paste("On average, an article contains", round(mean(rowSums(df_two_mode_10)), 2), "keyword themes")
## [1] "On average, an article contains 1.55 keyword themes"
# share of articles w/o themes, calculated from row sum
# (mean of a logical vector; unlike table(...)[1] this stays correct when no
# row sums to zero)
paste(
  round(mean(rowSums(df_two_mode_10) == 0), 4),
  "articles do not contain any keyword themes."
)
## [1] "0.1422 articles do not contain any keyword themes."
# keep only rows that carry at least one theme
df_two_mode_10_simple <- df_two_mode_10[rowSums(df_two_mode_10) > 0, , drop = FALSE]
print(paste(nrow(df_two_mode_10_simple), "are kept for network visualization."))
## [1] "1303 are kept for network visualization."
# How many rows in difference?
print(nrow(df_two_mode_10) - nrow(df_two_mode_10_simple))
## [1] 216
# NOTE(review): recent igraph releases rename this constructor to
# graph_from_biadjacency_matrix(); kept as-is for compatibility.
two_mode_10_graph <- graph_from_incidence_matrix(as.matrix(df_two_mode_10_simple))
# How many nodes?
V_count <- length(V(two_mode_10_graph))
# Confirms that column names (features) are among the node names
colnames(df_two_mode_10) %in% V(two_mode_10_graph)$name
## [1] TRUE TRUE TRUE TRUE TRUE
# Counts the row names (ids) that are NOT among the node names, i.e. how many
# ids are not associated with any feature
length(rownames(df_two_mode_10)[!rownames(df_two_mode_10) %in% V(two_mode_10_graph)$name])
## [1] 216
# V()$type returns FALSE or TRUE, FALSE are the row names
table(V(two_mode_10_graph)$type)
##
## FALSE TRUE
## 1303 5
#### LABELS ####
# default no node labels
V(two_mode_10_graph)$label <- ""
# show feature labels
V(two_mode_10_graph)[V(two_mode_10_graph)$type]$label <- V(two_mode_10_graph)[V(two_mode_10_graph)$type]$name
#### COLOR ####
# type + 1 indexes the palette: FALSE (ids) -> "orange", TRUE (features) -> "lightsteelblue"
V(two_mode_10_graph)$color <- c("orange", "lightsteelblue")[V(two_mode_10_graph)$type + 1]
#### DEGREE (could impact SIZE) ####
feature_degrees_two_mode_10_graph <- degree(two_mode_10_graph, v = V(two_mode_10_graph)[V(two_mode_10_graph)$type])
# the 0.1 offset presumably guards against log(0) for an unconnected feature
log_feature_degrees_two_mode_10_graph <- log(feature_degrees_two_mode_10_graph + 0.1)
feature_degree_range <- max(log_feature_degrees_two_mode_10_graph) - min(log_feature_degrees_two_mode_10_graph)
print(paste("The range of log-transformed degree of features is:", round(feature_degree_range, 2)))
## [1] "The range of log-transformed degree of features is: 2.02"
#### SIZE ####
## set initial sizes for ids and features
V(two_mode_10_graph)$size <- c(2, 20)[V(two_mode_10_graph)$type + 1]
## overwrite node size of features, depending on their degree
V(two_mode_10_graph)$size[V(two_mode_10_graph)$type] <- 15 + log_feature_degrees_two_mode_10_graph
# check feature size
V(two_mode_10_graph)$size[V(two_mode_10_graph)$type]
## [1] 21.05467 21.84599 20.93251 21.17191 19.82911
# margins are given as bottom, left, top, right
par(mar = c(1, 1, 3, 1))
# fixed seed so that a randomised layout is reproducible between runs
set.seed(1234)
#### Visualize ####
# this plot uses layout_with_lgl and larger labels (cex 1.8), unlike the
# layout_nicely / cex 1 plots of the other two-mode graphs in this file
plot(two_mode_10_graph, vertex.frame.color = NA, vertex.label.cex = 1.8, vertex.label.font = 2, layout = layout_with_lgl)