1990s

# Load the 1990s article-by-theme table; the first CSV column supplies the
# row names (article ids).
df_two_mode_90 <- read.csv("two_mode_90.csv", row.names = 1)
# Drop the "pol" column. drop = FALSE keeps a data frame even if only a
# single column were left after the exclusion.
keep_cols_90 <- !colnames(df_two_mode_90) %in% "pol"
df_two_mode_90 <- df_two_mode_90[, keep_cols_90, drop = FALSE]
print(paste("There are", nrow(df_two_mode_90), "items."))

## [1] "There are 133 items."

#### summary statistics ####
# Row sums = number of keyword themes per article
# (presumably the cells are 0/1 indicators -- TODO confirm).
themes_per_article_90 <- rowSums(df_two_mode_90)
mean(themes_per_article_90)

## [1] 1.684211

paste("On average, an article contains", round(mean(themes_per_article_90), 2), "keyword themes")

## [1] "On average, an article contains 1.68 keyword themes"

# Share of articles w/o themes. Counting zero row sums directly stays correct
# (0) when every article has a theme; table(...)[1] would then return the
# share of the smallest non-zero count instead.
# NOTE: the printed number is a proportion of all articles, not a count.
paste(round(mean(themes_per_article_90 == 0), 4), "articles do not contain any keyword themes.")

## [1] "0.0977 articles do not contain any keyword themes."

# Only articles with at least one theme are kept for the network.
df_two_mode_90_simple <- df_two_mode_90[themes_per_article_90 > 0, , drop = FALSE]
print(paste(nrow(df_two_mode_90_simple), "are kept for network visualization."))

## [1] "120 are kept for network visualization."

# How many rows in difference?
print(nrow(df_two_mode_90) - nrow(df_two_mode_90_simple))

## [1] 13

# Build the two-mode graph: rows (articles) and columns (themes) both become
# vertices.
two_mode_90_graph <- graph_from_incidence_matrix(as.matrix(df_two_mode_90_simple))
# How many nodes? (stored, not printed)
V_count <- length(V(two_mode_90_graph))

Validation

# Every theme column should show up as a vertex name (one TRUE per column).
is.element(colnames(df_two_mode_90), vertex_attr(two_mode_90_graph, "name"))

## [1] TRUE TRUE TRUE TRUE TRUE

# Article ids present in the full table but absent from the graph.
# Data-frame row names are unique, so setdiff() drops nothing extra.
setdiff(rownames(df_two_mode_90), vertex_attr(two_mode_90_graph, "name"))

##  [1] "91_1"  "92_2"  "95_6"  "96_5"  "96_10" "97_9"  "99_12" "00_4"  "00_5" 
## [10] "00_11" "00_14" "00_18" "00_29"

# The type attribute is FALSE for row (article) vertices and TRUE for
# column (theme) vertices.
table(vertex_attr(two_mode_90_graph, "type"))

## 
## FALSE  TRUE 
##   120     5

#### LABELS ####
# Only feature (theme) vertices carry a label; article vertices stay blank.
V(two_mode_90_graph)$label <- ifelse(
  V(two_mode_90_graph)$type, V(two_mode_90_graph)$name, ""
)

#### COLOR ####
# type is FALSE/TRUE, so type + 1 picks element 1 (ids) or 2 (features).
V(two_mode_90_graph)$color <- c("orange", "lightsteelblue")[V(two_mode_90_graph)$type + 1]

#### DEGREE (could impact SIZE) ####
# Degree of the feature vertices only.
feature_degrees_two_mode_90_graph <- degree(
  two_mode_90_graph,
  v = V(two_mode_90_graph)[V(two_mode_90_graph)$type]
)
# The 0.1 offset keeps the log finite for a feature with degree 0.
log_feature_degrees_two_mode_90_graph <- log(feature_degrees_two_mode_90_graph + 0.1)
feature_degree_range <- max(log_feature_degrees_two_mode_90_graph) - min(log_feature_degrees_two_mode_90_graph)
print(paste("The range of log-transformed degree of features is:", round(feature_degree_range, 2)))

## [1] "The range of log-transformed degree of features is: 1.63"

#### SIZE ####
## set initial sizes for ids and features
V(two_mode_90_graph)$size <- c(2, 20)[V(two_mode_90_graph)$type + 1]
## overwrite node size of features, depending on their degree
V(two_mode_90_graph)$size[V(two_mode_90_graph)$type] <- 15 + log_feature_degrees_two_mode_90_graph
# check feature size
V(two_mode_90_graph)$size[V(two_mode_90_graph)$type]

## [1] 18.04927 19.63570 18.74005 18.64021 18.00072

Visualization

# Seed the RNG before plotting (presumably so the layout is reproducible),
# and set the figure margins as c(bottom, left, top, right).
set.seed(1234)
par(mar = c(1, 1, 3, 1))
plot(
  two_mode_90_graph,
  layout = layout_nicely,
  vertex.frame.color = NA,
  vertex.label.cex = 1,
  vertex.label.font = 2
)

2000s

# Load the 2000s article-by-theme table; the first CSV column supplies the
# row names (article ids).
df_two_mode_00 <- read.csv("two_mode_00.csv", row.names = 1)
# Drop the "pol" column. drop = FALSE keeps a data frame even if only a
# single column were left after the exclusion.
keep_cols_00 <- !colnames(df_two_mode_00) %in% "pol"
df_two_mode_00 <- df_two_mode_00[, keep_cols_00, drop = FALSE]
print(paste("There are", nrow(df_two_mode_00), "items."))

## [1] "There are 494 items."

#### summary statistics ####
# Row sums = number of keyword themes per article
# (presumably the cells are 0/1 indicators -- TODO confirm).
themes_per_article_00 <- rowSums(df_two_mode_00)
mean(themes_per_article_00)

## [1] 1.65587

paste("On average, an article contains", round(mean(themes_per_article_00), 2), "keyword themes")

## [1] "On average, an article contains 1.66 keyword themes"

# Share of articles w/o themes. Counting zero row sums directly stays correct
# (0) when every article has a theme; table(...)[1] would then return the
# share of the smallest non-zero count instead.
# NOTE: the printed number is a proportion of all articles, not a count.
paste(round(mean(themes_per_article_00 == 0), 4), "articles do not contain any keyword themes.")

## [1] "0.1215 articles do not contain any keyword themes."

# Only articles with at least one theme are kept for the network.
df_two_mode_00_simple <- df_two_mode_00[themes_per_article_00 > 0, , drop = FALSE]
print(paste(nrow(df_two_mode_00_simple), "are kept for network visualization."))

## [1] "434 are kept for network visualization."

# How many rows in difference?
print(nrow(df_two_mode_00) - nrow(df_two_mode_00_simple))

## [1] 60

# Build the two-mode graph: rows (articles) and columns (themes) both become
# vertices.
two_mode_00_graph <- graph_from_incidence_matrix(as.matrix(df_two_mode_00_simple))
# How many nodes? (stored, not printed)
V_count <- length(V(two_mode_00_graph))
# Confirms that column names (features) are among the node names
colnames(df_two_mode_00) %in% V(two_mode_00_graph)$name

## [1] TRUE TRUE TRUE TRUE TRUE

# Lists the row names (ids) that are NOT among the node names, i.e. the
# articles that were filtered out before the graph was built.
rownames(df_two_mode_00)[!rownames(df_two_mode_00) %in% V(two_mode_00_graph)$name]

##  [1] "01_7"   "01_16"  "01_22"  "02_4"   "02_12"  "02_14"  "02_16"  "03_12" 
##  [9] "03_14"  "04_10"  "05_1"   "05_2"   "05_3"   "05_22"  "05_26"  "06_1"  
## [17] "06_2"   "06_5"   "06_15"  "06_21"  "06_23"  "06_36"  "06_37"  "07_25" 
## [25] "07_35"  "07_51"  "08_18"  "08_22"  "08_27"  "08_32"  "08_33"  "08_40" 
## [33] "08_51"  "08_54"  "08_61"  "09_4"   "09_9"   "09_13"  "09_45"  "09_55" 
## [41] "09_59"  "10_6"   "10_9"   "10_10"  "10_15"  "10_21"  "10_23"  "10_33" 
## [49] "10_35"  "10_36"  "10_48"  "10_61"  "10_63"  "10_64"  "10_68"  "10_69" 
## [57] "10_70"  "10_93"  "10_100" "10_103"

# Count the ids that ARE among the node names. sum() counts the TRUE entries;
# length() of the logical vector always equals nrow(df_two_mode_00) and so
# says nothing about membership.
sum(rownames(df_two_mode_00) %in% V(two_mode_00_graph)$name)

## [1] 434

# V()$type returns FALSE or TRUE, FALSE are the row names
table(V(two_mode_00_graph)$type)

## 
## FALSE  TRUE 
##   434     5

#### LABELS ####
# Only feature (theme) vertices carry a label; article vertices stay blank.
V(two_mode_00_graph)$label <- ifelse(
  V(two_mode_00_graph)$type, V(two_mode_00_graph)$name, ""
)

#### COLOR ####
# type is FALSE/TRUE, so type + 1 picks element 1 (ids) or 2 (features).
V(two_mode_00_graph)$color <- c("orange", "lightsteelblue")[V(two_mode_00_graph)$type + 1]

#### DEGREE (could impact SIZE) ####
# Degree of the feature vertices only.
feature_degrees_two_mode_00_graph <- degree(
  two_mode_00_graph,
  v = V(two_mode_00_graph)[V(two_mode_00_graph)$type]
)
# The 0.1 offset keeps the log finite for a feature with degree 0.
log_feature_degrees_two_mode_00_graph <- log(feature_degrees_two_mode_00_graph + 0.1)
feature_degree_range <- max(log_feature_degrees_two_mode_00_graph) - min(log_feature_degrees_two_mode_00_graph)
print(paste("The range of log-transformed degree of features is:", round(feature_degree_range, 2)))

## [1] "The range of log-transformed degree of features is: 2.08"

#### SIZE ####
## set initial sizes for ids and features
V(two_mode_00_graph)$size <- c(2, 20)[V(two_mode_00_graph)$type + 1]
## overwrite node size of features, depending on their degree
V(two_mode_00_graph)$size[V(two_mode_00_graph)$type] <- 15 + log_feature_degrees_two_mode_00_graph
# check feature size
V(two_mode_00_graph)$size[V(two_mode_00_graph)$type]

## [1] 19.83708 20.79332 19.99111 20.17105 18.71601

# Seed the RNG before plotting (presumably so the layout is reproducible),
# and set the figure margins as c(bottom, left, top, right).
set.seed(1234)
par(mar = c(1, 1, 3, 1))
#### Visualize ####
plot(
  two_mode_00_graph,
  layout = layout_nicely,
  vertex.frame.color = NA,
  vertex.label.cex = 1,
  vertex.label.font = 2
)

2010s

# Load the 2010s article-by-theme table; the first CSV column supplies the
# row names (article ids).
df_two_mode_10 <- read.csv("two_mode_10.csv", row.names = 1)
# Drop the "pol" column. drop = FALSE keeps a data frame even if only a
# single column were left after the exclusion.
keep_cols_10 <- !colnames(df_two_mode_10) %in% "pol"
df_two_mode_10 <- df_two_mode_10[, keep_cols_10, drop = FALSE]
print(paste("There are", nrow(df_two_mode_10), "items."))

## [1] "There are 1519 items."

#### summary statistics ####
# Row sums = number of keyword themes per article
# (presumably the cells are 0/1 indicators -- TODO confirm).
themes_per_article_10 <- rowSums(df_two_mode_10)
mean(themes_per_article_10)

## [1] 1.545095

paste("On average, an article contains", round(mean(themes_per_article_10), 2), "keyword themes")

## [1] "On average, an article contains 1.55 keyword themes"

# Share of articles w/o themes. Counting zero row sums directly stays correct
# (0) when every article has a theme; table(...)[1] would then return the
# share of the smallest non-zero count instead.
# NOTE: the printed number is a proportion of all articles, not a count.
paste(round(mean(themes_per_article_10 == 0), 4), "articles do not contain any keyword themes.")

## [1] "0.1422 articles do not contain any keyword themes."

# Only articles with at least one theme are kept for the network.
df_two_mode_10_simple <- df_two_mode_10[themes_per_article_10 > 0, , drop = FALSE]
print(paste(nrow(df_two_mode_10_simple), "are kept for network visualization."))

## [1] "1303 are kept for network visualization."

# How many rows in difference?
print(nrow(df_two_mode_10) - nrow(df_two_mode_10_simple))

## [1] 216

# Build the two-mode graph: rows (articles) and columns (themes) both become
# vertices.
two_mode_10_graph <- graph_from_incidence_matrix(as.matrix(df_two_mode_10_simple))
# How many nodes? (stored, not printed)
V_count <- length(V(two_mode_10_graph))
# Every theme column should show up as a vertex name (one TRUE per column).
is.element(colnames(df_two_mode_10), vertex_attr(two_mode_10_graph, "name"))

## [1] TRUE TRUE TRUE TRUE TRUE

# Number of article ids from the full table that are missing from the graph,
# i.e. ids not associated with any feature.
sum(!is.element(rownames(df_two_mode_10), vertex_attr(two_mode_10_graph, "name")))

## [1] 216

# The type attribute is FALSE for row (article) vertices and TRUE for
# column (theme) vertices.
table(vertex_attr(two_mode_10_graph, "type"))

## 
## FALSE  TRUE 
##  1303     5

#### LABELS ####
# Only feature (theme) vertices carry a label; article vertices stay blank.
V(two_mode_10_graph)$label <- ifelse(
  V(two_mode_10_graph)$type, V(two_mode_10_graph)$name, ""
)

#### COLOR ####
# type is FALSE/TRUE, so type + 1 picks element 1 (ids) or 2 (features).
V(two_mode_10_graph)$color <- c("orange", "lightsteelblue")[V(two_mode_10_graph)$type + 1]

#### DEGREE (could impact SIZE) ####
# Degree of the feature vertices only.
feature_degrees_two_mode_10_graph <- degree(
  two_mode_10_graph,
  v = V(two_mode_10_graph)[V(two_mode_10_graph)$type]
)
# The 0.1 offset keeps the log finite for a feature with degree 0.
log_feature_degrees_two_mode_10_graph <- log(feature_degrees_two_mode_10_graph + 0.1)
feature_degree_range <- max(log_feature_degrees_two_mode_10_graph) - min(log_feature_degrees_two_mode_10_graph)
print(paste("The range of log-transformed degree of features is:", round(feature_degree_range, 2)))

## [1] "The range of log-transformed degree of features is: 2.02"

#### SIZE ####
## set initial sizes for ids and features
V(two_mode_10_graph)$size <- c(2, 20)[V(two_mode_10_graph)$type + 1]
## overwrite node size of features, depending on their degree
V(two_mode_10_graph)$size[V(two_mode_10_graph)$type] <- 15 + log_feature_degrees_two_mode_10_graph
# check feature size
V(two_mode_10_graph)$size[V(two_mode_10_graph)$type]

## [1] 21.05467 21.84599 20.93251 21.17191 19.82911

# Seed the RNG before plotting (presumably so the layout is reproducible),
# and set the figure margins as c(bottom, left, top, right).
# This decade uses the LGL layout and larger labels than the other two plots.
set.seed(1234)
par(mar = c(1, 1, 3, 1))
plot(
  two_mode_10_graph,
  layout = layout_with_lgl,
  vertex.frame.color = NA,
  vertex.label.cex = 1.8,
  vertex.label.font = 2
)

Two_mode_network_viz

Tiangeng Lu

2023-08-08

1990s

2000s

2010s