Сеть существительных в ‘Записках Цезаря’

Автор

Ольга Старунова

Дата публикации

3 марта 2026 г.

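На этой странице показана сеть совместной встречаемости существительных из текста “Записок Цезаря”. Узлы — леммы существительных; ребро соединяет две леммы, если их совместная встречаемость в пределах одного предложения превышает 15. Размер узла отражает его степень (число связей), цвет — частоту леммы в тексте; подписаны только узлы со степенью больше 5.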

library(udpipe)
library(tidyverse)
library(igraph)
library(ggraph)

# Read the CoNLL-U annotation from the GitHub URL below.
caesar <- udpipe_read_conllu(
  "https://github.com/locusclassicus/text_analysis_2024/raw/main/files/bg_latinpipe.conllu"
)

# Noun co-occurrence: restrict the annotation to rows tagged NOUN, count
# lemma pairs grouped by document and sentence, and keep only the pairs
# whose count is above 15.
caesar_nouns <- filter(caesar, upos == "NOUN")

cooc_nouns <- caesar_nouns |>
  cooccurrence(term = "lemma", group = c("doc_id", "sentence_id")) |>
  as_tibble() |>
  filter(cooc > 15)

# Graph: every retained lemma pair becomes an undirected edge; the pair's
# co-occurrence count is stored as the edge attribute `weight`.
edges <- rename(cooc_nouns, from = term1, to = term2, weight = cooc)
caesar_graph <- graph_from_data_frame(d = edges, directed = FALSE)

# Node attributes
# Degree of every vertex.
caesar_graph <- set_vertex_attr(
  caesar_graph,
  name = "degree",
  value = degree(caesar_graph)
)

# Number of NOUN rows per lemma, turned into a named vector keyed by lemma.
word_freq <- caesar_nouns |>
  group_by(lemma) |>
  summarise(frequency = n(), .groups = "drop")
freq_dict <- setNames(word_freq$frequency, word_freq$lemma)

# Look up the frequency of each vertex by its name; a vertex whose name is
# not found in the lookup gets 0 instead of NA.
node_freq <- freq_dict[V(caesar_graph)$name]
node_freq[is.na(node_freq)] <- 0
caesar_graph <- set_vertex_attr(
  caesar_graph,
  name = "frequency",
  value = node_freq
)

# Visualisation
# Seed is set right before plotting, presumably so that the layout and the
# label repulsion come out the same on every render.
set.seed(21092024)

ggraph(caesar_graph, layout = "dh", maxiter = 100) +
  geom_edge_link(color = "grey50", alpha = 0.5) +
  # Filled circles (shape 21): size encodes degree, fill encodes frequency.
  geom_node_point(
    aes(size = degree, fill = frequency),
    shape = 21,
    color = "black"
  ) +
  # Label only the nodes whose degree is above 5. The `filter` aesthetic
  # drops the other nodes before the layer is drawn, instead of handing the
  # text layer NA labels that it then has to discard.
  geom_node_text(
    aes(filter = degree > 5, label = name),
    repel = TRUE,
    size = 3
  ) +
  scale_fill_gradient(low = "lightblue", high = "darkred") +
  # Node size is explained in the page text, so its legend is hidden.
  scale_size(guide = "none") +
  theme_graph(base_family = "sans") +
  labs(
    title = "Сеть существительных в 'Записках Цезаря'",
    subtitle = paste(
      "Узлов:", vcount(caesar_graph),
      "Ребер:", ecount(caesar_graph)
    )
  )