Сеть художественных влияний Винсента Ван Гога

Author

Ксения Войтова

Published

March 30, 2026

Code
library(widyr)
library(igraph)
library(visNetwork)
library(base64enc)
library(tidyverse)


# 1. ЗАГРУЗКА ДАННЫХ


url <- "https://raw.githubusercontent.com/PrasadSalimath/Text-Analytics-on-Van-Gogh-Letters/main/van_gogh_letters_data.csv"
vangogh_letters <- read_csv(url, show_col_types = FALSE) |>
  filter(!is.na(letter_text)) |>
  mutate(doc_id = paste0("letter_", row_number()))

vangogh_annotated <- readRDS("vangogh_annotated.rds")


# 2. ИЗВЛЕЧЕНИЕ УПОМИНАНИЙ ХУДОЖНИКОВ


known_artists <- c(
  "Millet", "Millets", "Gauguin", "Gauguins", "Rembrandt", "Delacroix", "Bernard",
  "Corot", "Rousseau", "Daubigny", "Breton", "Dupré",
  "Monet", "Pissarro", "Seurat", "Signac", "Lautrec",
  "Renoir", "Sisley", "Degas", "Cézanne",
  "Mauve", "Israels", "Weissenbruch", "Mesdag", "Hals",
  "Rubens", "Michelangelo", "Raphael", "Giotto", "Titian",
  "Hiroshige", "Hokusai", "Utamaro",
  "Daumier", "Courbet", "Meunier"
)

artist_mentions <- vangogh_annotated |>
  filter(upos == "PROPN") |>
  filter(lemma %in% known_artists) |>
  left_join(vangogh_letters, by = "doc_id") |>
  mutate(
    year = as.numeric(str_extract(date, "\\d{4}")),
    lemma = case_when(
      str_detect(lemma, "Millet") ~ "Millet",
      str_detect(lemma, "Gauguin") ~ "Gauguin",
      TRUE ~ lemma
    )
  )
Code
# 3. ФУНКЦИЯ ДЛЯ ИЗВЛЕЧЕНИЯ ЦИТАТ (ЦЕЛЫЕ ПРЕДЛОЖЕНИЯ)

get_artist_quotes <- function(artist_name, mentions_data, n_quotes = 1) {
  
  quotes <- mentions_data |>
    filter(lemma == artist_name) |>
    mutate(
      sentences = str_split(letter_text, "(?<=[.!?])\\s+")
    ) |>
    unnest(sentences) |>
    filter(str_detect(sentences, regex(artist_name, ignore_case = TRUE))) |>
    filter(
      str_length(sentences) >= 60,
      str_length(sentences) <= 150,
      !str_detect(sentences, "^\\d"),
      !str_detect(sentences, "^[0-9]+v:"),
      str_count(sentences, "\\w+") >= 8
    ) |>
    distinct(sentences, .keep_all = TRUE) |>
    mutate(sentences = str_squish(sentences)) |>
    slice_head(n = n_quotes) |>
    pull(sentences)
  
  if (length(quotes) == 0) return("")
  
  # Разбиваем на части по 40 символов с переносом
  quote_text <- quotes[1]
  words <- str_split(quote_text, " ")[[1]]
  lines <- ""
  current_line <- ""
  
  for (word in words) {
    test_line <- paste(current_line, word)
    if (str_length(test_line) > 40) {
      lines <- paste0(lines, current_line, "<br>")
      current_line <- word
    } else {
      current_line <- test_line
    }
  }
  lines <- paste0(lines, current_line)  # добавляем последнюю строку
  
  paste0(
    "<br><i>Цитата:</i><br>",
    "\"", lines, "\""
  )
}
Code
# 4. ПОСТРОЕНИЕ СЕТИ


artist_pairs <- artist_mentions |>
  pairwise_count(lemma, doc_id, sort = TRUE) |>
  filter(n >= 3) |>
  filter(item1 < item2)

artist_freq <- artist_mentions |>
  count(lemma, name = "total_mentions")

# Проверь что получилось
nrow(artist_pairs)
[1] 118
Code
# Путь к сжатым картинкам
img_dir <- "/Users/az/Documents/R scripts/project_VanGogh/images/compressed"

img_to_base64 <- function(path) {
  if (!file.exists(path)) return(NA_character_)
  paste0("data:image/jpeg;base64,", base64encode(path))
}
Code
# 5. СОЗДАНИЕ УЗЛОВ


nodes <- data.frame(
  id = unique(c(artist_pairs$item1, artist_pairs$item2))
) |>
  left_join(artist_freq, by = c("id" = "lemma"))

nodes <- nodes |>
  mutate(
    label = id,
    image = case_when(
      id == "Bernard" & file.exists(file.path(img_dir, "bernard.jpg")) ~ 
        img_to_base64(file.path(img_dir, "bernard.jpg")),
      id == "Breton" & file.exists(file.path(img_dir, "breton.jpg")) ~ 
        img_to_base64(file.path(img_dir, "breton.jpg")),
      id == "Cézanne" & file.exists(file.path(img_dir, "cezanne.jpg")) ~ 
        img_to_base64(file.path(img_dir, "cezanne.jpg")),
      id == "Corot" & file.exists(file.path(img_dir, "corot.jpg")) ~ 
        img_to_base64(file.path(img_dir, "corot.jpg")),
      id == "Courbet" & file.exists(file.path(img_dir, "courbet.jpeg")) ~ 
        img_to_base64(file.path(img_dir, "courbet.jpeg")),
      id == "Daubigny" & file.exists(file.path(img_dir, "daubigny.jpg")) ~ 
        img_to_base64(file.path(img_dir, "daubigny.jpg")),
      id == "Degas" & file.exists(file.path(img_dir, "degas.jpg")) ~ 
        img_to_base64(file.path(img_dir, "degas.jpg")),
      id == "Delacroix" & file.exists(file.path(img_dir, "delacroix.jpg")) ~ 
        img_to_base64(file.path(img_dir, "delacroix.jpg")),
      id == "Giotto" & file.exists(file.path(img_dir, "giotto.jpg")) ~ 
        img_to_base64(file.path(img_dir, "giotto.jpg")),
      id == "Hals" & file.exists(file.path(img_dir, "hals.jpg")) ~ 
        img_to_base64(file.path(img_dir, "hals.jpg")),
      id == "Lautrec" & file.exists(file.path(img_dir, "lautrec.jpg")) ~ 
        img_to_base64(file.path(img_dir, "lautrec.jpg")),
      id == "Mauve" & file.exists(file.path(img_dir, "mauve.jpg")) ~ 
        img_to_base64(file.path(img_dir, "mauve.jpg")),
      id == "Mesdag" & file.exists(file.path(img_dir, "mesdag.jpg")) ~ 
        img_to_base64(file.path(img_dir, "mesdag.jpg")),
      id == "Michelangelo" & file.exists(file.path(img_dir, "michelangelo.jpg")) ~ 
        img_to_base64(file.path(img_dir, "michelangelo.jpg")),
      id == "Pissarro" & file.exists(file.path(img_dir, "pissarro.jpg")) ~ 
        img_to_base64(file.path(img_dir, "pissarro.jpg")),
      id == "Renoir" & file.exists(file.path(img_dir, "renoir.jpg")) ~ 
        img_to_base64(file.path(img_dir, "renoir.jpg")),
      id == "Rousseau" & file.exists(file.path(img_dir, "russo.jpg")) ~ 
        img_to_base64(file.path(img_dir, "russo.jpg")),
      id == "Rubens" & file.exists(file.path(img_dir, "rubens.jpg")) ~ 
        img_to_base64(file.path(img_dir, "rubens.jpg")),
      id == "Seurat" & file.exists(file.path(img_dir, "serat.jpg")) ~ 
        img_to_base64(file.path(img_dir, "serat.jpg")),
      id == "Signac" & file.exists(file.path(img_dir, "sinyak.jpg")) ~ 
        img_to_base64(file.path(img_dir, "sinyak.jpg")),
      id == "Sisley" & file.exists(file.path(img_dir, "sisley.jpg")) ~ 
        img_to_base64(file.path(img_dir, "sisley.jpg")),
      id == "Weissenbruch" & file.exists(file.path(img_dir, "weissenbruch.jpg")) ~ 
        img_to_base64(file.path(img_dir, "weissenbruch.jpg")),
      id == "Gauguin" & file.exists(file.path(img_dir, "gauguin.jpg")) ~ 
        img_to_base64(file.path(img_dir, "gauguin.jpg")),
      id == "Rembrandt" & file.exists(file.path(img_dir, "rembrandt.jpg")) ~ 
        img_to_base64(file.path(img_dir, "rembrandt.jpg")),
      id == "Daumier" & file.exists(file.path(img_dir, "daumier.jpg")) ~ 
        img_to_base64(file.path(img_dir, "daumier.jpg")),
      id == "Monet" & file.exists(file.path(img_dir, "monet.jpg")) ~ 
        img_to_base64(file.path(img_dir, "monet.jpg")),
      id == "Millet" & file.exists(file.path(img_dir, "millet.jpg")) ~ 
        img_to_base64(file.path(img_dir, "millet.jpg")),
      TRUE ~ NA_character_
    ),
    shape = ifelse(!is.na(image), "circularImage", "dot"),
    size = 30,
    borderWidth = 2,
    title = paste0(
      "<b>", id, "</b><br>",
      "Упоминаний: ", total_mentions,
      sapply(id, get_artist_quotes, mentions_data = artist_mentions, n_quotes = 1)
    )
  )


# 6. СОЗДАНИЕ РЕБЕР


edges <- artist_pairs |>
  rename(from = item1, to = item2) |>
  mutate(
    width = n / 5,
    title = paste0("Упомянуты вместе: ", n, " раз")
  )


# 7. КЛАСТЕРИЗАЦИЯ


g <- graph_from_data_frame(edges, directed = FALSE, vertices = nodes)
communities <- cluster_louvain(g)
nodes$group <- membership(communities)[match(nodes$id, V(g)$name)]

nodes <- nodes |>
  mutate(
    group_label = case_when(
      group == 1 ~ "Импрессионисты и постимпрессионисты",
      group == 2 ~ "Старые мастера и теоретики цвета", 
      group == 3 ~ "Барбизонская школа и голландцы",
      TRUE ~ paste("Группа", group)
    )
  )
Code
# 8. ВИЗУАЛИЗАЦИЯ

network <- visNetwork(nodes, edges, width = "100%", height = "800px") |>
  visNodes(shapeProperties = list(useBorderWithImage = TRUE)) |>
  visOptions(
    highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
    nodesIdSelection = TRUE
  ) |>
  visInteraction(
    tooltipStyle = 'max-width: 200px; white-space: normal; word-wrap: break-word; font-size: 11px;',
    navigationButtons = TRUE, 
    zoomView = TRUE
  ) |>
  visPhysics(
    enabled = TRUE,
    stabilization = list(enabled = TRUE, iterations = 200),
    solver = "barnesHut",
    barnesHut = list(
      gravitationalConstant = -2000,
      centralGravity = 0.1,
      springLength = 150,
      springConstant = 0.01,
      damping = 0.09
    )
  ) |>
  visLegend(
    addNodes = list(
      list(label = "Импрессионисты и постимпрессионисты", 
           shape = "dot", size = 15, color = "lightblue"),
      list(label = "Старые мастера и теоретики цвета", 
           shape = "dot", size = 15, color = "orange"),
      list(label = "Барбизонская школа и голландцы", 
           shape = "dot", size = 15, color = "red")
    ),
    useGroups = FALSE,
    width = 0.2,
    position = "right"
  )

network
Code
visSave(network, file = "vangogh_influence_network.html", selfcontained = TRUE)
Code
# После кластеризации
communities <- cluster_louvain(g)
modularity(communities)
[1] 0.2069808
Code
# Вершины и рёбра
vcount(g)  # количество вершин (художников)
[1] 27
Code
ecount(g)  # количество рёбер (связей)
[1] 118
Code
# Подробнее
cat("Вершин:", vcount(g), "\n")
Вершин: 27 
Code
cat("Рёбер:", ecount(g), "\n")
Рёбер: 118 
Code
cat("Плотность:", edge_density(g), "\n")
Плотность: 0.3361823 
Code
# Посмотри состав кластеров
nodes |>
  select(id, group, total_mentions) |>
  arrange(group, desc(total_mentions))
             id group total_mentions
1       Gauguin     1            492
2       Bernard     1            178
3      Pissarro     1             39
4         Monet     1             35
5       Cézanne     1             19
6         Degas     1             18
7        Seurat     1             14
8       Lautrec     1             10
9        Renoir     1              9
10       Sisley     1              5
11    Delacroix     2            177
12      Daumier     2             74
13       Giotto     2             20
14       Signac     2             19
15 Michelangelo     2             12
16       Millet     2              9
17        Mauve     3            318
18    Rembrandt     3            118
19       Breton     3            100
20        Corot     3             86
21     Daubigny     3             57
22 Weissenbruch     3             29
23     Rousseau     3             25
24         Hals     3             24
25       Mesdag     3             22
26       Rubens     3             21
27      Courbet     3             13
Code
nodes |>
  select(id, group, total_mentions) |>
  arrange(group, desc(total_mentions)) |>
  head(30)
             id group total_mentions
1       Gauguin     1            492
2       Bernard     1            178
3      Pissarro     1             39
4         Monet     1             35
5       Cézanne     1             19
6         Degas     1             18
7        Seurat     1             14
8       Lautrec     1             10
9        Renoir     1              9
10       Sisley     1              5
11    Delacroix     2            177
12      Daumier     2             74
13       Giotto     2             20
14       Signac     2             19
15 Michelangelo     2             12
16       Millet     2              9
17        Mauve     3            318
18    Rembrandt     3            118
19       Breton     3            100
20        Corot     3             86
21     Daubigny     3             57
22 Weissenbruch     3             29
23     Rousseau     3             25
24         Hals     3             24
25       Mesdag     3             22
26       Rubens     3             21
27      Courbet     3             13