Ethics & Privacy

Važno: Ova analiza je sprovedena u cilju istraživanja propagandnih obrazaca i javnog diskursa, a ne targetiranja pojedinaca.

Principi: - Lični podaci (user name-ovi, telefoni, adrese) se ne objavljuju - Rezultati su agregirani; grupe su anonimizovane (npr. “Group A”, “Group B”) - Puni tekstovi poruka sa govorom mržnje se ne prikazuju u izlazu - Prikazani su samo anonimizovani isečci (20-30 reči) ili ključni termini - Javni kanali mogu biti identifikovani samo uz eksplicitnu dozvolu korisnika

Setup i Učitavanje Podataka

# Funkcija za brzo učitavanje CSV-a
load_telegram_data <- function(path, 
                               date_filter = NULL,  # "YYYY-MM-DD" format
                               keyword_filter = NULL,  # vektor ključnih reči
                               max_rows = NULL) {  # limit broja redova
    
  cat("Učitavanje podataka iz:", path, "\n")
  
  # Provera postojanja fajla
  if (!file.exists(path)) {
    stop("CSV fajl nije pronađen na putanji: ", path)
  }
  
  # Učitavanje sa data.table::fread (brzo)
  cat("Učitavanje CSV fajla...\n")
  start_time <- Sys.time()
  
  # Učitaj samo kolone koje su potrebne (optimizacija)
  # Prvo pročitaj samo prvi red da vidimo kolone
  sample <- data.table::fread(path, nrows = 1000)
  col_names <- names(sample)
  
  cat("Pronađene kolone:", paste(col_names, collapse = ", "), "\n")
  
  # Mapiranje kolona (robustno)
  col_mapping <- list(
    group_id = c("group_id", "group_name", "channel", "group", "source"),
    post_id = c("post_id", "id", "message_id", "msg_id"),
    date_time = c("date_time", "timestamp", "date", "time", "created_at", "datetime"),
    text = c("text", "message", "content", "body", "post_text"),
    forwarded_from = c("forwarded_from", "forward_from", "forward_source"),
    reply_to = c("reply_to", "reply_to_id", "reply"),
    views = c("views", "view_count", "view"),
    links = c("links", "urls", "link"),
    media_type = c("media_type", "media", "attachment_type"),
    sender = c("sender", "author", "user", "username", "from")
  )
  
  # Pronađi stvarne nazive kolona
  actual_cols <- list()
  for (var_name in names(col_mapping)) {
    found <- intersect(col_mapping[[var_name]], col_names)
    if (length(found) > 0) {
      actual_cols[[var_name]] <- found[1]
      cat("Mapirano:", var_name, "->", found[1], "\n")
    } else {
      actual_cols[[var_name]] <- NA
      cat("Nije pronađeno:", var_name, "\n")
    }
  }
  
  # Učitaj puni dataset
  if (is.null(max_rows)) {
    dt <- data.table::fread(path, encoding = "UTF-8", showProgress = TRUE)
  } else {
    dt <- data.table::fread(path, nrows = max_rows, encoding = "UTF-8", showProgress = TRUE)
  }
  
  # Preimenuj kolone na standardne nazive
  for (var_name in names(actual_cols)) {
    if (!is.na(actual_cols[[var_name]])) {
      data.table::setnames(dt, actual_cols[[var_name]], var_name)
    }
  }
  
  # Kreiraj standardne kolone ako ne postoje
  if (!"group_id" %in% names(dt)) {
    if ("group_name" %in% names(dt)) {
      dt[, group_id := group_name]
    } else {
      dt[, group_id := paste0("Group_", seq_len(.N))]
    }
  }
  
  if (!"post_id" %in% names(dt)) {
    dt[, post_id := seq_len(.N)]
  }
  
  if (!"text" %in% names(dt)) {
    stop("Kritična kolona 'text' nije pronađena!")
  }
  
  # Parsiranje datuma
  if ("date_time" %in% names(dt)) {
    # Pokušaj različite formate
    dt[, date_time_parsed := as.POSIXct(NA)]
    
    # Pokušaj različite formate
    formats <- c(
      "%Y-%m-%d %H:%M:%S",
      "%Y-%m-%dT%H:%M:%S",
      "%Y-%m-%d",
      "%d.%m.%Y %H:%M:%S",
      "%d/%m/%Y %H:%M:%S"
    )
    
    for (fmt in formats) {
      parsed <- as.POSIXct(dt$date_time, format = fmt, tz = "Europe/Belgrade")
      dt[is.na(date_time_parsed) & !is.na(parsed), date_time_parsed := parsed]
    }
    
    dt[, date := as.Date(date_time_parsed)]
  } else {
    dt[, date := Sys.Date()]  # Fallback
    dt[, date_time_parsed := as.POSIXct(Sys.time())]
  }
  
  # Filter po datumu
  if (!is.null(date_filter)) {
    filter_date <- as.Date(date_filter)
    dt <- dt[date >= filter_date]
    cat("Filtrirano po datumu >= ", date_filter, ": ", nrow(dt), " redova\n")
  }
  
  # Filter po ključnim rečima
  if (!is.null(keyword_filter) && length(keyword_filter) > 0) {
    pattern <- paste(keyword_filter, collapse = "|")
    dt <- dt[grepl(pattern, text, ignore.case = TRUE)]
    cat("Filtrirano po ključnim rečima: ", nrow(dt), " redova\n")
  }
  
  # Dodaj redni broj
  dt[, row_id := seq_len(.N)]
  
  end_time <- Sys.time()
  cat("Učitano", nrow(dt), "redova za", 
      round(as.numeric(end_time - start_time, units = "secs"), 2), "sekundi\n")
  
  return(dt)
}

# Učitaj podatke
# Opcija 1: Učitaj sve (može biti sporo za 302 MB)
# dt <- load_telegram_data(CSV_PATH)

# Opcija 2: Učitaj samo poslednjih N dana (brže za testiranje)
# dt <- load_telegram_data(CSV_PATH, date_filter = "2024-01-01")

# Opcija 3: Učitaj samo postove sa ključnim rečima
# dt <- load_telegram_data(CSV_PATH, keyword_filter = c("Kragujevac", "migrant", "protest"))

# Opcija 4: Učitaj ograničen broj redova za testiranje
# dt <- load_telegram_data(CSV_PATH, max_rows = 100000)

# TODO: Odkomentariši jednu od opcija iznad ili prilagodi parametre
# Za produkciju, koristi Opciju 1 ili 2

# Privremeno: učitaj sample za testiranje (promeni ovo!)
dt <- load_telegram_data(CSV_PATH, max_rows = 50000)
## Učitavanje podataka iz: D:/MIGRANTI/telegram-migranti.csv 
## Učitavanje CSV fajla...
## Pronađene kolone: id, date, message, editDate, views, forwards, messageLink, pinned, fwdFrom, viaBotId, postAuthor, replies, groupedId, reactions._👍, reactions._❤, reactions._🔥, reactions._🥰, post, entities, restrictionReason, channel, className, action, reactions._🙏, reactions._👏, reactions._💯, reactions._😁, reactions._🤡, reactions._🤣, reactions._🖕, reactions._🎉, reactions._🏆, reactions._👌, reactions, reactions._😢, groupedId.value, reactions._🙈, reactions._💊, reactions._🍾, reactions._🥱, reactions._👎, reactions._😈, reactions._😨, reactions._👀, reactions._🤬, reactions._🤔, reactions._🫡, reactions._🤩, reactions._😭, reactions._🤨, reactions._🍌, reactions._🤯, reactions._🤓, reactions._🤷‍♂, reactions._🤪, reactions._🕊, reactions._😡, reactions._🌭, reactions._❤‍🔥, reactions._⚡, reactions._🥴, fwdFrom.fwd_channel, fwdFrom.fwd_message, reactions._😱, reactions._🗿, reactions._🦄, reactions._🐳, reactions._🤝, reactions._🌚, reactions._😍, reactions._🙉, reactions._✍, reactions._🆒, reactions._😇, reactions._👻, reactions._😎, reactions._💅, reactions._🙊, reactions._👾, reactions._🤗, reactions._🤷, reactions._😐, reactions._🎃, reactions._🤷‍♀, reactions._😴, reactions._☃, reactions._🎄, reactions._😘, reactions._👨‍💻, reactions._🎅, reactions._💔, reactions._💋, reactions._🍓, reactions._💘, reactions._💩, reactions._🤮, viaBotId.value 
## Mapirano: group_id -> channel 
## Mapirano: post_id -> id 
## Mapirano: date_time -> date 
## Mapirano: text -> message 
## Nije pronađeno: forwarded_from 
## Nije pronađeno: reply_to 
## Mapirano: views -> views 
## Nije pronađeno: links 
## Nije pronađeno: media_type 
## Nije pronađeno: sender 
## Učitano 50000 redova za 0.79 sekundi
cat("\n=== PREGLED DATASETA ===\n")
## 
## === PREGLED DATASETA ===
cat("Broj redova:", nrow(dt), "\n")
## Broj redova: 50000
cat("Broj kolona:", ncol(dt), "\n")
## Broj kolona: 100
cat("Kolone:", paste(names(dt), collapse = ", "), "\n")
## Kolone: post_id, date_time, text, editDate, views, forwards, messageLink, pinned, fwdFrom, viaBotId, postAuthor, replies, groupedId, reactions._👍, reactions._❤, reactions._🔥, reactions._🥰, post, entities, restrictionReason, group_id, className, action, reactions._🙏, reactions._👏, reactions._💯, reactions._😁, reactions._🤡, reactions._🤣, reactions._🖕, reactions._🎉, reactions._🏆, reactions._👌, reactions, reactions._😢, groupedId.value, reactions._🙈, reactions._💊, reactions._🍾, reactions._🥱, reactions._👎, reactions._😈, reactions._😨, reactions._👀, reactions._🤬, reactions._🤔, reactions._🫡, reactions._🤩, reactions._😭, reactions._🤨, reactions._🍌, reactions._🤯, reactions._🤓, reactions._🤷‍♂, reactions._🤪, reactions._🕊, reactions._😡, reactions._🌭, reactions._❤‍🔥, reactions._⚡, reactions._🥴, fwdFrom.fwd_channel, fwdFrom.fwd_message, reactions._😱, reactions._🗿, reactions._🦄, reactions._🐳, reactions._🤝, reactions._🌚, reactions._😍, reactions._🙉, reactions._✍, reactions._🆒, reactions._😇, reactions._👻, reactions._😎, reactions._💅, reactions._🙊, reactions._👾, reactions._🤗, reactions._🤷, reactions._😐, reactions._🎃, reactions._🤷‍♀, reactions._😴, reactions._☃, reactions._🎄, reactions._😘, reactions._👨‍💻, reactions._🎅, reactions._💔, reactions._💋, reactions._🍓, reactions._💘, reactions._💩, reactions._🤮, viaBotId.value, date_time_parsed, date, row_id
cat("Vremenski raspon:", 
    if ("date" %in% names(dt)) paste(range(dt$date, na.rm = TRUE), collapse = " - ") else "N/A", 
    "\n")
## Vremenski raspon: 2021-09-24 - 2025-12-12

Data Cleaning & Preprocessing

# Funkcija za čišćenje teksta
clean_text <- function(text) {
  if (is.null(text) || length(text) == 0) return(character(0))
  
  text <- as.character(text)
  
  # Ukloni NA
  text[is.na(text)] <- ""
  
  # Normalizacija latinice/ćirilice (osnovna)
  # TODO: Dodaj kompletnu transliteraciju ako je potrebno
  
  # Ukloni URL-ove
  text <- str_replace_all(text, "https?://[\\S]+", " ")
  text <- str_replace_all(text, "www\\.[\\S]+", " ")
  
  # Ukloni email adrese
  text <- str_replace_all(text, "[\\S]+@[\\S]+", " ")
  
  # Ukloni telefone (osnovni pattern)
  text <- str_replace_all(text, "\\+?[0-9]{8,}", " ")
  
  # Ukloni višestruke razmake
  text <- str_replace_all(text, "\\s+", " ")
  
  # Trim
  text <- str_trim(text)
  
  return(text)
}

# Čišćenje teksta
dt[, text_clean := clean_text(text)]

# Ukloni prazne postove
dt <- dt[text_clean != "" & nchar(text_clean) > 5]

# Dodaj dodatne kolone
dt[, text_length := nchar(text_clean)]
dt[, word_count := str_count(text_clean, "\\S+")]

# Anonimizacija grupa (osim ako su javni kanali)
if ("group_id" %in% names(dt)) {
  # Kreiraj anonimizovane nazive
  unique_groups <- unique(dt$group_id)
  group_mapping <- data.table(
    group_id = unique_groups,
    group_anon = paste0("Group_", LETTERS[seq_along(unique_groups)])
  )
  dt <- data.table::merge.data.table(dt, group_mapping, by = "group_id", all.x = TRUE)
} else {
  dt[, group_anon := "Group_A"]
}

cat("Očišćeno:", nrow(dt), "postova\n")
## Očišćeno: 34609 postova
cat("Broj grupa:", length(unique(dt$group_anon)), "\n")
## Broj grupa: 3

Exploratory Analysis

# Opis dataseta
cat("=== OPIS DATASETA ===\n")
## === OPIS DATASETA ===
cat("Ukupan broj postova:", nrow(dt), "\n")
## Ukupan broj postova: 34609
cat("Broj jedinstvenih grupa:", length(unique(dt$group_anon)), "\n")
## Broj jedinstvenih grupa: 3
if ("date" %in% names(dt)) {
  cat("Vremenski raspon:", 
      min(dt$date, na.rm = TRUE), "do", 
      max(dt$date, na.rm = TRUE), "\n")
}
## Vremenski raspon: 18894 do 20434
# Broj postova po grupama
posts_by_group <- dt[, .N, by = group_anon][order(-N)]
print(posts_by_group)
##    group_anon     N
##        <char> <int>
## 1:    Group_A 24233
## 2:    Group_C  7576
## 3:    Group_B  2800
# Trend postova po danu
if ("date" %in% names(dt)) {
  posts_by_date <- dt[!is.na(date), .N, by = date][order(date)]
  
  p1 <- ggplot(posts_by_date, aes(x = date, y = N)) +
    geom_line(color = "steelblue", linewidth = 1) +
    geom_point(color = "steelblue", alpha = 0.6) +
    labs(
      title = "Broj postova po danu",
      x = "Datum",
      y = "Broj postova"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(size = 14, face = "bold"))
  
  print(p1)
  
  # Trend po nedelji
  dt[!is.na(date), week := lubridate::floor_date(date, "week")]
  posts_by_week <- dt[!is.na(week), .N, by = week][order(week)]
  
  p2 <- ggplot(posts_by_week, aes(x = week, y = N)) +
    geom_line(color = "darkgreen", linewidth = 1) +
    geom_point(color = "darkgreen", alpha = 0.6) +
    labs(
      title = "Broj postova po nedelji",
      x = "Nedelja",
      y = "Broj postova"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(size = 14, face = "bold"))
  
  print(p2)
}

Fokus na Kragujevac

# Lista ključnih reči za Kragujevac
kragujevac_keywords <- c(
  "Kragujevac", "Kragujevcu", "Kragujevca", "Kragujevcem",
  "KG", "kg", "K.G.",
  "Šumadija", "Šumadije", "Šumadiji", "Šumadijom",
  "Stanovo", "Stanova",
  "Aerodrom", "Aerodroma",
  "Centar", "Centru", "Centra",
  "Kragujevčani", "Kragujevčanima"
)

# TODO: Dodaj dodatne lokacije/pojmove ako je potrebno

# Detekcija pominjanja Kragujevca
detect_kragujevac <- function(text, keywords = kragujevac_keywords) {
  pattern <- paste(keywords, collapse = "|")
  grepl(pattern, text, ignore.case = TRUE)
}

dt[, mentions_kg := grepl(paste(kragujevac_keywords, collapse = "|"), 
                          text_clean, ignore.case = TRUE)]

cat("=== KRAGUJEVAC MENTIONS ===\n")
## === KRAGUJEVAC MENTIONS ===
cat("Broj postova koji pominju Kragujevac:", sum(dt$mentions_kg, na.rm = TRUE), "\n")
## Broj postova koji pominju Kragujevac: 37
cat("Procenat:", round(100 * mean(dt$mentions_kg, na.rm = TRUE), 2), "%\n")
## Procenat: 0.11 %
# Trend pominjanja kroz vreme
if ("date" %in% names(dt)) {
  kg_by_date <- dt[!is.na(date) & mentions_kg == TRUE, .N, by = date][order(date)]
  
  if (nrow(kg_by_date) > 0) {
    p3 <- ggplot(kg_by_date, aes(x = date, y = N)) +
      geom_line(color = "red", linewidth = 1) +
      geom_point(color = "red", alpha = 0.6) +
      labs(
        title = "Pominjanje Kragujevca kroz vreme",
        x = "Datum",
        y = "Broj postova"
      ) +
      theme_minimal() +
      theme(plot.title = element_text(size = 14, face = "bold"))
    
    print(p3)
  }
}

# Pominjanje po grupama
kg_by_group <- dt[mentions_kg == TRUE, .N, by = group_anon][order(-N)]
if (nrow(kg_by_group) > 0) {
  cat("\nPominjanje Kragujevca po grupama:\n")
  print(kg_by_group)
  
  p4 <- ggplot(kg_by_group, aes(x = reorder(group_anon, N), y = N)) +
    geom_bar(stat = "identity", fill = "coral") +
    coord_flip() +
    labs(
      title = "Pominjanje Kragujevca po grupama",
      x = "Grupa",
      y = "Broj postova"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(size = 14, face = "bold"))
  
  print(p4)
}
## 
## Pominjanje Kragujevca po grupama:
##    group_anon     N
##        <char> <int>
## 1:    Group_A    19
## 2:    Group_C    11
## 3:    Group_B     7

Detekcija Organizovanja Protesta / Call-to-Action

# Rečnik CTA fraza
cta_phrases <- c(
  "okupljanje", "okupljanja", "okupljanju",
  "protest", "protesta", "protestu", "proteste",
  "večeras u", "večeras u", "sutra u", "sutra u",
  "ponesite", "ponesite", "ponesite",
  "blokada", "blokade", "blokadi",
  "skup", "skupa", "skupu", "skupovi",
  "dođite", "dođi", "dođimo", "dođite",
  "share", "delite", "podelite", "podeli",
  "hitno", "hitno", "urgentno",
  "mobilizacija", "mobilizacije", "mobilizaciji",
  "u \\d+ sati", "u \\d+:\\d+",  # vreme
  "ispred", "ispred",
  "kod", "kod",
  "na trgu", "na trgu",
  "ispred opštine", "ispred opštine",
  "sastanak", "sastanka", "sastanku",
  "demonstracije", "demonstracija",
  "marš", "marša", "maršu"
)

# TODO: Dodaj dodatne CTA fraze ako je potrebno

# Funkcija za izračunavanje CTA score-a
calculate_cta_score <- function(text) {
  if (is.na(text) || text == "") return(0)
  
  score <- 0
  
  # Broj CTA fraza
  for (phrase in cta_phrases) {
    matches <- str_count(text, regex(phrase, ignore_case = TRUE))
    score <- score + matches
  }
  
  # Detekcija vremena (pattern: "u 18:00", "u 18 sati", "sutra u 20")
  time_patterns <- c(
    "u \\d{1,2}:\\d{2}",
    "u \\d{1,2} sati",
    "u \\d{1,2}h"
  )
  for (pattern in time_patterns) {
    if (grepl(pattern, text, ignore.case = TRUE)) {
      score <- score + 2
    }
  }
  
  # Detekcija datuma (pattern: "sutra", "danas", "večeras")
  date_words <- c("sutra", "danas", "večeras", "prekosutra")
  for (word in date_words) {
    if (grepl(paste0("\\b", word, "\\b"), text, ignore.case = TRUE)) {
      score <- score + 1
    }
  }
  
  # Detekcija lokacije (pattern: "ispred", "kod", "na")
  location_words <- c("ispred", "kod", "na trgu", "na", "u")
  location_count <- sum(sapply(location_words, function(w) {
    grepl(paste0("\\b", w, "\\b"), text, ignore.case = TRUE)
  }))
  if (location_count > 0) {
    score <- score + min(location_count, 3)
  }
  
  # Uzbunjivanje (ALL CAPS, uzvičnici)
  caps_ratio <- str_count(text, "[A-ZА-Я]") / max(nchar(text), 1)
  if (caps_ratio > 0.3) {
    score <- score + 1
  }
  
  exclamation_count <- str_count(text, "!")
  if (exclamation_count > 2) {
    score <- score + min(exclamation_count - 2, 3)
  }
  
  return(score)
}

# Izračunaj CTA score
dt[, cta_score := sapply(text_clean, calculate_cta_score)]

# Flag za "event-like" postove
dt[, flag_cta := cta_score >= 3]  # Threshold može se prilagoditi

cat("=== CTA DETEKCIJA ===\n")
## === CTA DETEKCIJA ===
cat("Broj postova sa CTA score >= 3:", sum(dt$flag_cta, na.rm = TRUE), "\n")
## Broj postova sa CTA score >= 3: 189
cat("Prosečan CTA score:", round(mean(dt$cta_score, na.rm = TRUE), 2), "\n")
## Prosečan CTA score: 0.04
# TOP CTA postovi (bez punog teksta - samo anonimizovani isečci)
top_cta <- dt[flag_cta == TRUE][order(-cta_score)][1:min(20, sum(dt$flag_cta))]
top_cta[, text_preview := substr(text_clean, 1, 100)]  # Prvih 100 karaktera

cat("\nTOP 10 CTA postova (preview):\n")
## 
## TOP 10 CTA postova (preview):
print(top_cta[, .(group_anon, date, cta_score, text_preview)])
##     group_anon       date cta_score
##         <char>     <Date>     <num>
##  1:    Group_A 2022-10-16         9
##  2:    Group_A 2025-03-04         6
##  3:    Group_B 2024-07-10         6
##  4:    Group_A 2025-08-13         5
##  5:    Group_A 2025-06-21         5
##  6:    Group_A 2025-05-23         5
##  7:    Group_A 2025-04-26         5
##  8:    Group_C 2025-05-05         4
##  9:    Group_C 2025-02-26         4
## 10:    Group_C 2025-01-19         4
## 11:    Group_A 2024-10-01         4
## 12:    Group_A 2024-05-11         4
## 13:    Group_A 2023-11-16         4
## 14:    Group_A 2023-11-15         4
## 15:    Group_A 2023-10-03         4
## 16:    Group_A 2023-07-02         4
## 17:    Group_B 2025-08-13         4
## 18:    Group_B 2025-04-01         4
## 19:    Group_B 2024-05-28         4
## 20:    Group_B 2023-11-08         4
##     group_anon       date cta_score
##                                                                                             text_preview
##                                                                                                   <char>
##  1: 💥Данас је одржан још један масовни скуп у Молдавији, а увече је ”демократска” председница Маја Санду
##  2: ⚡️⚡️⚡️⚡️⚡️⚡️ NAJVECA GRESKA koju je ""kolektivni zapad"" uradio u 21. veku je sto je naterao ruske n
##  3:  SUTRA BRANIMO USTAVNI POREDAK ISPRED USTAVNOG SUDA, U 8 UJUTRO ‼️ Ustavni sud nije zaštitio državno 
##  4: 🌍 Pridružite se kanalu Stevan II, koji vodi čovek iz Rusije ! 🇷🇺 Geopolitika, vesti, analize i malo 
##  5: 🌍 Pridružite se kanalu Stevan II, koji vodi čovek iz Rusije ! 🇷🇺 Geopolitika, vesti, analize i malo 
##  6: 🌍 Pridružite se kanalu Stevan II, koji vodi čovek iz Rusije ! 🇷🇺 Geopolitika, vesti, analize i malo 
##  7: 🌍 Pridružite se kanalu Stevan II, koji vodi čovek iz Rusije ! 🇷🇺 Geopolitika, vesti, analize i malo 
##  8: 🇭🇺🇪🇺🇺🇸 Tramp predložio Orbanu da Mađarska izađe iz Evropske unije 💬 Mađarski premijer Viktor Orban i
##  9: Ајмо браћо и сестре да помогнемо, Срби су вазда били сложни за овакве ствари време је за нову мини б
## 10: 🇷🇸🚨 Beograd - Protest studenata ispred Ministarstva prosvete 🔗 Čitajte i zapratite nas: Nulta Tačka 
## 11: 📌КОЛАПС ИЗРАЕЛСКЕ ОДБРАНЕ!!!!!📌 🇷🇸#Српски 🇷🇸 #Сербский ⚡️⚡️⚡️⚡️⚡️👇👇👇 📱 InfoDefenseSERBIA 📱 InfoDefen
## 12: 🔈 Nulta Tačka Vaš SIGURNI izvor najnovijih informacija kojih nema u mejnstrim medijima 🖥 Nulta Tačka
## 13:  ⭕️ Nulta Tačka Vaš SIGURNI izvor najnovijih informacija kojih nema u mejnstrim medijima ⭕️ 👨‍💻 Porta
## 14:  ⭕️ Nulta Tačka Vaš SIGURNI izvor najnovijih informacija kojih nema u mejnstrim medijima ⭕️ 👨‍💻 Porta
## 15:        Dobar dan Bilja. Htelabi Vas zamoliti, da podelite link kanala Srpskih dobrovoljaca u Rusiji.
## 16: Јуриј Подољака преноси извештај @wargonzo ⚡️Ситуација у правцу Орехов-Работино u 13:00 по московском
## 17:                      Migrant kod ekonomskog divlja nozem i udara po drvecu u parku, plasi prolaznike
## 18:                              🎥 SNS degenerik koji danas zamalo nije pregazio studentkinju kod FON a.
## 19: 🇷🇸 Dr James Thorp kaže da je od Covid-19 ""vakcina"" do sada ubijeno ili teško povređeno više od 585
## 20: PODELITE! DA NAROD VIDI ISTINU! Priština: Vučićeva Srpska lista položila zakletvu Kosovu, pa odala p
##                                                                                             text_preview
# Trend CTA score kroz vreme
cta_by_date <- data.table()  # Inicijalizacija
if ("date" %in% names(dt)) {
  cta_by_date <- dt[!is.na(date), .(avg_score = mean(cta_score, na.rm = TRUE), 
                                     count = sum(flag_cta, na.rm = TRUE)), 
                    by = date][order(date)]
  
  if (nrow(cta_by_date) > 0) {
    p5 <- ggplot(cta_by_date, aes(x = date)) +
      geom_line(aes(y = avg_score), color = "blue", linewidth = 1) +
      geom_line(aes(y = count / max(count, na.rm = TRUE) * max(avg_score, na.rm = TRUE)), 
                color = "red", linewidth = 1, linetype = "dashed") +
      scale_y_continuous(
        name = "Prosečan CTA score",
        sec.axis = sec_axis(~ . / max(cta_by_date$avg_score, na.rm = TRUE) * max(cta_by_date$count, na.rm = TRUE),
                            name = "Broj CTA postova")
      ) +
      labs(
        title = "Trend CTA score i broj CTA postova",
        x = "Datum"
      ) +
      theme_minimal() +
      theme(plot.title = element_text(size = 14, face = "bold"))
    
    print(p5)
  }
}

# Identifikacija "talasa" (pikovi)
if ("date" %in% names(dt) && nrow(cta_by_date) > 0) {
  # Pronađi dane sa iznadprosečnim CTA aktivnostima
  threshold <- quantile(cta_by_date$count, 0.9, na.rm = TRUE)
  peaks <- cta_by_date[count >= threshold]
  
  cat("\n=== TOP TALASI (Pikovi) ===\n")
  if (nrow(peaks) > 0) {
    print(peaks[order(-count)])
  }
}
## 
## === TOP TALASI (Pikovi) ===
##            date avg_score count
##          <Date>     <num> <int>
##   1: 2022-10-13 0.2307692     6
##   2: 2025-06-15 0.1323529     3
##   3: 2022-10-14 0.2800000     2
##   4: 2022-10-16 2.1428571     2
##   5: 2023-10-09 0.1944444     2
##  ---                           
## 154: 2025-11-29 0.1111111     1
## 155: 2025-11-30 0.1304348     1
## 156: 2025-12-07 0.1153846     1
## 157: 2025-12-08 0.1034483     1
## 158: 2025-12-11 0.1666667     1

Propagandni Narativi i Teme

# Priprema korpusa za quanteda
corpus <- quanteda::corpus(dt$text_clean, docvars = data.frame(
  group = dt$group_anon,
  date = dt$date,
  cta_score = dt$cta_score,
  mentions_kg = dt$mentions_kg
))

# Tokenizacija
tokens_obj <- quanteda::tokens(
  corpus,
  remove_punct = TRUE,
  remove_numbers = TRUE,
  remove_symbols = TRUE,
  remove_url = TRUE
)

# Ukloni stopwords (srpski + engleski)
# Pokušaj da učitaš srpski stopwords, ako ne postoji koristi samo engleski
sr_stopwords <- tryCatch({
  quanteda::stopwords("sr")
}, error = function(e) {
  character(0)
})

custom_stopwords <- c(
  sr_stopwords,
  quanteda::stopwords("en"),
  "ovo", "to", "što", "šta", "koji", "koja", "koje",
  "je", "su", "bi", "će", "ćemo", "ćete", "ćeš", "ću",
  "sam", "si", "smo", "ste", "su", "jesam", "jesi", "jesmo", "jeste", "jesu",
  "ću", "ćeš", "će", "ćemo", "ćete", "će",
  "sam", "si", "je", "smo", "ste", "su",
  "bio", "bila", "bilo", "bili", "bile",
  "biti", "bit", "bi", "u", "na", "i", "su", "se", "nas", "i", "je", "za", "da"
)

tokens_obj <- quanteda::tokens_remove(tokens_obj, pattern = custom_stopwords)

# Kreiraj DFM
dfm_obj <- quanteda::dfm(tokens_obj)

# Top termini
top_terms <- quanteda::topfeatures(dfm_obj, n = 50)
cat("=== TOP 50 TERMINA ===\n")
## === TOP 50 TERMINA ===
print(head(top_terms, 20))
##                ⚡️                 у                 и                је 
##             80897             73890             73498             70150 
##                🇷🇸                да                на                се 
##             57984             55995             42293             38573 
##                су                за infodefenseserbia                од 
##             33420             24233             21568             18659 
##         #сербский           #српски       infodefense                са 
##             18519             18511             17694             15916 
##               нас                не                 а                ће 
##             14481             11058             10537             10094
# Bigrami
tokens_bigrams <- quanteda::tokens_ngrams(tokens_obj, n = 2)
dfm_bigrams <- quanteda::dfm(tokens_bigrams)
top_bigrams <- quanteda::topfeatures(dfm_bigrams, n = 30)
cat("\n=== TOP 30 BIGRAMA ===\n")
## 
## === TOP 30 BIGRAMA ===
print(head(top_bigrams, 15))
##                         ⚡️_⚡️                  🇷🇸_#сербский 
##                         56105                         18516 
##                    #српски_🇷🇸                    🇷🇸_#српски 
##                         18508                         18372 
## infodefenseserbia_infodefense                         🇷🇸_🇷🇸 
##                         17461                         10845 
##         нас_infodefenseserbia                   пратите_нас 
##                          9854                          9500 
##             #сербский_пратите          ⚡️_infodefenseserbia 
##                          9301                          9064 
##                  #сербский_⚡️                         да_се 
##                          9054                          7294 
##                оружаних_снага                   t.me_buntcg 
##                          7165                          6655 
##                         да_је 
##                          6011
# TF-IDF po grupama
dfm_grouped <- quanteda::dfm_group(dfm_obj, groups = quanteda::docvars(corpus, "group"))
tfidf_grouped <- quanteda::dfm_tfidf(dfm_grouped)

# Top TF-IDF termini po grupama
cat("\n=== TOP TF-IDF TERMINI PO GRUPAMA ===\n")
## 
## === TOP TF-IDF TERMINI PO GRUPAMA ===
for (grp in quanteda::docnames(tfidf_grouped)) {
  top_tfidf <- quanteda::topfeatures(tfidf_grouped[grp,], n = 10)
  cat("\n", grp, ":\n")
  print(top_tfidf)
}
## 
##  Group_A :
##   #сербский     #српски infodefense     палчеве    подољака         осу 
##   3260.6818   3259.2731   3115.4066    809.1976    645.0679    633.1399 
##     осташко     чат-бот  infodefall   рввоенкор 
##    585.9049    535.1413    532.5000    520.5393 
## 
##  Group_B :
## narodnapatrola            снс    principshop     србин.инфо       суботице 
##     215.359610      26.765871      22.187499      10.973789       9.332837 
##       белграде        дамњана          русов       paladins      subscribe 
##       9.065304       8.100198       7.633940       7.156819       6.679698 
## 
##  Group_C :
##             🇮🇷             🇮🇱 narodnapatrola        шиптари            снс 
##       27.11805       26.23760       18.31349       17.43303       16.90476 
##      шиптарски       приштини           иран      митровица      митровици 
##       16.90476       16.02430       14.79167       12.32639       11.97421
# Topic Modeling sa STM
cat("\n=== TOPIC MODELING ===\n")
## 
## === TOPIC MODELING ===
cat("Priprema podataka za topic modeling...\n")
## Priprema podataka za topic modeling...
# Konvertuj DFM u format za STM
dfm_stm <- quanteda::convert(dfm_obj, to = "stm")

# Odredi optimalan broj tema (može se prilagoditi)
K <- 10  # TODO: Prilagodi broj tema (8-15)

# STM model
stm_model <- stm::stm(
  documents = dfm_stm$documents,
  vocab = dfm_stm$vocab,
  K = K,
  data = dfm_stm$meta,
  init.type = "Spectral",
  seed = 12345
)
## Beginning Spectral Initialization 
##   Calculating the gram matrix...
##   Using only 10000 most frequent terms during initialization...
##   Finding anchor words...
##      ..........
##   Recovering initialization...
##      ....................................................................................................
## Initialization complete.
## ....................................................................................................
## Completed E-Step (8 seconds). 
## Completed M-Step. 
## Completing Iteration 1 (approx. per word bound = -8.574) 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 2 (approx. per word bound = -7.853, relative change = 8.408e-02) 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 3 (approx. per word bound = -7.737, relative change = 1.477e-02) 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 4 (approx. per word bound = -7.652, relative change = 1.096e-02) 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 5 (approx. per word bound = -7.601, relative change = 6.727e-03) 
## Topic 1: и, на, се, у, су 
##  Topic 2: infodefenseserbia, infodefense, #српски, је, и 
##  Topic 3: у, t.me, је, да, 🇷🇸 
##  Topic 4: је, да, и, у, за 
##  Topic 5: и, је, да, се, на 
##  Topic 6: у, су, је, 🇷🇸, од 
##  Topic 7: у, 🇷🇸, на, #сербский, нас 
##  Topic 8: да, на, се, у, за 
##  Topic 9: ⚡️, у, је, и, да 
##  Topic 10: 🇷🇸, и, да, се, 🇷🇺 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 6 (approx. per word bound = -7.569, relative change = 4.168e-03) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 7 (approx. per word bound = -7.550, relative change = 2.584e-03) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 8 (approx. per word bound = -7.536, relative change = 1.749e-03) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 9 (approx. per word bound = -7.527, relative change = 1.304e-03) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 10 (approx. per word bound = -7.519, relative change = 1.051e-03) 
## Topic 1: и, у, се, на, да 
##  Topic 2: infodefenseserbia, infodefense, #српски, #сербский, је 
##  Topic 3: у, t.me, је, 🇷🇸, на 
##  Topic 4: је, да, и, у, се 
##  Topic 5: и, је, да, се, у 
##  Topic 6: су, је, у, од, 🇷🇸 
##  Topic 7: 🇷🇸, на, у, нас, и 
##  Topic 8: да, на, у, се, и 
##  Topic 9: ⚡️, у, 🇷🇸, и, 🇷🇺 
##  Topic 10: 🇷🇸, и, ❤️, нас, истина 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 11 (approx. per word bound = -7.512, relative change = 9.022e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 12 (approx. per word bound = -7.506, relative change = 8.017e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 13 (approx. per word bound = -7.501, relative change = 7.003e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 14 (approx. per word bound = -7.496, relative change = 5.919e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 15 (approx. per word bound = -7.492, relative change = 5.013e-04) 
## Topic 1: у, и, се, на, да 
##  Topic 2: infodefenseserbia, infodefense, #српски, #сербский, пратите 
##  Topic 3: у, t.me, је, buntcg, 🇷🇸 
##  Topic 4: је, да, и, у, се 
##  Topic 5: и, је, да, у, се 
##  Topic 6: је, су, у, од, за 
##  Topic 7: 🇷🇸, на, у, и, нас 
##  Topic 8: да, у, и, на, се 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, у, и 
##  Topic 10: 🇷🇸, и, нас, ❤️, запратите 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 16 (approx. per word bound = -7.489, relative change = 4.405e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 17 (approx. per word bound = -7.486, relative change = 4.018e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 18 (approx. per word bound = -7.483, relative change = 3.596e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 19 (approx. per word bound = -7.481, relative change = 3.152e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 20 (approx. per word bound = -7.479, relative change = 2.861e-04) 
## Topic 1: у, и, на, се, да 
##  Topic 2: infodefenseserbia, #српски, infodefense, #сербский, пратите 
##  Topic 3: у, t.me, је, buntcg, 🇷🇸 
##  Topic 4: да, је, и, у, се 
##  Topic 5: и, је, да, у, се 
##  Topic 6: је, у, су, од, за 
##  Topic 7: 🇷🇸, на, у, и, нас 
##  Topic 8: да, у, и, на, је 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, у, infodefenseserbia 
##  Topic 10: 🇷🇸, нас, и, ❤️, запратите 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 21 (approx. per word bound = -7.477, relative change = 2.666e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 22 (approx. per word bound = -7.475, relative change = 2.518e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 23 (approx. per word bound = -7.473, relative change = 2.473e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 24 (approx. per word bound = -7.471, relative change = 2.464e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 25 (approx. per word bound = -7.470, relative change = 2.489e-04) 
## Topic 1: у, и, на, се, су 
##  Topic 2: infodefenseserbia, #српски, #сербский, infodefense, 🇷🇸 
##  Topic 3: у, t.me, buntcg, је, 🇷🇸 
##  Topic 4: да, је, и, у, се 
##  Topic 5: и, је, да, у, се 
##  Topic 6: је, у, су, од, и 
##  Topic 7: на, 🇷🇸, у, и, су 
##  Topic 8: да, у, и, је, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, infodefenseserbia, и 
##  Topic 10: 🇷🇸, и, ❤️, нас, запратите 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 26 (approx. per word bound = -7.468, relative change = 2.557e-04) 
## ....................................................................................................
## Completed E-Step (7 seconds). 
## Completed M-Step. 
## Completing Iteration 27 (approx. per word bound = -7.466, relative change = 2.626e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 28 (approx. per word bound = -7.464, relative change = 2.634e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 29 (approx. per word bound = -7.462, relative change = 2.488e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 30 (approx. per word bound = -7.460, relative change = 2.161e-04) 
## Topic 1: у, и, на, се, су 
##  Topic 2: 🇷🇸, #српски, #сербский, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, buntcg, 🇷🇸, је 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, и, од 
##  Topic 7: у, на, и, су, је 
##  Topic 8: да, у, је, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, infodefenseserbia, и 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 31 (approx. per word bound = -7.459, relative change = 1.811e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 32 (approx. per word bound = -7.458, relative change = 1.505e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 33 (approx. per word bound = -7.457, relative change = 1.322e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 34 (approx. per word bound = -7.456, relative change = 1.174e-04) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 35 (approx. per word bound = -7.455, relative change = 9.754e-05) 
## Topic 1: у, и, на, се, су 
##  Topic 2: 🇷🇸, #српски, #сербский, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, buntcg, 🇷🇸, је 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, на, су, је 
##  Topic 8: да, је, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 36 (approx. per word bound = -7.455, relative change = 8.653e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 37 (approx. per word bound = -7.454, relative change = 7.670e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 38 (approx. per word bound = -7.453, relative change = 6.629e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 39 (approx. per word bound = -7.453, relative change = 6.076e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 40 (approx. per word bound = -7.453, relative change = 5.765e-05) 
## Topic 1: у, и, на, се, су 
##  Topic 2: 🇷🇸, #српски, #сербский, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, buntcg, 🇷🇸, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, на, је, су 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 41 (approx. per word bound = -7.452, relative change = 5.435e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 42 (approx. per word bound = -7.452, relative change = 4.779e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 43 (approx. per word bound = -7.451, relative change = 4.647e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 44 (approx. per word bound = -7.451, relative change = 4.384e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 45 (approx. per word bound = -7.451, relative change = 4.200e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #српски, #сербский, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, buntcg, 🇷🇸, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, на, су 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 46 (approx. per word bound = -7.451, relative change = 4.196e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 47 (approx. per word bound = -7.450, relative change = 4.018e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 48 (approx. per word bound = -7.450, relative change = 3.912e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 49 (approx. per word bound = -7.450, relative change = 3.738e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 50 (approx. per word bound = -7.449, relative change = 3.579e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, на, су 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 51 (approx. per word bound = -7.449, relative change = 3.285e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 52 (approx. per word bound = -7.449, relative change = 3.112e-05) 
## ....................................................................................................
## Completed E-Step (6 seconds). 
## Completed M-Step. 
## Completing Iteration 53 (approx. per word bound = -7.449, relative change = 3.062e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 54 (approx. per word bound = -7.448, relative change = 2.995e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 55 (approx. per word bound = -7.448, relative change = 2.788e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, на, су 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 56 (approx. per word bound = -7.448, relative change = 2.699e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 57 (approx. per word bound = -7.448, relative change = 2.545e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 58 (approx. per word bound = -7.448, relative change = 2.478e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 59 (approx. per word bound = -7.447, relative change = 2.457e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 60 (approx. per word bound = -7.447, relative change = 2.332e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, на, су 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 61 (approx. per word bound = -7.447, relative change = 2.124e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 62 (approx. per word bound = -7.447, relative change = 2.013e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 63 (approx. per word bound = -7.447, relative change = 1.982e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 64 (approx. per word bound = -7.447, relative change = 1.934e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 65 (approx. per word bound = -7.447, relative change = 1.828e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, да, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, су, на 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 66 (approx. per word bound = -7.446, relative change = 1.708e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 67 (approx. per word bound = -7.446, relative change = 1.629e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 68 (approx. per word bound = -7.446, relative change = 1.663e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 69 (approx. per word bound = -7.446, relative change = 1.701e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 70 (approx. per word bound = -7.446, relative change = 1.690e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, на, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, су, на 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 71 (approx. per word bound = -7.446, relative change = 1.636e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 72 (approx. per word bound = -7.446, relative change = 1.599e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 73 (approx. per word bound = -7.446, relative change = 1.541e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 74 (approx. per word bound = -7.445, relative change = 1.484e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 75 (approx. per word bound = -7.445, relative change = 1.407e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, на, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, су, на 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, infodefenseserbia 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 76 (approx. per word bound = -7.445, relative change = 1.393e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 77 (approx. per word bound = -7.445, relative change = 1.477e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 78 (approx. per word bound = -7.445, relative change = 1.424e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 79 (approx. per word bound = -7.445, relative change = 1.324e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 80 (approx. per word bound = -7.445, relative change = 1.280e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, на, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, су, на 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, 🇷🇺, и, за 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 81 (approx. per word bound = -7.445, relative change = 1.249e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 82 (approx. per word bound = -7.445, relative change = 1.180e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 83 (approx. per word bound = -7.445, relative change = 1.214e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 84 (approx. per word bound = -7.444, relative change = 1.253e-05) 
## ....................................................................................................
## Completed E-Step (4 seconds). 
## Completed M-Step. 
## Completing Iteration 85 (approx. per word bound = -7.444, relative change = 1.249e-05) 
## Topic 1: у, и, на, су, се 
##  Topic 2: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense 
##  Topic 3: t.me, у, 🇷🇸, buntcg, и 
##  Topic 4: да, је, и, се, у 
##  Topic 5: и, је, у, на, се 
##  Topic 6: је, у, су, на, и 
##  Topic 7: у, и, је, су, на 
##  Topic 8: је, да, у, и, на 
##  Topic 9: ⚡️, 🇷🇸, и, 🇷🇺, за 
##  Topic 10: 🇷🇸, ❤️, и, запратите, чат-бот 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 86 (approx. per word bound = -7.444, relative change = 1.273e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 87 (approx. per word bound = -7.444, relative change = 1.195e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 88 (approx. per word bound = -7.444, relative change = 1.086e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Completing Iteration 89 (approx. per word bound = -7.444, relative change = 1.015e-05) 
## ....................................................................................................
## Completed E-Step (5 seconds). 
## Completed M-Step. 
## Model Converged
# Top reči po temi
cat("\n=== TOP REČI PO TEMI ===\n")
## 
## === TOP REČI PO TEMI ===
topic_labels <- stm::labelTopics(stm_model, n = 10)
print(topic_labels)
## Topic 1 Top Words:
##       Highest Prob: у, и, на, су, се, од, је, снага, снаге, оружаних 
##       FREX: напредовале, напредовали, крилу, клешејевке, бердичи, богдановке, вербовог, авдејевски, повукле, напредујући 
##       Lift: напредовале, @colonelassad, @infidefenseserbia, @starshii_pogrannaryada, @vdv_vistrel, @voenkors, 01-02.05.24, 03-04.05.24, 04-05.05.24, 05.29-30.24 
##       Score: оружане, правцу, оружаних, непријатељ, северно, борбе, правац, украјине, непријатеља, напредовање 
## Topic 2 Top Words:
##       Highest Prob: 🇷🇸, #сербский, #српски, infodefenseserbia, infodefense, пратите, нас, извор, ‼️, у 
##       FREX: силовик, ирна, 🇵🇸, либана, ✡️, ☪️, либан, 🇱🇧, @irna_ru, јемена 
##       Lift: @zparabellummd, af, iranist, владлену, галичина, јасин, јунису, катарски, осама, рафе 
##       Score: #српски, #сербский, infodefense, пратите, infodefenseserbia, 🇷🇸, ‼️, нас, рввоенкор, т.ме 
## Topic 3 Top Words:
##       Highest Prob: t.me, у, 🇷🇸, buntcg, и, је, на, се, су, да 
##       FREX: вучић, bunkersrb, саду, студената, ухапшен, факултета, шиптари, приштини, митровици, снс-а 
##       Lift: #закон, #промена_пола, ✅️, 1991-1995, borislav, choose, cy, eludba, emerald, fighter 
##       Score: t.me, buntcg, narodnapatrola, србије, српске, саду, вучић, београду, србија, студенти 
## Topic 4 Top Words:
##       Highest Prob: да, је, и, се, у, не, то, на, ће, а 
##       FREX: залужни, резидент, проблем, п.с, јасно, дешава, једноставно, чак, много, онда 
##       Lift: •‎, 1-3, 20а, 25а, 30-40, bradley, fp-5, god, march, participants 
##       Score: то, не, ће, да, подољака, ако, јуриј, али, сам, ли 
## Topic 5 Top Words:
##       Highest Prob: и, је, у, на, се, да, су, од, који, за 
##       FREX: светог, сестре, црква, православне, христа, христос, ⚪️, господ, манастир, православна 
##       Lift: #патриотизам, #сећање, #слободазаигора, #смрт, @infodefensemailbot, @kompasinfo_rs, @krozistoriju, ↪️, ✉️, 1941-1945 
##       Score: t.me, срби, ☦️, сестре, buntcg, српски, српска, светог, српске, свети 
## Topic 6 Top Words:
##       Highest Prob: је, у, су, на, и, се, од, да, који, за 
##       FREX: фсб, гладков, погинуло, сериала, гувернер, истражни, цивиле, направа, гелера, повређено 
##       Lift: #11марта, #wyf2024, #азов, #артемовскбахмут, #артјомовск, #бебе, #бившизатвореници, #биолошкелабораторије, #блокадаводе, #буча 
##       Score: ▪️, гувернер, повређено, људи, оружаних, украјине, је, пожар, повређених, гладков 
## Topic 7 Top Words:
##       Highest Prob: у, је, и, су, на, снага, украјине, области, оружаних, ▪️ 
##       FREX: пво, летелице, беспилотне, противваздушне, складиште, посада, ланцет, искандер, противваздушна, хаубице 
##       Lift: ланцет, #dejanpetar, #dejanpetarzlatanovic, #dejanpetarzlatanović, #sns, #srbininfo, #srđannogo, #srdjannogo, #studentiublokadi, #важное 
##       Score: летелица, ракета, беспилотних, пво, беспилотне, оружаних, летелице, ракете, ▪️, дронова 
## Topic 8 Top Words:
##       Highest Prob: је, да, у, и, на, за, се, са, ће, сад 
##       FREX: милијарди, сједињене, сједињених, кина, санкција, бајдена, орбан, блумберг, что, самиту 
##       Lift: бајденова, брикс-а, политико, фицо, кина, санкција, сједињене, сједињених, что, #army2022 
##       Score: трамп, еу, ▪️, долара, 🇺🇸, сад, путин, председник, русија, украјини 
## Topic 9 Top Words:
##       Highest Prob: ⚡️, 🇷🇸, и, 🇷🇺, за, се, у, infodefenseserbia, infodefense, је 
##       FREX: палчеве, помолимо, @divgen, новопавловски, сумски, з-комитет, држимо, момке, курски, селидово-кураховски 
##       Lift: #jurijpodoljaka, američka, bukvalno, crvenom, divgen, dva, gaze, huta, huti, ih 
##       Score: ⚡️, палчеве, држимо, помолимо, момке, #јуријподољака, infodefense, #српски, #сербский, infodefenseserbia 
## Topic 10 Top Words:
##       Highest Prob: 🇷🇸, ❤️, и, запратите, чат-бот, infodefall, нас, истина, infodefenseserbia, знање 
##       FREX: ❤️, чат-бот, infodefall, знање, слобода, моћ, објаву, проследите, буревестник, доушник 
##       Lift: 955а, andré, appcloud-а, collonelassad, debeljaca1vrs, ees, eмиграната, eмигранти, fpv-дрон, grmi 
##       Score: ❤️, знање, чат-бот, infodefall, запратите, проследите, истина, објаву, слобода, драги
# Proporcije tema
topic_props <- as.data.table(stm_model$theta)
colnames(topic_props) <- paste0("Topic_", 1:K)
topic_props[, doc_id := seq_len(.N)]

# Prosečne proporcije tema
avg_topic_props <- colMeans(topic_props[, 1:K, with = FALSE])
cat("\n=== PROSEČNE PROPORCIJE TEMA ===\n")
## 
## === PROSEČNE PROPORCIJE TEMA ===
print(sort(avg_topic_props, decreasing = TRUE))
##    Topic_3    Topic_4    Topic_2    Topic_8    Topic_1    Topic_7    Topic_9 
## 0.15959777 0.13169090 0.11997297 0.10966132 0.09768904 0.09486269 0.08846085 
##    Topic_5    Topic_6   Topic_10 
## 0.08529751 0.07686452 0.03590243
# Vizualizacija tema
p6 <- ggplot(data.table(
  Topic = paste0("Topic_", 1:K),
  Proportion = avg_topic_props
), aes(x = reorder(Topic, Proportion), y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Proporcije tema",
    x = "Tema",
    y = "Proporcija"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"))

print(p6)

Mrežna Analiza (Conditional)

# Mrežna analiza deljenja
if ("forwarded_from" %in% names(dt)) {
  cat("=== MREŽNA ANALIZA DELJENJA ===\n")
  
  # Kreiraj edge list (izvor -> grupa)
  edges <- dt[!is.na(forwarded_from) & forwarded_from != "", 
              .(from = forwarded_from, to = group_anon)]
  
  if (nrow(edges) > 0) {
    # Kreiraj graf
    g <- igraph::graph_from_data_frame(edges, directed = TRUE)
    
    # Centralnosti
    degree_centrality <- igraph::degree(g, mode = "in")
    betweenness_centrality <- igraph::betweenness(g)
    
    # Top izvori (najviše deljeno)
    top_sources <- sort(degree_centrality, decreasing = TRUE)[1:min(10, length(degree_centrality))]
    cat("\nTop izvori (najviše deljeno):\n")
    print(head(top_sources, 10))
    
    # Top hub grupe
    top_hubs <- sort(igraph::degree(g, mode = "out"), decreasing = TRUE)[1:min(10, length(igraph::degree(g, mode = "out")))]
    cat("\nTop hub grupe (najviše deli):\n")
    print(head(top_hubs, 10))
    
    # Vizualizacija (pojednostavljena)
    if (igraph::vcount(g) <= 50) {  # Samo za manje mreže
      plot(g, 
           vertex.size = 5,
           vertex.label.cex = 0.6,
           edge.arrow.size = 0.3,
           layout = igraph::layout_with_fr(g))
    } else {
      cat("Mreža prevelika za vizualizaciju (", igraph::vcount(g), "čvorova)\n")
    }
  } else {
    cat("Nema podataka o deljenju za mrežnu analizu.\n")
  }
} else {
  cat("Kolona 'forwarded_from' nije dostupna. Preskačem mrežnu analizu.\n")
}

Sentiment i Intenzitet

cat("=== SENTIMENT ANALIZA ===\n")
## === SENTIMENT ANALIZA ===
cat("Napomena: Sentiment je pomoćna metrika, ne dokaz.\n\n")
## Napomena: Sentiment je pomoćna metrika, ne dokaz.
# Sentiment analiza (sentimentr)
# Napomena: sentimentr radi bolje sa engleskim, ali može dati osnovne indikatore

# Funkcija za merenje "uzbunjivanja"
measure_intensity <- function(text) {
  if (is.na(text) || text == "") return(0)
  
  intensity <- 0
  
  # ALL CAPS ratio
  caps_ratio <- str_count(text, "[A-ZА-Я]") / max(nchar(text), 1)
  if (caps_ratio > 0.3) intensity <- intensity + 1
  
  # Uzvičnici
  exclamation_count <- str_count(text, "!")
  intensity <- intensity + min(exclamation_count, 5) * 0.2
  
  # "Hitno", "Sramota", itd.
  alarm_words <- c("hitno", "urgentno", "sramota", "skandal", "katastrofa", 
                   "opasnost", "pazite", "oprez")
  for (word in alarm_words) {
    if (grepl(paste0("\\b", word, "\\b"), text, ignore.case = TRUE)) {
      intensity <- intensity + 0.5
    }
  }
  
  return(intensity)
}

dt[, intensity := sapply(text_clean, measure_intensity)]

cat("Prosečan intenzitet:", round(mean(dt$intensity, na.rm = TRUE), 2), "\n")
## Prosečan intenzitet: 0.04
cat("Postovi sa visokim intenzitetom (>= 2):", sum(dt$intensity >= 2, na.rm = TRUE), "\n")
## Postovi sa visokim intenzitetom (>= 2): 2
# Trend intenziteta
if ("date" %in% names(dt)) {
  intensity_by_date <- dt[!is.na(date), .(avg_intensity = mean(intensity, na.rm = TRUE)), 
                          by = date][order(date)]
  
  p7 <- ggplot(intensity_by_date, aes(x = date, y = avg_intensity)) +
    geom_line(color = "purple", linewidth = 1) +
    geom_point(color = "purple", alpha = 0.6) +
    labs(
      title = "Trend intenziteta (uzbunjivanja) kroz vreme",
      x = "Datum",
      y = "Prosečan intenzitet"
    ) +
    theme_minimal() +
    theme(plot.title = element_text(size = 14, face = "bold"))
  
  print(p7)
}

Findings (Sažetak Nalaza)

cat("=== SAŽETAK NALAZA ===\n\n")
## === SAŽETAK NALAZA ===
cat("1. OPIS DATASETA\n")
## 1. OPIS DATASETA
cat("   - Ukupan broj postova:", nrow(dt), "\n")
##    - Ukupan broj postova: 34609
cat("   - Broj grupa:", length(unique(dt$group_anon)), "\n")
##    - Broj grupa: 3
if ("date" %in% names(dt)) {
  cat("   - Vremenski raspon:", min(dt$date, na.rm = TRUE), "do", 
      max(dt$date, na.rm = TRUE), "\n")
}
##    - Vremenski raspon: 18894 do 20434
cat("\n2. KRAGUJEVAC\n")
## 
## 2. KRAGUJEVAC
cat("   - Postovi koji pominju Kragujevac:", sum(dt$mentions_kg, na.rm = TRUE), 
    "(", round(100 * mean(dt$mentions_kg, na.rm = TRUE), 2), "%)\n")
##    - Postovi koji pominju Kragujevac: 37 ( 0.11 %)
cat("\n3. ORGANIZOVANJE PROTESTA (CTA)\n")
## 
## 3. ORGANIZOVANJE PROTESTA (CTA)
cat("   - Postovi sa CTA karakteristikama:", sum(dt$flag_cta, na.rm = TRUE), 
    "(", round(100 * mean(dt$flag_cta, na.rm = TRUE), 2), "%)\n")
##    - Postovi sa CTA karakteristikama: 189 ( 0.55 %)
cat("   - Prosečan CTA score:", round(mean(dt$cta_score, na.rm = TRUE), 2), "\n")
##    - Prosečan CTA score: 0.04
cat("\n4. TEME I NARATIVI\n")
## 
## 4. TEME I NARATIVI
cat("   - Identifikovano tema:", K, "\n")
##    - Identifikovano tema: 10
cat("   - Top termini i bigrami su prikazani u sekciji 'Propagandni Narativi i Teme'\n")
##    - Top termini i bigrami su prikazani u sekciji 'Propagandni Narativi i Teme'
cat("\n5. INTENZITET\n")
## 
## 5. INTENZITET
cat("   - Prosečan intenzitet uzbunjivanja:", round(mean(dt$intensity, na.rm = TRUE), 2), "\n")
##    - Prosečan intenzitet uzbunjivanja: 0.04
cat("   - Postovi sa visokim intenzitetom:", sum(dt$intensity >= 2, na.rm = TRUE), "\n")
##    - Postovi sa visokim intenzitetom: 2
cat("\n=== NAPOMENA ===\n")
## 
## === NAPOMENA ===
cat("Ova analiza identifikuje obrasce u javnom diskursu.\n")
## Ova analiza identifikuje obrasce u javnom diskursu.
cat("Rezultati su agregirani i anonimizovani u skladu sa etičkim principima.\n")
## Rezultati su agregirani i anonimizovani u skladu sa etičkim principima.

Limitations

Ograničenja analize:

  1. Kvalitet podataka: Analiza zavisi od kvaliteta i kompletnosti CSV fajla
  2. Sentiment analiza: Sentiment alati su optimizovani za engleski jezik; rezultati za srpski su indikativni
  3. Topic modeling: Broj tema i interpretacija zavise od subjektivne procene
  4. CTA detekcija: Rečnik CTA fraza može biti nepotpun; threshold može zahtevati prilagodbu
  5. Vremenski filter: Ako je korišćen filter, rezultati se odnose samo na odabrani period
  6. Anonimizacija: Potpuna anonimizacija može uticati na interpretaciju rezultata

Export Rezultata

# Export flagovanih postova (anonimizovano)
export_data <- dt[, .(
  row_id,
  group_anon,
  date,
  cta_score,
  flag_cta,
  mentions_kg,
  intensity,
  text_preview = substr(text_clean, 1, 200)  # Samo preview
)]

# Sačuvaj u CSV
export_path_csv <- file.path(OUTPUT_DIR, "flagged_posts_anonimized.csv")
data.table::fwrite(export_data, export_path_csv)
cat("Eksportovano u:", export_path_csv, "\n")
## Eksportovano u: output/flagged_posts_anonimized.csv
# Sačuvaj u Parquet (brže za sledeće učitavanje)
if (requireNamespace("arrow", quietly = TRUE)) {
  export_path_parquet <- file.path(OUTPUT_DIR, "flagged_posts_anonimized.parquet")
  arrow::write_parquet(export_data, export_path_parquet)
  cat("Eksportovano u Parquet:", export_path_parquet, "\n")
}
## Eksportovano u Parquet: output/flagged_posts_anonimized.parquet
# Sačuvaj agregirane statistike
stats_summary <- data.table(
  metric = c(
    "total_posts",
    "total_groups",
    "posts_mentioning_kg",
    "posts_with_cta",
    "avg_cta_score",
    "avg_intensity"
  ),
  value = c(
    nrow(dt),
    length(unique(dt$group_anon)),
    sum(dt$mentions_kg, na.rm = TRUE),
    sum(dt$flag_cta, na.rm = TRUE),
    round(mean(dt$cta_score, na.rm = TRUE), 2),
    round(mean(dt$intensity, na.rm = TRUE), 2)
  )
)

stats_path <- file.path(OUTPUT_DIR, "summary_statistics.csv")
data.table::fwrite(stats_summary, stats_path)
cat("Statistike sačuvane u:", stats_path, "\n")
## Statistike sačuvane u: output/summary_statistics.csv

Appendix

Rečnici i Regex Patterns

cat("=== REČNICI ===\n\n")
## === REČNICI ===
cat("Kragujevac keywords:\n")
## Kragujevac keywords:
print(kragujevac_keywords)
##  [1] "Kragujevac"     "Kragujevcu"     "Kragujevca"     "Kragujevcem"   
##  [5] "KG"             "kg"             "K.G."           "Šumadija"      
##  [9] "Šumadije"       "Šumadiji"       "Šumadijom"      "Stanovo"       
## [13] "Stanova"        "Aerodrom"       "Aerodroma"      "Centar"        
## [17] "Centru"         "Centra"         "Kragujevčani"   "Kragujevčanima"
cat("\nCTA phrases:\n")
## 
## CTA phrases:
print(head(cta_phrases, 20))
##  [1] "okupljanje" "okupljanja" "okupljanju" "protest"    "protesta"  
##  [6] "protestu"   "proteste"   "večeras u"  "večeras u"  "sutra u"   
## [11] "sutra u"    "ponesite"   "ponesite"   "ponesite"   "blokada"   
## [16] "blokade"    "blokadi"    "skup"       "skupa"      "skupu"
cat("\n=== SESSION INFO ===\n")
## 
## === SESSION INFO ===
print(sessionInfo())
## R version 4.5.0 (2025-04-11 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Europe/Budapest
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.4.0   knitr_1.50         sentimentr_2.9.0   stringr_1.5.1     
##  [5] wordcloud_2.6      RColorBrewer_1.1-3 DT_0.33            ggplot2_3.5.2     
##  [9] igraph_2.2.1       stm_1.3.8          tidytext_0.4.2     quanteda_4.3.1    
## [13] lubridate_1.9.4    dplyr_1.1.4        arrow_22.0.0       data.table_1.17.4 
## 
## loaded via a namespace (and not attached):
##  [1] fastmatch_1.1-6   gtable_0.3.6      xfun_0.52         bslib_0.9.0      
##  [5] htmlwidgets_1.6.4 lattice_0.22-6    tzdb_0.5.0        vctrs_0.6.5      
##  [9] tools_4.5.0       generics_0.1.4    tibble_3.2.1      janeaustenr_1.0.0
## [13] pkgconfig_2.0.3   tokenizers_0.3.0  Matrix_1.7-3      assertthat_0.2.1 
## [17] lifecycle_1.0.4   compiler_4.5.0    farver_2.1.2      textshaping_1.0.1
## [21] codetools_0.2-20  qdapRegex_0.7.10  SnowballC_0.7.1   htmltools_0.5.8.1
## [25] sass_0.4.10       yaml_2.3.10       pillar_1.10.2     jquerylib_0.1.4  
## [29] cachem_1.1.0      lexicon_1.2.1     stopwords_2.3     tidyselect_1.2.1 
## [33] digest_0.6.37     stringi_1.8.7     purrr_1.0.4       labeling_0.4.3   
## [37] fastmap_1.2.0     grid_4.5.0        cli_3.6.5         magrittr_2.0.3   
## [41] textclean_0.9.3   withr_3.0.2       scales_1.4.0      bit64_4.6.0-1    
## [45] timechange_0.3.0  rmarkdown_2.29    matrixStats_1.5.0 bit_4.6.0        
## [49] evaluate_1.0.5    viridisLite_0.4.2 rlang_1.1.6       Rcpp_1.0.14      
## [53] syuzhet_1.0.7     glue_1.8.0        xml2_1.3.8        svglite_2.2.2    
## [57] rstudioapi_0.17.1 jsonlite_2.0.0    R6_2.6.1          systemfonts_1.3.1

Kraj izveštaja

Generisano: 2025-12-15