ANALIZA SIECI SPOŁECZNOŚCIOWYCH TWEETS

Majkowska Agata

semestr letni 2026

PAKIETY

library("tm")
library("SnowballC")
library("RColorBrewer")
library("wordcloud")
library("digest")
library("wordcloud2")
library("syuzhet")
library("ggplot2")
library("tidytext")
library("dplyr")
library(gridExtra)
library(readxl)
library(igraph)
library(RColorBrewer)
library(ggplot2)
library(ggrepel)

WCZYTYWANIE DANYCH

UWAGA!!!!!

Prosze podać swoją ścieżkę do pliku!

apple <- read.csv(file.choose(), header = T)

BUDOWA CORPUSU

corpus <- iconv(apple$text)
corpus <- Corpus(VectorSource(corpus))

CZYSZCZENIE DANYCH

corpus <- tm_map(corpus, tolower)
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops documents
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(cleanset, content_transformer(removeURL)):
## transformation drops documents
cleanset <- tm_map(cleanset, removeWords, c('aapl', 'apple'))
## Warning in tm_map.SimpleCorpus(cleanset, removeWords, c("aapl", "apple")):
## transformation drops documents
cleanset <- tm_map(cleanset, gsub, 
                   pattern = 'stocks', 
                   replacement = 'stock')
## Warning in tm_map.SimpleCorpus(cleanset, gsub, pattern = "stocks", replacement
## = "stock"): transformation drops documents
cleanset <- tm_map(cleanset, stripWhitespace)
## Warning in tm_map.SimpleCorpus(cleanset, stripWhitespace): transformation drops
## documents

PRZYGOTOWANIE DANYCH

# Term document matrix
tdm <- TermDocumentMatrix(cleanset)
tdm <- as.matrix(tdm)
tdm <- tdm[rowSums(tdm)>60,]
tdm[1:10,1:10]
##         Docs
## Terms    1 2 3 4 5 6 7 8 9 10
##   eps    1 1 0 0 0 1 1 1 0  2
##   est    1 1 0 0 0 2 2 0 0  0
##   almost 0 0 0 1 0 0 0 0 0  0
##   delay  0 0 0 1 0 0 0 0 0  0
##   fang   0 0 0 1 0 0 0 0 0  0
##   get    0 0 0 1 0 0 0 0 0  0
##   iphone 0 0 0 1 0 0 0 0 0  0
##   means  0 0 0 1 0 0 0 0 0  0
##   might  0 0 0 1 0 0 0 0 0  0
##   pos…   0 0 0 1 0 0 0 0 0  0
tdm[tdm>1] <- 1
termM <- tdm %*% t(tdm)
termM[1:10,1:10]
##         Terms
## Terms    eps est almost delay fang get iphone means might pos…
##   eps     88  69      0     0    0   0      1     0     0    0
##   est     69  73      0     0    0   0      2     0     0    0
##   almost   0   0     87    86   86  86     86    86    86   85
##   delay    0   0     86    86   86  86     86    86    86   85
##   fang     0   0     86    86   87  86     86    86    86   85
##   get      0   0     86    86   86 130     88    86    89   85
##   iphone   1   2     86    86   86  88    136    86    86   85
##   means    0   0     86    86   86  86     86    86    86   85
##   might    0   0     86    86   86  89     86    86    95   85
##   pos…     0   0     85    85   85  85     85    85    85   85

BUDOWA SIECI

g <- graph.adjacency(termM, weighted = T, mode = 'undirected')
## Warning: `graph.adjacency()` was deprecated in igraph 2.0.0.
## ℹ Please use `graph_from_adjacency_matrix()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
g
## IGRAPH ba56293 UNW- 28 218 -- 
## + attr: name (v/c), weight (e/n)
## + edges from ba56293 (vertex names):
##  [1] eps   --eps      eps   --est      eps   --iphone   eps   --earnings
##  [5] eps   --…        eps   --reports  eps   --close    est   --est     
##  [9] est   --iphone   est   --earnings est   --…        est   --reports 
## [13] almost--almost   almost--delay    almost--fang     almost--get     
## [17] almost--iphone   almost--means    almost--might    almost--pos…    
## [21] almost--stock    almost--sylvacap almost--things   almost--ugly    
## [25] almost--earnings delay --delay    delay --fang     delay --get     
## [29] delay --iphone   delay --means    delay --might    delay --pos…    
## + ... omitted several edges
g <- simplify(g) # USUNIĘCIE PĘTLI

OPIS SIECI

g
## IGRAPH ba59dd8 UNW- 28 190 -- 
## + attr: name (v/c), weight (e/n)
## + edges from ba59dd8 (vertex names):
##  [1] eps   --est      eps   --iphone   eps   --earnings eps   --…       
##  [5] eps   --reports  eps   --close    est   --iphone   est   --earnings
##  [9] est   --…        est   --reports  almost--delay    almost--fang    
## [13] almost--get      almost--iphone   almost--means    almost--might   
## [17] almost--pos…     almost--stock    almost--sylvacap almost--things  
## [21] almost--ugly     almost--earnings delay --fang     delay --get     
## [25] delay --iphone   delay --means    delay --might    delay --pos…    
## [29] delay --stock    delay --sylvacap delay --things   delay --ugly    
## + ... omitted several edges

WIZUALIZACJA PROSTA SIECI

plot(g)

WIZUALIZACJA

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"), 
     edge.arrow.size=0.1,
     vertex.size=5, 
     layout=layout.star) 

WIZUALIZACJA STOPNI WIERZCHOŁKÓW

PRZYGOTOWANIE DANYCH

deg<-degree(g, mode="all") # zapisanie do zmiennej degree
deg_dataframe <- as.data.frame(deg) # zapisujemy degree do dataframe
deg_dataframe$node <-row.names(deg_dataframe) # tworzenie kolumny o nazwie node i przypisanie nazwy wiesza
row.names(deg_dataframe) <- NULL # usuwanie nazwy wierszy z dataframe
nodes<-deg_dataframe %>%
  select(node,deg) %>% # wybieranie dwóch kolumn
  arrange(desc(deg))  # sortowanie malejące 
nodes
##        node deg
## 1  sylvacap  23
## 2  earnings  20
## 3    iphone  17
## 4   reports  17
## 5     close  17
## 6       get  16
## 7     stock  15
## 8      puts  15
## 9     might  14
## 10   things  14
## 11    calls  14
## 12     dont  14
## 13 anything  14
## 14   moving  14
## 15    today  13
## 16   expect  13
## 17   almost  12
## 18    delay  12
## 19     fang  12
## 20    means  12
## 21     ugly  12
## 22        …  12
## 23   rather  12
## 24    earth  12
## 25    head…  12
## 26     pos…  11
## 27      eps   6
## 28      est   5

HISTOGRAM STOPNI WIERZCHOŁKÓW

ggplot(data=nodes, aes(x=reorder(node,deg), y=deg))+
  geom_histogram(stat='identity') +
  xlab(" ") + 
  ylab("Liczebność") + 
  ggtitle("Histogram stopni wierzchołków")+
  coord_flip()
## Warning in geom_histogram(stat = "identity"): Ignoring unknown parameters:
## `binwidth` and `bins`

WIZUALIZACJA WEDŁUG STOPNIA WIERZCHOŁKÓW

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"),
     edge.arrow.size=1,
     edge.color="black", ### KOLOR KRAWĘDZI
     vertex.size=degree(g),  ### WIELKOŚĆ WĘZŁA (AKTORA)
     vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA 
     layout=layout.svd) 
## Warning: `layout.svd()` was deprecated in igraph 2.1.0.
## ℹ Please use `layout_with_fr()` instead.
## ℹ The deprecated feature was likely used in the igraph package.
##   Please report the issue at <https://github.com/igraph/rigraph/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

WIZUALIZACJA WEDŁUG MIARY POSREDNICTWA

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"),
     edge.arrow.size=1,
     edge.color="black", ### KOLOR KRAWĘDZI
     vertex.size=betweenness(g)*0.2,  ### WIELKOŚĆ WĘZŁA (AKTORA)
     vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA 
     layout=layout.graphopt)

WIZUALIZACJA - MODYFIKACJA KRAWĘDZI

E(g)$width <- (log(E(g)$weight)+1)/2
edge_attr(g)
## $weight
##   [1] 69  1 66 50  9  1  2 54 49 10 86 86 86 86 86 86 85 86 85 86 86  2 86 86 86
##  [26] 86 86 85 86 85 86 86  1 86 86 86 86 85 86 85 86 86  1 88 86 89 85 97 85 87
##  [51] 86  4 10  1  3  1 86 86 85 87 85 86 86 17  7  6  5 86 85 86 85 86 86  1 85
##  [76] 86 85 86 86  3  2  3 85 85 85 85 85 86 86 13  6  1  1 85 85 85 85 85 85 85
## [101] 85 85 85 85 85 85 85 86  2  2  2  1 92  6  3  7  3  7  1  1  1  1  1  1 85
## [126] 85 85 85 85 85 85 85 85 85 85 86 85 86 85 87 85 85 85 85 87 85 85 85 85 85
## [151] 85 85 85 85 85 85 86 85 85 85 85 85 85 85 85 86 85 85 86 85 85 85 85 85 85
## [176] 85 85 85 85 86 85 85 85 85 85 85 85 85 85 85
## 
## $width
##   [1] 2.6170533 0.5000000 2.5948274 2.4560115 1.5986123 0.5000000 0.8465736
##   [8] 2.4944920 2.4459101 1.6512925 2.7271736 2.7271736 2.7271736 2.7271736
##  [15] 2.7271736 2.7271736 2.7213256 2.7271736 2.7213256 2.7271736 2.7271736
##  [22] 0.8465736 2.7271736 2.7271736 2.7271736 2.7271736 2.7271736 2.7213256
##  [29] 2.7271736 2.7213256 2.7271736 2.7271736 0.5000000 2.7271736 2.7271736
##  [36] 2.7271736 2.7271736 2.7213256 2.7271736 2.7213256 2.7271736 2.7271736
##  [43] 0.5000000 2.7386684 2.7271736 2.7443182 2.7213256 2.7873555 2.7213256
##  [50] 2.7329541 2.7271736 1.1931472 1.6512925 0.5000000 1.0493061 0.5000000
##  [57] 2.7271736 2.7271736 2.7213256 2.7329541 2.7213256 2.7271736 2.7271736
##  [64] 1.9166067 1.4729551 1.3958797 1.3047190 2.7271736 2.7213256 2.7271736
##  [71] 2.7213256 2.7271736 2.7271736 0.5000000 2.7213256 2.7271736 2.7213256
##  [78] 2.7271736 2.7271736 1.0493061 0.8465736 1.0493061 2.7213256 2.7213256
##  [85] 2.7213256 2.7213256 2.7213256 2.7271736 2.7271736 1.7824747 1.3958797
##  [92] 0.5000000 0.5000000 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
##  [99] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [106] 2.7213256 2.7213256 2.7271736 0.8465736 0.8465736 0.8465736 0.5000000
## [113] 2.7608943 1.3958797 1.0493061 1.4729551 1.0493061 1.4729551 0.5000000
## [120] 0.5000000 0.5000000 0.5000000 0.5000000 0.5000000 2.7213256 2.7213256
## [127] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [134] 2.7213256 2.7213256 2.7271736 2.7213256 2.7271736 2.7213256 2.7329541
## [141] 2.7213256 2.7213256 2.7213256 2.7213256 2.7329541 2.7213256 2.7213256
## [148] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [155] 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256 2.7213256 2.7213256
## [162] 2.7213256 2.7213256 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256
## [169] 2.7271736 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [176] 2.7213256 2.7213256 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256
## [183] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [190] 2.7213256
plot(g)

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"),
     edge.arrow.size=1,
     edge.color="black", ### KOLOR KRAWĘDZI
     vertex.size=degree(g),  ### WIELKOŚĆ WĘZŁA (AKTORA)
     vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA 
     layout=layout.graphopt) 

WYKRYWANIE WSPÓLNOTOWE

prop <- cluster_label_prop(g)
plot(prop, g)

WYKRYWANIE SPOŁECZNOŚCI

comm <- cluster_edge_betweenness(g)
## Warning in cluster_edge_betweenness(g): At
## vendor/cigraph/src/community/edge_betweenness.c:503 : Membership vector will be
## selected based on the highest modularity score.
plot(comm, g)

WIZUALIZACJA KLIKI

kliki = cliques(g)
a<-largest_cliques(g)
clique1<-a[[1]]
g2<-induced.subgraph(graph=g,vids=clique1)
## Warning: `induced.subgraph()` was deprecated in igraph 2.0.0.
## ℹ Please use `induced_subgraph()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot(g2)

                                                                  Made by: 
                                                                  Majkowska Agata
                                                                  agata.majkowska@phdstud.ug.edu.pl