ANALIZA SIECI SPOŁECZNOŚCIOWYCH TWEETS

Majkowska Agata

semestr letni 2025

PAKIETY

library("tm")
library("SnowballC")
library("RColorBrewer")
library("wordcloud")
library("digest")
library("wordcloud2")
library("syuzhet")
library("ggplot2")
library("tidytext")
library("dplyr")
library(gridExtra)
library(readxl)
library(igraph)
library(RColorBrewer)
library(ggplot2)
library(ggrepel)

WCZYTYWANIE DANYCH

UWAGA!!!!!

Prosze podać swoją ścieżkę do pliku!

apple <- read.csv(file.choose(), header = T)

BUDOWA CORPUSU

corpus <- iconv(apple$text)
corpus <- Corpus(VectorSource(corpus))

CZYSZCZENIE DANYCH

corpus <- tm_map(corpus, tolower)
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops documents
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(cleanset, content_transformer(removeURL)):
## transformation drops documents
cleanset <- tm_map(cleanset, removeWords, c('aapl', 'apple'))
## Warning in tm_map.SimpleCorpus(cleanset, removeWords, c("aapl", "apple")):
## transformation drops documents
cleanset <- tm_map(cleanset, gsub, 
                   pattern = 'stocks', 
                   replacement = 'stock')
## Warning in tm_map.SimpleCorpus(cleanset, gsub, pattern = "stocks", replacement
## = "stock"): transformation drops documents
cleanset <- tm_map(cleanset, stripWhitespace)
## Warning in tm_map.SimpleCorpus(cleanset, stripWhitespace): transformation drops
## documents

PRZYGOTOWANIE DANYCH

# Term document matrix
tdm <- TermDocumentMatrix(cleanset)
tdm <- as.matrix(tdm)
tdm <- tdm[rowSums(tdm)>60,]
tdm[1:10,1:10]
##         Docs
## Terms    1 2 3 4 5 6 7 8 9 10
##   eps    1 1 0 0 0 1 1 1 0  2
##   est    1 1 0 0 0 2 2 0 0  0
##   almost 0 0 0 1 0 0 0 0 0  0
##   delay  0 0 0 1 0 0 0 0 0  0
##   fang   0 0 0 1 0 0 0 0 0  0
##   get    0 0 0 1 0 0 0 0 0  0
##   iphone 0 0 0 1 0 0 0 0 0  0
##   means  0 0 0 1 0 0 0 0 0  0
##   might  0 0 0 1 0 0 0 0 0  0
##   pos…   0 0 0 1 0 0 0 0 0  0
tdm[tdm>1] <- 1
termM <- tdm %*% t(tdm)
termM[1:10,1:10]
##         Terms
## Terms    eps est almost delay fang get iphone means might pos…
##   eps     92  69      0     0    0   3      1     0     0    0
##   est     69  73      0     0    0   0      2     0     0    0
##   almost   0   0     88    86   86  86     86    86    86   85
##   delay    0   0     86    86   86  86     86    86    86   85
##   fang     0   0     86    86   87  86     86    86    86   85
##   get      3   0     86    86   86 135     88    86    89   85
##   iphone   1   2     86    86   86  88    137    86    86   85
##   means    0   0     86    86   86  86     86    86    86   85
##   might    0   0     86    86   86  89     86    86    95   85
##   pos…     0   0     85    85   85  85     85    85    85   85

BUDOWA SIECI

g <- graph.adjacency(termM, weighted = T, mode = 'undirected')
g
## IGRAPH bc5d76c UNW- 27 208 -- 
## + attr: name (v/c), weight (e/n)
## + edges from bc5d76c (vertex names):
##  [1] eps   --eps      eps   --est      eps   --get      eps   --iphone  
##  [5] eps   --earnings eps   --reports  eps   --today    eps   --close   
##  [9] est   --est      est   --iphone   est   --earnings est   --reports 
## [13] almost--almost   almost--delay    almost--fang     almost--get     
## [17] almost--iphone   almost--means    almost--might    almost--pos…    
## [21] almost--stock    almost--sylvacap almost--things   almost--ugly    
## [25] almost--earnings delay --delay    delay --fang     delay --get     
## [29] delay --iphone   delay --means    delay --might    delay --pos…    
## + ... omitted several edges
g <- simplify(g) # USUNIĘCIE PĘTLI

OPIS SIECI

g
## IGRAPH bc625d8 UNW- 27 181 -- 
## + attr: name (v/c), weight (e/n)
## + edges from bc625d8 (vertex names):
##  [1] eps   --est      eps   --get      eps   --iphone   eps   --earnings
##  [5] eps   --reports  eps   --today    eps   --close    est   --iphone  
##  [9] est   --earnings est   --reports  almost--delay    almost--fang    
## [13] almost--get      almost--iphone   almost--means    almost--might   
## [17] almost--pos…     almost--stock    almost--sylvacap almost--things  
## [21] almost--ugly     almost--earnings delay --fang     delay --get     
## [25] delay --iphone   delay --means    delay --might    delay --pos…    
## [29] delay --stock    delay --sylvacap delay --things   delay --ugly    
## + ... omitted several edges

WIZUALIZACJA PROSTA SIECI

plot(g)

WIZUALIZACJA

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"), 
     edge.arrow.size=0.1,
     vertex.size=5, 
     layout=layout.star) 

WIZUALIZACJA STOPNI WIERZCHOŁKÓW

PRZYGOTOWANIE DANYCH

deg<-degree(g, mode="all") # zapisanie do zmiennej degree
deg_dataframe <- as.data.frame(deg) # zapisujemy degree do dataframe
deg_dataframe$node <-row.names(deg_dataframe) # tworzenie kolumny o nazwie node i przypisanie nazwy wiesza
row.names(deg_dataframe) <- NULL # usuwanie nazwy wierszy z dataframe
nodes<-deg_dataframe %>%
  select(node,deg) %>% # wybieranie dwóch kolumn
  arrange(desc(deg))  # sortowanie malejące 
nodes
##        node deg
## 1  sylvacap  23
## 2  earnings  19
## 3       get  17
## 4    iphone  16
## 5   reports  16
## 6     close  16
## 7     today  15
## 8      puts  15
## 9     stock  14
## 10     dont  14
## 11   moving  14
## 12    might  13
## 13   things  13
## 14    calls  13
## 15 anything  13
## 16   expect  13
## 17   almost  12
## 18    delay  12
## 19     fang  12
## 20    means  12
## 21     ugly  12
## 22   rather  12
## 23    earth  12
## 24    head…  12
## 25     pos…  11
## 26      eps   7
## 27      est   4

HISTOGRAM STOPNI WIERZCHOŁKÓW

ggplot(data=nodes, aes(x=reorder(node,deg), y=deg))+
  geom_histogram(stat='identity') +
  xlab(" ") + 
  ylab("Liczebność") + 
  ggtitle("Histogram stopni wierzchołków")+
  coord_flip()
## Warning in geom_histogram(stat = "identity"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

WIZUALIZACJA WEDŁUG STOPNIA WIERZCHOŁKÓW

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"),
     edge.arrow.size=1,
     edge.color="black", ### KOLOR KRAWĘDZI
     vertex.size=degree(g),  ### WIELKOŚĆ WĘZŁA (AKTORA)
     vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA 
     layout=layout.svd) 
## Warning in v(graph): SVD layout was removed, we use Fruchterman-Reingold
## instead.

WIZUALIZACJA WEDŁUG MIARY POSREDNICTWA

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"),
     edge.arrow.size=1,
     edge.color="black", ### KOLOR KRAWĘDZI
     vertex.size=betweenness(g)*0.2,  ### WIELKOŚĆ WĘZŁA (AKTORA)
     vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA 
     layout=layout.graphopt)

WIZUALIZACJA - MODYFIKACJA KRAWĘDZI

E(g)$width <- (log(E(g)$weight)+1)/2
edge_attr(g)
## $weight
##   [1] 69  3  1 70  9  3  1  2 54 10 86 86 86 86 86 86 85 86 85 86 86  2 86 86 86
##  [26] 86 86 85 86 85 86 86  1 86 86 86 86 85 86 85 86 86  1 88 86 89 85 97 85 87
##  [51] 86  9  4  1  3  1 86 86 85 87 85 86 86 18  6  5 86 85 86 85 86 86  1 85 86
##  [76] 85 86 86  3  3 85 85 85 85 85 86 86 13  1  1 85 85 85 85 85 85 85 85 85 85
## [101] 85 85 85 85 86  2  2  1  6  3 11  3  9  2  1 85 85 85 85 85 85 85 85 85 85
## [126] 85 86 85 86 85 87 85 85 85 85 87 85 85 85 85 85 85 85 85 85 85 85 86 85 85
## [151] 85 85 85 85 85 85 86 85 85 86 85 85 85 85 85 85 85 85 85 85 86 85 85 85 85
## [176] 85 85 85 85 85 85
## 
## $width
##   [1] 2.6170533 1.0493061 0.5000000 2.6242476 1.5986123 1.0493061 0.5000000
##   [8] 0.8465736 2.4944920 1.6512925 2.7271736 2.7271736 2.7271736 2.7271736
##  [15] 2.7271736 2.7271736 2.7213256 2.7271736 2.7213256 2.7271736 2.7271736
##  [22] 0.8465736 2.7271736 2.7271736 2.7271736 2.7271736 2.7271736 2.7213256
##  [29] 2.7271736 2.7213256 2.7271736 2.7271736 0.5000000 2.7271736 2.7271736
##  [36] 2.7271736 2.7271736 2.7213256 2.7271736 2.7213256 2.7271736 2.7271736
##  [43] 0.5000000 2.7386684 2.7271736 2.7443182 2.7213256 2.7873555 2.7213256
##  [50] 2.7329541 2.7271736 1.5986123 1.1931472 0.5000000 1.0493061 0.5000000
##  [57] 2.7271736 2.7271736 2.7213256 2.7329541 2.7213256 2.7271736 2.7271736
##  [64] 1.9451859 1.3958797 1.3047190 2.7271736 2.7213256 2.7271736 2.7213256
##  [71] 2.7271736 2.7271736 0.5000000 2.7213256 2.7271736 2.7213256 2.7271736
##  [78] 2.7271736 1.0493061 1.0493061 2.7213256 2.7213256 2.7213256 2.7213256
##  [85] 2.7213256 2.7271736 2.7271736 1.7824747 0.5000000 0.5000000 2.7213256
##  [92] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
##  [99] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7271736
## [106] 0.8465736 0.8465736 0.5000000 1.3958797 1.0493061 1.6989476 1.0493061
## [113] 1.5986123 0.8465736 0.5000000 2.7213256 2.7213256 2.7213256 2.7213256
## [120] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [127] 2.7271736 2.7213256 2.7271736 2.7213256 2.7329541 2.7213256 2.7213256
## [134] 2.7213256 2.7213256 2.7329541 2.7213256 2.7213256 2.7213256 2.7213256
## [141] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [148] 2.7271736 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [155] 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256 2.7271736 2.7213256
## [162] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [169] 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256 2.7213256 2.7213256
## [176] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
plot(g)

plot(g,
     vertex.color=brewer.pal(8, "Pastel2"),
     edge.arrow.size=1,
     edge.color="black", ### KOLOR KRAWĘDZI
     vertex.size=degree(g),  ### WIELKOŚĆ WĘZŁA (AKTORA)
     vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA 
     layout=layout.graphopt) 

WYKRYWANIE WSPÓLNOTOWE

prop <- cluster_label_prop(g)
plot(prop, g)

WYKRYWANIE SPOŁECZNOŚCI

comm <- cluster_edge_betweenness(g)
## Warning in cluster_edge_betweenness(g): At
## core/community/edge_betweenness.c:493 : Membership vector will be selected
## based on the highest modularity score.
plot(comm, g)

WIZUALIZACJA KLIKI

kliki = cliques(g)
a<-largest_cliques(g)
clique1<-a[[1]]
g2<-induced.subgraph(graph=g,vids=clique1)
plot(g2)

                                                                  Made by: 
                                                                  Majkowska Agata
                                                                  agata.majkowska@phdstud.ug.edu.pl