PAKIETY
library("tm")
library("SnowballC")
library("RColorBrewer")
library("wordcloud")
library("digest")
library("wordcloud2")
library("syuzhet")
library("ggplot2")
library("tidytext")
library("dplyr")
library(gridExtra)
library(readxl)
library(igraph)
library(RColorBrewer)
library(ggplot2)
library(ggrepel)
WCZYTYWANIE DANYCH
UWAGA!!!!!
Prosze podać swoją ścieżkę do pliku!
apple <- read.csv(file.choose(), header = T)
BUDOWA CORPUSU
corpus <- iconv(apple$text)
corpus <- Corpus(VectorSource(corpus))
CZYSZCZENIE DANYCH
corpus <- tm_map(corpus, tolower)
## Warning in tm_map.SimpleCorpus(corpus, tolower): transformation drops documents
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
## Warning in tm_map.SimpleCorpus(cleanset, content_transformer(removeURL)):
## transformation drops documents
cleanset <- tm_map(cleanset, removeWords, c('aapl', 'apple'))
## Warning in tm_map.SimpleCorpus(cleanset, removeWords, c("aapl", "apple")):
## transformation drops documents
cleanset <- tm_map(cleanset, gsub,
pattern = 'stocks',
replacement = 'stock')
## Warning in tm_map.SimpleCorpus(cleanset, gsub, pattern = "stocks", replacement
## = "stock"): transformation drops documents
cleanset <- tm_map(cleanset, stripWhitespace)
## Warning in tm_map.SimpleCorpus(cleanset, stripWhitespace): transformation drops
## documents
PRZYGOTOWANIE DANYCH
# Term document matrix
tdm <- TermDocumentMatrix(cleanset)
tdm <- as.matrix(tdm)
tdm <- tdm[rowSums(tdm)>60,]
tdm[1:10,1:10]
## Docs
## Terms 1 2 3 4 5 6 7 8 9 10
## eps 1 1 0 0 0 1 1 1 0 2
## est 1 1 0 0 0 2 2 0 0 0
## almost 0 0 0 1 0 0 0 0 0 0
## delay 0 0 0 1 0 0 0 0 0 0
## fang 0 0 0 1 0 0 0 0 0 0
## get 0 0 0 1 0 0 0 0 0 0
## iphone 0 0 0 1 0 0 0 0 0 0
## means 0 0 0 1 0 0 0 0 0 0
## might 0 0 0 1 0 0 0 0 0 0
## pos… 0 0 0 1 0 0 0 0 0 0
tdm[tdm>1] <- 1
termM <- tdm %*% t(tdm)
termM[1:10,1:10]
## Terms
## Terms eps est almost delay fang get iphone means might pos…
## eps 88 69 0 0 0 0 1 0 0 0
## est 69 73 0 0 0 0 2 0 0 0
## almost 0 0 87 86 86 86 86 86 86 85
## delay 0 0 86 86 86 86 86 86 86 85
## fang 0 0 86 86 87 86 86 86 86 85
## get 0 0 86 86 86 130 88 86 89 85
## iphone 1 2 86 86 86 88 136 86 86 85
## means 0 0 86 86 86 86 86 86 86 85
## might 0 0 86 86 86 89 86 86 95 85
## pos… 0 0 85 85 85 85 85 85 85 85
BUDOWA SIECI
g <- graph.adjacency(termM, weighted = T, mode = 'undirected')
## Warning: `graph.adjacency()` was deprecated in igraph 2.0.0.
## ℹ Please use `graph_from_adjacency_matrix()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
g
## IGRAPH ba56293 UNW- 28 218 --
## + attr: name (v/c), weight (e/n)
## + edges from ba56293 (vertex names):
## [1] eps --eps eps --est eps --iphone eps --earnings
## [5] eps --… eps --reports eps --close est --est
## [9] est --iphone est --earnings est --… est --reports
## [13] almost--almost almost--delay almost--fang almost--get
## [17] almost--iphone almost--means almost--might almost--pos…
## [21] almost--stock almost--sylvacap almost--things almost--ugly
## [25] almost--earnings delay --delay delay --fang delay --get
## [29] delay --iphone delay --means delay --might delay --pos…
## + ... omitted several edges
g <- simplify(g) # USUNIĘCIE PĘTLI
OPIS SIECI
g
## IGRAPH ba59dd8 UNW- 28 190 --
## + attr: name (v/c), weight (e/n)
## + edges from ba59dd8 (vertex names):
## [1] eps --est eps --iphone eps --earnings eps --…
## [5] eps --reports eps --close est --iphone est --earnings
## [9] est --… est --reports almost--delay almost--fang
## [13] almost--get almost--iphone almost--means almost--might
## [17] almost--pos… almost--stock almost--sylvacap almost--things
## [21] almost--ugly almost--earnings delay --fang delay --get
## [25] delay --iphone delay --means delay --might delay --pos…
## [29] delay --stock delay --sylvacap delay --things delay --ugly
## + ... omitted several edges
WIZUALIZACJA PROSTA SIECI
plot(g)
WIZUALIZACJA
plot(g,
vertex.color=brewer.pal(8, "Pastel2"),
edge.arrow.size=0.1,
vertex.size=5,
layout=layout.star)
WIZUALIZACJA STOPNI WIERZCHOŁKÓW
PRZYGOTOWANIE DANYCH
deg<-degree(g, mode="all") # zapisanie do zmiennej degree
deg_dataframe <- as.data.frame(deg) # zapisujemy degree do dataframe
deg_dataframe$node <-row.names(deg_dataframe) # tworzenie kolumny o nazwie node i przypisanie nazwy wiesza
row.names(deg_dataframe) <- NULL # usuwanie nazwy wierszy z dataframe
nodes<-deg_dataframe %>%
select(node,deg) %>% # wybieranie dwóch kolumn
arrange(desc(deg)) # sortowanie malejące
nodes
## node deg
## 1 sylvacap 23
## 2 earnings 20
## 3 iphone 17
## 4 reports 17
## 5 close 17
## 6 get 16
## 7 stock 15
## 8 puts 15
## 9 might 14
## 10 things 14
## 11 calls 14
## 12 dont 14
## 13 anything 14
## 14 moving 14
## 15 today 13
## 16 expect 13
## 17 almost 12
## 18 delay 12
## 19 fang 12
## 20 means 12
## 21 ugly 12
## 22 … 12
## 23 rather 12
## 24 earth 12
## 25 head… 12
## 26 pos… 11
## 27 eps 6
## 28 est 5
HISTOGRAM STOPNI WIERZCHOŁKÓW
ggplot(data=nodes, aes(x=reorder(node,deg), y=deg))+
geom_histogram(stat='identity') +
xlab(" ") +
ylab("Liczebność") +
ggtitle("Histogram stopni wierzchołków")+
coord_flip()
## Warning in geom_histogram(stat = "identity"): Ignoring unknown parameters:
## `binwidth` and `bins`
WIZUALIZACJA WEDŁUG STOPNIA WIERZCHOŁKÓW
plot(g,
vertex.color=brewer.pal(8, "Pastel2"),
edge.arrow.size=1,
edge.color="black", ### KOLOR KRAWĘDZI
vertex.size=degree(g), ### WIELKOŚĆ WĘZŁA (AKTORA)
vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA
layout=layout.svd)
## Warning: `layout.svd()` was deprecated in igraph 2.1.0.
## ℹ Please use `layout_with_fr()` instead.
## ℹ The deprecated feature was likely used in the igraph package.
## Please report the issue at <https://github.com/igraph/rigraph/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
WIZUALIZACJA WEDŁUG MIARY POSREDNICTWA
plot(g,
vertex.color=brewer.pal(8, "Pastel2"),
edge.arrow.size=1,
edge.color="black", ### KOLOR KRAWĘDZI
vertex.size=betweenness(g)*0.2, ### WIELKOŚĆ WĘZŁA (AKTORA)
vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA
layout=layout.graphopt)
WIZUALIZACJA - MODYFIKACJA KRAWĘDZI
E(g)$width <- (log(E(g)$weight)+1)/2
edge_attr(g)
## $weight
## [1] 69 1 66 50 9 1 2 54 49 10 86 86 86 86 86 86 85 86 85 86 86 2 86 86 86
## [26] 86 86 85 86 85 86 86 1 86 86 86 86 85 86 85 86 86 1 88 86 89 85 97 85 87
## [51] 86 4 10 1 3 1 86 86 85 87 85 86 86 17 7 6 5 86 85 86 85 86 86 1 85
## [76] 86 85 86 86 3 2 3 85 85 85 85 85 86 86 13 6 1 1 85 85 85 85 85 85 85
## [101] 85 85 85 85 85 85 85 86 2 2 2 1 92 6 3 7 3 7 1 1 1 1 1 1 85
## [126] 85 85 85 85 85 85 85 85 85 85 86 85 86 85 87 85 85 85 85 87 85 85 85 85 85
## [151] 85 85 85 85 85 85 86 85 85 85 85 85 85 85 85 86 85 85 86 85 85 85 85 85 85
## [176] 85 85 85 85 86 85 85 85 85 85 85 85 85 85 85
##
## $width
## [1] 2.6170533 0.5000000 2.5948274 2.4560115 1.5986123 0.5000000 0.8465736
## [8] 2.4944920 2.4459101 1.6512925 2.7271736 2.7271736 2.7271736 2.7271736
## [15] 2.7271736 2.7271736 2.7213256 2.7271736 2.7213256 2.7271736 2.7271736
## [22] 0.8465736 2.7271736 2.7271736 2.7271736 2.7271736 2.7271736 2.7213256
## [29] 2.7271736 2.7213256 2.7271736 2.7271736 0.5000000 2.7271736 2.7271736
## [36] 2.7271736 2.7271736 2.7213256 2.7271736 2.7213256 2.7271736 2.7271736
## [43] 0.5000000 2.7386684 2.7271736 2.7443182 2.7213256 2.7873555 2.7213256
## [50] 2.7329541 2.7271736 1.1931472 1.6512925 0.5000000 1.0493061 0.5000000
## [57] 2.7271736 2.7271736 2.7213256 2.7329541 2.7213256 2.7271736 2.7271736
## [64] 1.9166067 1.4729551 1.3958797 1.3047190 2.7271736 2.7213256 2.7271736
## [71] 2.7213256 2.7271736 2.7271736 0.5000000 2.7213256 2.7271736 2.7213256
## [78] 2.7271736 2.7271736 1.0493061 0.8465736 1.0493061 2.7213256 2.7213256
## [85] 2.7213256 2.7213256 2.7213256 2.7271736 2.7271736 1.7824747 1.3958797
## [92] 0.5000000 0.5000000 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [99] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [106] 2.7213256 2.7213256 2.7271736 0.8465736 0.8465736 0.8465736 0.5000000
## [113] 2.7608943 1.3958797 1.0493061 1.4729551 1.0493061 1.4729551 0.5000000
## [120] 0.5000000 0.5000000 0.5000000 0.5000000 0.5000000 2.7213256 2.7213256
## [127] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [134] 2.7213256 2.7213256 2.7271736 2.7213256 2.7271736 2.7213256 2.7329541
## [141] 2.7213256 2.7213256 2.7213256 2.7213256 2.7329541 2.7213256 2.7213256
## [148] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [155] 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256 2.7213256 2.7213256
## [162] 2.7213256 2.7213256 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256
## [169] 2.7271736 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [176] 2.7213256 2.7213256 2.7213256 2.7213256 2.7271736 2.7213256 2.7213256
## [183] 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256 2.7213256
## [190] 2.7213256
plot(g)
plot(g,
vertex.color=brewer.pal(8, "Pastel2"),
edge.arrow.size=1,
edge.color="black", ### KOLOR KRAWĘDZI
vertex.size=degree(g), ### WIELKOŚĆ WĘZŁA (AKTORA)
vertex.label.cex=1, ### WIELKOŚĆ ETYKIETY WĘZŁA
layout=layout.graphopt)
WYKRYWANIE WSPÓLNOTOWE
prop <- cluster_label_prop(g)
plot(prop, g)
WYKRYWANIE SPOŁECZNOŚCI
comm <- cluster_edge_betweenness(g)
## Warning in cluster_edge_betweenness(g): At
## vendor/cigraph/src/community/edge_betweenness.c:503 : Membership vector will be
## selected based on the highest modularity score.
plot(comm, g)
WIZUALIZACJA KLIKI
kliki = cliques(g)
a<-largest_cliques(g)
clique1<-a[[1]]
g2<-induced.subgraph(graph=g,vids=clique1)
## Warning: `induced.subgraph()` was deprecated in igraph 2.0.0.
## ℹ Please use `induced_subgraph()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot(g2)
Made by:
Majkowska Agata
agata.majkowska@phdstud.ug.edu.pl