El texto utilizado para este análisis habla de la situación en la Premier League por violaciones a las reglas financieras y de la preocupación de que tanto el Manchester City como el Chelsea podrían estar en riesgo, ya que ambos enfrentan acusaciones por violar dichas reglas.
library("tm")
## Warning: package 'tm' was built under R version 4.3.2
## Loading required package: NLP
library("SnowballC")
library("wordcloud")
## Warning: package 'wordcloud' was built under R version 4.3.2
## Loading required package: RColorBrewer
library("RColorBrewer")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("rvest")
library("tm")
library("tidyr")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.3.2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library("stringr")
library("readr")
##
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
##
## guess_encoding
library("janeaustenr")
## Warning: package 'janeaustenr' was built under R version 4.3.2
# Load the article text ----
# Tip: choose.files() can be run interactively to locate the file.
pl_path <- "C:\\Users\\danbr\\OneDrive\\Desktop\\Mineria de Datos\\Actividad Constante - El Diario\\pl.txt"
text <- readLines(pl_path)
str(text)
## chr [1:12] "Manchester City and Chelsea run the risk of relegation from the Premier League after Everton's 10-point deducti"| __truncated__ ...

# Re-encode the lines from Windows-1252 to UTF-8 before building the corpus.
pltext <- iconv(text, from = "WINDOWS-1252", to = "UTF-8")

# Build the corpus (one document per input line).
# VCorpus() is used instead of Corpus(): Corpus() on a VectorSource yields a
# SimpleCorpus, and tm_map() on a SimpleCorpus emitted a
# "transformation drops documents" warning on every cleaning step below.
pltext <- VCorpus(VectorSource(pltext))

# Clean the corpus ----
# Convert the text to lower case (tolower is a base function, so it must be
# wrapped with content_transformer() to be applied to corpus documents).
pl <- tm_map(pltext, content_transformer(tolower))
# Remove numbers
pl <- tm_map(pl, removeNumbers)
# Remove common English stopwords
pl <- tm_map(pl, removeWords, stopwords("english"))
# Remove punctuation
pl <- tm_map(pl, removePunctuation)
# Collapse runs of white space left behind by the removals above
pl <- tm_map(pl, stripWhitespace)
# Term frequencies ----
# Build the term-document matrix from the cleaned corpus.
dtm <- TermDocumentMatrix(pl)
m <- as.matrix(dtm)
# Total count of each term across all documents, most frequent first.
v <- sort(rowSums(m), decreasing = TRUE)
# Frequency table: one row per term, with its overall count.
d <- data.frame(word = names(v), freq = v)
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(d)
}
# Word cloud ----
# Fix the RNG seed so the word layout is reproducible between runs.
set.seed(1234)
# Plot every word that appears at least 3 times, most frequent words first
# (random.order = FALSE), with half of the words rotated (rot.per = 0.5).
# FALSE is spelled out because the alias F is an ordinary, reassignable variable.
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 3,
  max.words = Inf,
  random.order = FALSE,
  rot.per = 0.5,
  colors = brewer.pal(5, "Dark2")
)
# Terms whose correlation with "city" across documents is at least 0.2.
assoc_term <- "city"
min_correlation <- 0.2
findAssocs(dtm, terms = assoc_term, corlimit = min_correlation)
## $city
## former accounting adviser allegations also banker
## 0.77 0.67 0.67 0.67 0.67 0.67
## ceo counsel current dealing fan general
## 0.67 0.67 0.67 0.67 0.67 0.67
## harris historic issues journalist mail man
## 0.67 0.67 0.67 0.67 0.67 0.67
## nick plc responded sport view well
## 0.67 0.67 0.67 0.67 0.67 0.67
## ffp lawyer stefan across interesting financial
## 0.64 0.64 0.64 0.64 0.64 0.39
## chelsea evertons leagues payments sanctions
## 0.38 0.29 0.29 0.29 0.29
# Bar plot of the most frequent words ----
# head() takes at most 10 rows; d[1:10, ] would pad the table with NA rows
# (and draw NA bars/labels) whenever fewer than 10 distinct terms exist.
top_words <- head(d, 10)
barplot(
  top_words$freq,
  las = 2, # draw axis labels perpendicular to the axis so words stay readable
  names.arg = top_words$word,
  col = "lightblue",
  main = "Most frequent words",
  ylab = "Word frequencies"
)