library("tm")
## Loading required package: NLP
library("ggplot2")
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library("SnowballC")
library("RColorBrewer")
library("syuzhet")
library("wordcloud")
# Status message; it is only reached if every library() call above succeeded.
print("Done Importing ibrary")
## [1] "Done Importing ibrary"
# Attach readxl and read the workbook (path is relative to the working
# directory) into `text_mining`.
library(readxl)
text_mining <- read_excel(path = "text mining.xlsx")
## New names:
## • `` -> `...1`
# Wrap the `tweet` column in a tm corpus (VectorSource treats each element
# of the vector as one document).
TextDoc <- Corpus(VectorSource(text_mining$tweet))
# Number of columns in the imported table.
ncol(text_mining)
## [1] 5
# Column names as imported.
names(text_mining)
## [1] "...1" "date" "target" "insult" "tweet"
# Preview the first six rows.
head(text_mining, n = 6L)
## # A tibble: 6 × 5
## ...1 date target insult tweet
## <chr> <chr> <chr> <chr> <chr>
## 1 1 41921 thomas-frieden fool "Can…
## 2 2 41921 thomas-frieden DOPE "Can…
## 3 3 42171 politicians all talk and no action "Big…
## 4 4 42179 ben-cardin It's politicians like Cardin that … "Pol…
## 5 5 42179 neil-young total hypocrite "For…
## 6 6 42179 rockin-in-the-free-world didn't love it ".@N…
# Term-document matrix of the corpus, converted to a dense matrix so that
# row sums give one total per term.
# NOTE(review): no stop-word removal or stemming is applied to TextDoc in
# this script, so the most frequent terms printed below are function words.
TextDoc_dtm <- TermDocumentMatrix(x = TextDoc)
dtm_m <- as.matrix(x = TextDoc_dtm)
# Total count of each term across all documents, in descending order.
dtm_v <- sort(x = rowSums(x = dtm_m), decreasing = TRUE)
# Frequency table: one row per term, most frequent first.
dtm_d <- data.frame(
  word = names(dtm_v),
  freq = dtm_v
)
# Show the five most frequent terms.
head(x = dtm_d, n = 5)
## word freq
## the the 18004
## and and 8185
## for for 3578
## that that 3294
## they they 3287
# Bar chart of the first five rows of dtm_d, i.e. the five most frequent
# terms, with axis labels drawn perpendicular to the axes (las = 2).
with(
  dtm_d[1:5, ],
  barplot(
    height = freq,
    names.arg = word,
    las = 2,
    col = "pink",
    main = "Top 5 most frequent words",
    ylab = "Word frequencies"
  )
)

# Pie chart of five term counts, with a matching legend.
# NOTE(review): the counts and terms below are hard-coded; they are not
# derived from dtm_d and do not match its top five rows. Presumably they were
# copied from a run on a cleaned (stop-word-free, stemmed) corpus -- TODO
# confirm, and ideally compute them from the data instead.
x <- c(15696, 11329, 10921, 6580, 6019)
# One label per slice. Each term is written once: doubled forms such as
# "democrat democrat" look like a copy of printed data-frame output, where
# the row name and the `word` column both show the term.
labels <- c("fake", "news", "will", "democrat", "peopl")
# Compute the palette once so the slices and the legend always agree.
slice_colours <- rainbow(length(x))
# Plot the chart.
pie(
  x,
  labels,
  main = "Pie Chart of Top 5 Most word appeared",
  col = slice_colours
)
# Reuse `labels` so the legend cannot drift from the slice labels.
legend("topright", legend = labels, cex = 0.8, fill = slice_colours)
