# Store `t` in the `text` column of df. The assigned vector must have exactly
# nrow(df) elements (or length 1); other lengths are not recycled.
df$text <- t
Error:
! Assigned data `t` must be compatible with existing data.
✖ Existing data has 9988 rows.
✖ Assigned data has 9989 rows.
ℹ Only vectors of size 1 are recycled.
Backtrace:
1. base::`$<-`(`*tmp*`, text, value = `<chr>`)
12. tibble (local) `<fn>`(`<vctrs___>`)
library(tidytext)
# Rebuild df with only its text column, then strip English stop words from it.
df <- data.frame(text = df$text)
df[["text"]] <- removeWords(df[["text"]], stopwords("english"))
# Apply stemDocument to Mtext through tm_map and keep the result in Mtext.
Mtext <- tm_map(Mtext, stemDocument)
Warning: transformation drops documents
# Apply stemDocument to Mtext through tm_map.
# NOTE(review): this repeats the preceding stemming call — confirm the second
# pass is intended.
Mtext <- tm_map(Mtext, stemDocument)
Warning: transformation drops documents
# Remove common English stop words
Mtext <- tm_map(Mtext, removeWords, stopwords("english"))
Warning: transformation drops documents
# Extract the document texts as a plain, unnamed character vector.
# unlist() on the corpus object flattens everything it holds, including the
# corpus metadata (e.g. the language tag), so it returned one element more
# than there are documents (9989 vs 9988 rows in the failed `df$text <- t`
# assignment). content() returns only the texts.
# NOTE(review): assumes Mtext is a SimpleCorpus, whose content is a character
# vector; for a VCorpus use sapply(Mtext, as.character) instead — confirm.
# NOTE(review): the name `t` masks base::t(); kept because other code reads it.
t <- unname(content(Mtext))
# Convert text to lower case; tolower is wrapped in content_transformer()
# before being handed to tm_map
Mtext <- tm_map(Mtext, content_transformer(tolower))
Warning: transformation drops documents
# Remove numbers
Mtext <- tm_map(Mtext, removeNumbers)
Warning: transformation drops documents
# Strip extra whitespace
Mtext <- tm_map(Mtext, stripWhitespace)
Warning: transformation drops documents
# Remove punctuation
Mtext <- tm_map(Mtext, removePunctuation)
Warning: transformation drops documents
# Stem documents (currently disabled)
# Mtext <- tm_map(Mtext, stemDocument)
<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiRXJyb3I6IGF0dGVtcHQgdG8gdXNlIHplcm8tbGVuZ3RoIHZhcmlhYmxlIG5hbWVcbiJ9 -->
Error: attempt to use zero-length variable name
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
###
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuIyAtLS0tLS0tLS0tIFsgdGVybS1kb2N1bWVudCBdIC0tLS0tLS0tLS0gI1xuIyBCdWlsZCBhIHRlcm0tZG9jdW1lbnQgbWF0cml4XG5NdGV4dF90ZXJtIDwtIFRlcm1Eb2N1bWVudE1hdHJpeChNdGV4dClcblxuXG5cbk10ZXh0X2R0bSA9IHJlbW92ZVNwYXJzZVRlcm1zKE10ZXh0X3Rlcm0sIDAuOTk5KSAgXG5NVERNIDwtIGFzLm1hdHJpeChNdGV4dF9kdG0pXG5kdG1fdiA8LSBzb3J0KHJvd1N1bXMoTVRETSksZGVjcmVhc2luZz1UUlVFKVxuZHRtX2QgPC0gZGF0YS5mcmFtZSh3b3JkID0gbmFtZXMoZHRtX3YpLGZyZXE9ZHRtX3YpXG5cbmR0bV9kXG5gYGAifQ== -->
```r
# ---------- [ term-document ] ---------- #
# Build the term-document matrix, then drop terms above the 0.999 sparsity cutoff
Mtext_term <- TermDocumentMatrix(Mtext)
Mtext_dtm <- removeSparseTerms(Mtext_term, sparse = 0.999)
# Dense matrix copy so rowSums() can total each term's counts
MTDM <- as.matrix(Mtext_dtm)
# Per-term totals, largest first, reshaped into a word/freq table
dtm_v <- sort(rowSums(MTDM), decreasing = TRUE)
dtm_d <- data.frame(word = names(dtm_v), freq = dtm_v)
dtm_d
# ---------- [ frequent words ] ---------- #
# Plot the most frequent words: a base-graphics bar plot of the first 70 rows
# of dtm_d and a ggplot2 bar chart of the first 60.
# head() replaces dtm_d[1:70, ] / dtm_d[1:60, ]: integer indexing past the last
# row pads the result with NA rows, whereas head() simply returns what exists.
barplot(head(dtm_d$freq, 70), las = 1, names.arg = head(dtm_d$word, 70),
        col = "lightgreen", main = "Top most frequent words",
        ylab = "Word frequencies")
ggplot(head(dtm_d, 60), aes(y = word, x = freq)) +
  geom_bar(stat = "identity", fill = "skyblue")
# Generate a word cloud from (at most) the first 100 rows of dtm_d.
# The seed is fixed so the layout is reproducible between runs.
# head() replaces `[1:100]`: indexing past the end of a shorter vector yields
# NA words/frequencies, which head() avoids.
set.seed(1234)
wordcloud(words = head(dtm_d$word, 100), freq = head(dtm_d$freq, 100),
          min.freq = 1, max.words = 100, random.order = FALSE,
          rot.per = 0.01, colors = brewer.pal(8, "Dark2"))
# Show the first five rows of the frequency table, then list the terms whose
# correlation with "nato" reaches the 0.2 limit (a lower corlimit returns more
# associations).
head(dtm_d, n = 5)
findAssocs(Mtext_dtm, terms = "nato", corlimit = 0.2)
$nato
chief korea agai escal kyivindependent missiles
0.31 0.30 0.29 0.26 0.26 0.26
north direct decampdave recap massiv days
0.26 0.26 0.25 0.25 0.24 0.24
past corner forgiv levels manipul risk
0.24 0.24 0.24 0.24 0.23 0.22
dis bring missil suppli
0.22 0.22 0.21 0.20
# Two-column tibble pairing the names of RTsen2 with RTsen2 itself.
# tibble() replaces data_frame(), which is deprecated since tibble 1.1.0; the
# resulting columns are the same.
# NOTE(review): RTsen2 must already exist when this runs — the recorded run
# failed with "object 'RTsen2' not found".
RT22 <- tibble(names(RTsen2), RTsen2)
Warning: `data_frame()` was deprecated in tibble 1.1.0.
Please use `tibble()` instead.Error in eval_tidy(xs[[j]], mask) : object 'RTsen2' not found
# Run get_nrc_sentiment() on MT22 and keep the result in MTsen.
# NOTE(review): get_nrc_sentiment is presumably syuzhet's NRC-lexicon scorer —
# confirm. This line does not call spread_() itself, so the deprecation
# warning recorded below it is raised from inside that call.
MTsen <- get_nrc_sentiment(MT22)
Warning: `spread_()` was deprecated in tidyr 1.2.0.
Please use `spread()` instead.