This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

# Packages used by this text-mining workflow, collected in one vector so they
# can be installed with the single (commented-out) call below.
# The stemming package is spelled "SnowballC"; the earlier "SnowballCC" entry
# was a misspelling.
Needed <- c("tm", "SnowballC", "RColorBrewer", "ggplot2", "wordcloud", "biclust",
    "cluster", "igraph", "fpc")
# One-time setup: uncomment to install the packages listed above.
#install.packages(Needed, dependencies = TRUE)
#install.packages("Rcampdf", repos = "http://datacube.wu.ac.at/", type = "source")
# Directory that holds the input text. file.path() inserts the separator
# itself, so the base directory must not end in one; with a trailing "\\" the
# result contained the doubled, mixed separator "\\/".
cname <- file.path("C:\\Users\\frick\\Amazon Drive\\AWS\\keibaCloud", "file")
cname
#> [1] "C:\\Users\\frick\\Amazon Drive\\AWS\\keibaCloud/file"
# List the files in that directory (output recorded from a previous run).
dir(cname)
#> [1] "file.txt"

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

library(tm)
library("wordcloud")
library("RColorBrewer")
# Set the session-wide `encoding` option to UTF-8 before any file is read.
options(encoding = "UTF-8")

# Read the cleaned text from the local file; readLines() returns one element
# per line.
filePath <- "C:\\Users\\frick\\Amazon Drive\\AWS\\keibaCloud\\cleanedfile.txt"
raw_lines <- readLines(filePath)
# Wrap the character vector in a tm corpus.
docs <- Corpus(VectorSource(raw_lines))

summary(docs)

# Cleaning steps ----
# Each step replaces `docs` with its transformed version; the order matters
# (stopwords are matched against lower-cased text, before punctuation is
# stripped).

# Lower-case everything
to_lower <- content_transformer(tolower)
docs <- tm_map(docs, to_lower)
# Drop digits
docs <- tm_map(docs, removeNumbers)
# Drop the standard English stopword list
english_stopwords <- stopwords("english")
docs <- tm_map(docs, removeWords, english_stopwords)
# Drop project-specific stopwords (placeholder values; edit this vector)
custom_stopwords <- c("blabla1", "blabla2")
docs <- tm_map(docs, removeWords, custom_stopwords)
# Drop punctuation
docs <- tm_map(docs, removePunctuation)
# Collapse runs of whitespace
docs <- tm_map(docs, stripWhitespace)
# Stemming is intentionally left disabled
# docs <- tm_map(docs, stemDocument)

# Term frequencies ----
# Rows of the term-document matrix are terms, so row sums give the total count
# of each term across all documents.
dtm <- TermDocumentMatrix(docs)
term_totals <- rowSums(as.matrix(dtm))
ranked_totals <- sort(term_totals, decreasing = TRUE)
word_freq <- data.frame(word = names(ranked_totals), freq = ranked_totals)
head(word_freq, 10)

# Word cloud ----
cloud_palette <- brewer.pal(8, "Dark2")
# Fixed seed so the word layout is reproducible between runs.
set.seed(1234)
wordcloud(
  words = word_freq$word,
  freq = word_freq$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = cloud_palette
)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkN0cmwrQWx0K0kqLg0KYGBge3J9DQpOZWVkZWQgPC0gYygidG0iLCAiU25vd2JhbGxDQyIsICJSQ29sb3JCcmV3ZXIiLCAiZ2dwbG90MiIsICJ3b3JkY2xvdWQiLCAiYmljbHVzdCIsIA0KICAgICJjbHVzdGVyIiwgImlncmFwaCIsICJmcGMiKQ0KDQojaW5zdGFsbC5wYWNrYWdlcyhOZWVkZWQsIGRlcGVuZGVuY2llcyA9IFRSVUUpDQojaW5zdGFsbC5wYWNrYWdlcygiUmNhbXBkZiIsIHJlcG9zID0gImh0dHA6Ly9kYXRhY3ViZS53dS5hYy5hdC8iLCB0eXBlID0gInNvdXJjZSIpDQojY25hbWUgPC0gZmlsZS5wYXRoKCJDOlxcVXNlcnNcXGZyaWNrXFxBbWF6b24gRHJpdmVcXEFXU1xca2VpYmFDbG91ZFxcIiwgImZpbGUiKSAgIA0KI2NuYW1lICAgDQojZGlyKGNuYW1lKSANCg0KYGBgDQoNCg0KV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDdHJsK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuDQoNCg0KYGBge3J9DQpsaWJyYXJ5KHRtKQ0KbGlicmFyeSgid29yZGNsb3VkIikNCmxpYnJhcnkoIlJDb2xvckJyZXdlciIpDQpvcHRpb25zKGVuY29kaW5nID0gIlVURi04IikNCg0KIyBSZWFkIHRoZSB0ZXh0IGZpbGUgZnJvbSBpbnRlcm5ldA0KZmlsZVBhdGggPC0gIkM6XFxVc2Vyc1xcZnJpY2tcXEFtYXpvbiBEcml2ZVxcQVdTXFxrZWliYUNsb3VkXFxjbGVhbmVkZmlsZS50eHQiDQp0ZXh0IDwtIHJlYWRMaW5lcyhmaWxlUGF0aCkNCiMgTG9hZCB0aGUgZGF0YSBhcyBhIGNvcnB1cw0KZG9jcyA8LSBDb3JwdXMoVmVjdG9yU291cmNlKHRleHQpKQ0KICANCnN1bW1hcnkoZG9jcykgDQoNCiMgQ29udmVydCB0aGUgdGV4dCB0byBsb3dlciBjYXNlDQpkb2NzIDwtIHRtX21hcChkb2NzLCBjb250ZW50X3RyYW5zZm9ybWVyKHRvbG93ZXIpKQ0KIyBSZW1vdmUgbnVtYmVycw0KZG9jcyA8LSB0bV9tYXAoZG9jcywgcmVtb3ZlTnVtYmVycykNCiMgUmVtb3ZlIGVuZ2xpc2gg
Y29tbW9uIHN0b3B3b3Jkcw0KZG9jcyA8LSB0bV9tYXAoZG9jcywgcmVtb3ZlV29yZHMsIHN0b3B3b3JkcygiZW5nbGlzaCIpKQ0KIyBSZW1vdmUgeW91ciBvd24gc3RvcCB3b3JkDQojIHNwZWNpZnkgeW91ciBzdG9wd29yZHMgYXMgYSBjaGFyYWN0ZXIgdmVjdG9yDQpkb2NzIDwtIHRtX21hcChkb2NzLCByZW1vdmVXb3JkcywgYygiYmxhYmxhMSIsICJibGFibGEyIikpIA0KIyBSZW1vdmUgcHVuY3R1YXRpb25zDQpkb2NzIDwtIHRtX21hcChkb2NzLCByZW1vdmVQdW5jdHVhdGlvbikNCiMgRWxpbWluYXRlIGV4dHJhIHdoaXRlIHNwYWNlcw0KZG9jcyA8LSB0bV9tYXAoZG9jcywgc3RyaXBXaGl0ZXNwYWNlKQ0KIyBUZXh0IHN0ZW1taW5nDQojIGRvY3MgPC0gdG1fbWFwKGRvY3MsIHN0ZW1Eb2N1bWVudCkNCmR0bSA8LSBUZXJtRG9jdW1lbnRNYXRyaXgoZG9jcykNCm0gPC0gYXMubWF0cml4KGR0bSkNCnYgPC0gc29ydChyb3dTdW1zKG0pLGRlY3JlYXNpbmc9VFJVRSkNCmQgPC0gZGF0YS5mcmFtZSh3b3JkID0gbmFtZXModiksZnJlcT12KQ0KaGVhZChkLCAxMCkNCnNldC5zZWVkKDEyMzQpDQp3b3JkY2xvdWQod29yZHMgPSBkJHdvcmQsIGZyZXEgPSBkJGZyZXEsIG1pbi5mcmVxID0gMSwNCiAgICAgICAgICBtYXgud29yZHM9MjAwLCByYW5kb20ub3JkZXI9RkFMU0UsIHJvdC5wZXI9MC4zNSwgDQogICAgICAgICAgY29sb3JzPWJyZXdlci5wYWwoOCwgIkRhcmsyIikpDQoNCmBgYA0KDQo=