library(tm)
## Loading required package: NLP
library(wordcloud)
## Loading required package: RColorBrewer
# Build a one-document corpus from a Project Gutenberg URL and normalise its
# text before any term counting is done.
# NOTE(review): the term listings recorded further down consist largely of
# HTML/JavaScript tokens (e.g. "div", "meta", "recaptchawidget"), which
# suggests the URL served a web page rather than the plain-text book --
# confirm the download before trusting the counts.
book_url <- "https://www.gutenberg.org/files/3188/3188-0.txt"
management2 <- Corpus(
  URISource(book_url),
  readerControl = list(language = "eng")
)

# Cleaning steps, applied in order: strip punctuation and digits, lower-case,
# drop English stop words, stem, then collapse runs of whitespace.
management2 <- tm_map(management2, removePunctuation)
management2 <- tm_map(management2, removeNumbers)
# tolower() is a base function rather than a tm transformation, so it is
# wrapped in content_transformer() as tm documents for such functions.
management2 <- tm_map(management2, content_transformer(tolower))
management2 <- tm_map(management2, removeWords, stopwords("english"))
management2 <- tm_map(management2, stemDocument)
management2 <- tm_map(management2, stripWhitespace)
# Kept from the original pipeline so the recorded output stays valid.
# NOTE(review): probably redundant once tolower is wrapped, and it looks like
# the reason the matrices below label the document `character(0)` -- confirm
# before removing.
management2 <- tm_map(management2, PlainTextDocument)

# Print a summary of the cleaned corpus.
inspect(management2)
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 1
##
## [[1]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 6630
# Term counts with documents as rows and terms as columns.
dtm <- DocumentTermMatrix(x = management2)

# Show the first document against every term except the tenth.
# NOTE(review): the negative index removes column 10 instead of selecting the
# first ten terms -- confirm that `-10` (rather than `1:10`) is intended.
inspect(dtm[1, -10])
## <<DocumentTermMatrix (documents: 1, terms: 383)>>
## Non-/sparse entries: 383/0
## Sparsity : 0%
## Maximal term length: 50
## Weighting : term frequency (tf)
##
## Terms
## Docs accepts accesskey accesskeyhhelpbutton accesskeym
## character(0) 1 1 1 1
## Terms
## Docs accesskeys actionebookssearch
## character(0) 1 1
## Terms
## Docs actionhttpswwwpaypalcomcgibinwebscr actionwcaptchaansw
## character(0) 1 1
## Terms
## Docs againp another around ask audio austen austentd avoid
## character(0) 1 1 1 1 3 1 3 1
## Terms
## Docs background belowp better bodi book books button callback
## character(0) 1 1 1 2 1 4 3 1
## Terms
## Docs canonicalurl captcha captchap cataudiotd charsetutf
## character(0) 1 3 1 1 1
## Terms
## Docs classbadg classbodi classborderless
## character(0) 2 1 1
## Terms
## Docs classcenterhttpwwwgutenbergorgwcaptchaquestionbr
## character(0) 1
## Terms
## Docs classflattrbutton classfoot classhelpbox classhid
## character(0) 1 1 2 1
## Terms
## Docs classhidden classicon classnoprint classnoprintdiv
## character(0) 1 3 1 1
## Terms
## Docs classnoscreen classpaypalbutton classrecaptchaonlyifaudio
## character(0) 1 1 1
## Terms
## Docs classrecaptchaonlyifaudioenter classrecaptchaonlyifimage
## character(0) 1 1
## Terms
## Docs classrecaptchaonlyifimageenter
## character(0) 1
## Terms
## Docs classrecaptchaonlyifincorrectsolincorrect clicked
## character(0) 1 1
## Terms
## Docs closeonescape color colspan content contentebook
## character(0) 1 1 1 1 1
## Terms
## Docs contentenus contentgutenbergnew
## character(0) 1 1
## Terms
## Docs contenthttpwwwgutenbergorgpicslogoxpng
## character(0) 1
## Terms
## Docs contenthttpwwwgutenbergorgwcaptchaquestion contentproject
## character(0) 1 5
## Terms
## Docs contentpublic contentsummary contenttextcss contenttexthtml
## character(0) 1 1 1 1
## Terms
## Docs contentwebsite contentwidthdevicewidth cookie cookies
## character(0) 1 1 1 1
## Terms
## Docs custom customthemewidget dialog dialogmessage dialogtitle
## character(0) 1 1 2 1 1
## Terms
## Docs display div dlg dlgdialog doctype downloaddiv downloadtd
## character(0) 1 24 1 1 1 1 1
## Terms
## Docs ebooks ebookssuggest enabledp enctypemultipartformdata enus
## character(0) 6 1 1 1 2
## Terms
## Docs except fals fast fblang feed flattr form free french
## character(0) 1 2 1 1 1 1 6 6 1
## Terms
## Docs function germantd gutenberg hamlettd head hearp help
## character(0) 2 1 12 1 2 1 1
## Terms
## Docs hrefcsspgdesktoponecss hrefebookssearcha
## character(0) 1 1
## Terms
## Docs hrefebookssearchsortorderreleasedatelatesta
## character(0) 1
## Terms
## Docs hrefhttpsflattrcomthingprojectgutenberg
## character(0) 1
## Terms
## Docs hrefhttpwwwgutenbergorgwcaptchaquestion
## character(0) 1
## Terms
## Docs hrefmgutenbergorgwcaptchaquestionformatmobile
## character(0) 1
## Terms
## Docs hrefpicsappletouchicon hrefpicsfavicon hreftermsofuseterms
## character(0) 1 1 1
## Terms
## Docs hrefwcaptchaquestionformatopds
## character(0) 1
## Terms
## Docs hrefwikigutenbergprojectgutenbergneedsyourdonation
## character(0) 1
## Terms
## Docs hrefwikimainpage hrefwwwgutenbergorgcatalogosdbooksxml html
## character(0) 1 1 3
## Terms
## Docs httpequivcontentlanguage httpequivcontentstyletype
## character(0) 1 1
## Terms
## Docs httpequivcontenttype httpwwwgutenbergorgwcaptchaquest
## character(0) 1 1
## Terms
## Docs httpwwwworgmarkupdtdxhtmlrdfadtd human icon iconflattrspan
## character(0) 1 2 2 1
## Terms
## Docs iconlogospan iconsmsearchspan idcaptcha idcont iddialog
## character(0) 1 1 1 1 1
## Terms
## Docs idfbrootdiv idflattrbadge idhelpbox idhelpbutton
## character(0) 1 1 1 1
## Terms
## Docs idhelpbuttoncel idid idlogo idmenubar idmenubarfirst
## character(0) 1 1 1 1 1
## Terms
## Docs idmenubarsearch idmwheaddummy idneedcookiesproject
## character(0) 1 1 1
## Terms
## Docs idneedjavascriptyou idpaypalbadge idprinthead
## character(0) 1 1 1
## Terms
## Docs idrecaptchaimagediv idrecaptcharesponsefield
## character(0) 1 1
## Terms
## Docs idrecaptchawidget idscreenhead idsearch idsearchbutton
## character(0) 1 1 1 1
## Terms
## Docs idsearchbuttoncel idsearchinput idsearchinputcel
## character(0) 1 1 1
## Terms
## Docs idtaglinebadges idtaglineproject image indicated input ipad
## character(0) 1 1 2 1 10 2
## Terms
## Docs iphone italiantd jane javascript jqueri jquery jquerycookie
## character(0) 2 1 1 1 1 3 1
## Terms
## Docs jquerycookiejquerycooki jqueryuidialog jsonsearch kindle
## character(0) 1 1 1 2
## Terms
## Docs lang langen latest lfckowsaaaaajlqwhpdhzsrkkrbzlhixw lfr
## character(0) 1 23 1 1 1
## Terms
## Docs lgermantd link lit load lot ltentergt lthgt ltsgt main make
## character(0) 1 6 2 1 1 2 1 1 1 1
## Terms
## Docs marginbottom matchtd menu meta methodget methodpost
## character(0) 1 1 1 17 1 2
## Terms
## Docs mgutenbergorgwcaptchaquestionformatmobil mobile mobileurl
## character(0) 1 1 1
## Terms
## Docs modal money moneydonatea msgloadmore nameaudiobutton
## character(0) 1 2 1 1 1
## Terms
## Docs nameclassification namecmd namedescription namehelpbutton
## character(0) 1 1 1 1
## Terms
## Docs namehostedbuttonid nameimagebutton namekeywords namequery
## character(0) 1 1 1 1
## Terms
## Docs namerecaptcharesponsefield namereloadbutton namesubmit
## character(0) 1 1 1
## Terms
## Docs namesubmitbutton nametitle nametwittercard nametwittersite
## character(0) 1 1 1 1
## Terms
## Docs nameviewport need needcookieshide needjavascripthide new
## character(0) 1 2 1 1 1
## Terms
## Docs nofollow non nook norepeat notd numbers offers
## character(0) 1 1 2 1 2 1 4
## Terms
## Docs onclickrecaptchareload onclickrecaptchashowhelp
## character(0) 1 1
## Terms
## Docs onclickrecaptchaswitchtype onrecaptchaload
## character(0) 2 1
## Terms
## Docs onrecaptchaloaded oper pag pagemode paypal penter please
## character(0) 1 1 1 1 1 1 1
## Terms
## Docs press project propertyfbappid propertyogdescription
## character(0) 1 5 1 1
## Terms
## Docs propertyogimage propertyogsitename propertyogtitle
## character(0) 1 1 1
## Terms
## Docs propertyogtype propertyogurl public punctuation put pyou
## character(0) 1 1 1 1 1 1
## Terms
## Docs queryth quite quixotetd really recaptchacreate
## character(0) 1 1 1 1 1
## Terms
## Docs recaptchafocusresponsefield recaptchaonlyifincorrectsol
## character(0) 1 1
## Terms
## Docs recaptchaopt recaptchaoptions recaptcharesponsefield
## character(0) 1 1 1
## Terms
## Docs recaptchawidget relalternate relappletouchicon relcanonical
## character(0) 1 1 1 1
## Terms
## Docs releases relsearch relshortcut relstylesheet require
## character(0) 1 1 1 1 1
## Terms
## Docs resizable resolve results†rowspan rowspanprefixesth
## character(0) 1 1 1 2 1
## Terms
## Docs screen script search seep separated sessionid shakespearetd
## character(0) 1 6 3 1 1 1 3
## Terms
## Docs sitemobilea smallalways spac spaces span
## character(0) 1 1 1 1 3
## Terms
## Docs srcjspgdesktoponejsscript
## character(0) 1
## Terms
## Docs srcwwwgooglecomrecaptchaapijsrecaptchaajaxjsscript
## character(0) 1
## Terms
## Docs srcwwwgutenbergorgpicspaypalenusgif stories storiestd style
## character(0) 1 2 2 4
## Terms
## Docs stylewidth sure tabindex tabl table targetblank tdabout
## character(0) 1 1 5 7 3 1 1
## Terms
## Docs tdaudio tdauthortd tdby tdcategorytd tdebook tdexact
## character(0) 1 1 2 1 3 1
## Terms
## Docs tdgroupingtd tdhamlet tdjuvenile tdlanguagetd tdlove
## character(0) 1 1 1 1 2
## Terms
## Docs tdnottd tdortd tdqui tdsubjecttd tdtitletd terms theme
## character(0) 1 1 1 1 1 2 1
## Terms
## Docs thesesmal thfindsth thisp thsuffixesth ththis titleare
## character(0) 1 1 1 1 1 1
## Terms
## Docs titlecaptchatitl titleexecute titlego titlelearn titleopds
## character(0) 1 1 2 1 1
## Terms
## Docs titleopen titleour titleread titlesearch titlesend
## character(0) 1 1 1 2 2
## Terms
## Docs titlestart today transparent tru try
## character(0) 1 1 1 1 1
## Terms
## Docs typeapplicationatomxmlprofileopdscatalog
## character(0) 1
## Terms
## Docs typeapplicationopensearchdescriptionxml typebutton
## character(0) 1 5
## Terms
## Docs typehidden typeimage typesubmit typetext typetextcss
## character(0) 2 1 2 2 3
## Terms
## Docs typetextjavascript typetextjavascriptcdata
## character(0) 3 1
## Terms
## Docs uidialogtitlebarclose urlpicsspritepng use usea used value
## character(0) 1 1 1 1 1 1
## Terms
## Docs valueget valuehelp valuesubmit valuesxclick
## character(0) 3 1 1 1
## Terms
## Docs valuexkalbzlypsn var verne wcdtd width words works
## character(0) 1 11 1 1 2 1 1
## Terms
## Docs xhtmlrdfa xmllangen xmllangenashakespearetd xmllangenath
## character(0) 1 3 1 1
## Terms
## Docs xmllangencatth xmllangenjane xmllangenjuvenile
## character(0) 1 1 1
## Terms
## Docs xmllangenlove xmllangenlth xmllangennth xmllangenquitd
## character(0) 2 1 1 1
## Terms
## Docs xmllangenshakespeare xmllangensshakespearetd xmllangensth
## character(0) 1 1 1
## Terms
## Docs xmllangentd xmllangenth xmllangentth xmllangenverne
## character(0) 1 4 1 1
## Terms
## Docs xmlnshttpwwwworgxhtml xxxx
## character(0) 1 1
# Same counts in the transposed layout: terms as rows, documents as columns.
tdm <- TermDocumentMatrix(x = management2)

# Print the complete term-document matrix.
inspect(tdm)
## <<TermDocumentMatrix (terms: 384, documents: 1)>>
## Non-/sparse entries: 384/0
## Sparsity : 0%
## Maximal term length: 50
## Weighting : term frequency (tf)
##
## Docs
## Terms character(0)
## accepts 1
## accesskey 1
## accesskeyhhelpbutton 1
## accesskeym 1
## accesskeys 1
## actionebookssearch 1
## actionhttpswwwpaypalcomcgibinwebscr 1
## actionwcaptchaansw 1
## againp 1
## android 2
## another 1
## around 1
## ask 1
## audio 3
## austen 1
## austentd 3
## avoid 1
## background 1
## belowp 1
## better 1
## bodi 2
## book 1
## books 4
## button 3
## callback 1
## canonicalurl 1
## captcha 3
## captchap 1
## cataudiotd 1
## charsetutf 1
## classbadg 2
## classbodi 1
## classborderless 1
## classcenterhttpwwwgutenbergorgwcaptchaquestionbr 1
## classflattrbutton 1
## classfoot 1
## classhelpbox 2
## classhid 1
## classhidden 1
## classicon 3
## classnoprint 1
## classnoprintdiv 1
## classnoscreen 1
## classpaypalbutton 1
## classrecaptchaonlyifaudio 1
## classrecaptchaonlyifaudioenter 1
## classrecaptchaonlyifimage 1
## classrecaptchaonlyifimageenter 1
## classrecaptchaonlyifincorrectsolincorrect 1
## clicked 1
## closeonescape 1
## color 1
## colspan 1
## content 1
## contentebook 1
## contentenus 1
## contentgutenbergnew 1
## contenthttpwwwgutenbergorgpicslogoxpng 1
## contenthttpwwwgutenbergorgwcaptchaquestion 1
## contentproject 5
## contentpublic 1
## contentsummary 1
## contenttextcss 1
## contenttexthtml 1
## contentwebsite 1
## contentwidthdevicewidth 1
## cookie 1
## cookies 1
## custom 1
## customthemewidget 1
## dialog 2
## dialogmessage 1
## dialogtitle 1
## display 1
## div 24
## dlg 1
## dlgdialog 1
## doctype 1
## downloaddiv 1
## downloadtd 1
## ebooks 6
## ebookssuggest 1
## enabledp 1
## enctypemultipartformdata 1
## enus 2
## except 1
## fals 2
## fast 1
## fblang 1
## feed 1
## flattr 1
## form 6
## free 6
## french 1
## function 2
## germantd 1
## gutenberg 12
## hamlettd 1
## head 2
## hearp 1
## help 1
## hrefcsspgdesktoponecss 1
## hrefebookssearcha 1
## hrefebookssearchsortorderreleasedatelatesta 1
## hrefhttpsflattrcomthingprojectgutenberg 1
## hrefhttpwwwgutenbergorgwcaptchaquestion 1
## hrefmgutenbergorgwcaptchaquestionformatmobile 1
## hrefpicsappletouchicon 1
## hrefpicsfavicon 1
## hreftermsofuseterms 1
## hrefwcaptchaquestionformatopds 1
## hrefwikigutenbergprojectgutenbergneedsyourdonation 1
## hrefwikimainpage 1
## hrefwwwgutenbergorgcatalogosdbooksxml 1
## html 3
## httpequivcontentlanguage 1
## httpequivcontentstyletype 1
## httpequivcontenttype 1
## httpwwwgutenbergorgwcaptchaquest 1
## httpwwwworgmarkupdtdxhtmlrdfadtd 1
## human 2
## icon 2
## iconflattrspan 1
## iconlogospan 1
## iconsmsearchspan 1
## idcaptcha 1
## idcont 1
## iddialog 1
## idfbrootdiv 1
## idflattrbadge 1
## idhelpbox 1
## idhelpbutton 1
## idhelpbuttoncel 1
## idid 1
## idlogo 1
## idmenubar 1
## idmenubarfirst 1
## idmenubarsearch 1
## idmwheaddummy 1
## idneedcookiesproject 1
## idneedjavascriptyou 1
## idpaypalbadge 1
## idprinthead 1
## idrecaptchaimagediv 1
## idrecaptcharesponsefield 1
## idrecaptchawidget 1
## idscreenhead 1
## idsearch 1
## idsearchbutton 1
## idsearchbuttoncel 1
## idsearchinput 1
## idsearchinputcel 1
## idtaglinebadges 1
## idtaglineproject 1
## image 2
## indicated 1
## input 10
## ipad 2
## iphone 2
## italiantd 1
## jane 1
## javascript 1
## jqueri 1
## jquery 3
## jquerycookie 1
## jquerycookiejquerycooki 1
## jqueryuidialog 1
## jsonsearch 1
## kindle 2
## lang 1
## langen 23
## latest 1
## lfckowsaaaaajlqwhpdhzsrkkrbzlhixw 1
## lfr 1
## lgermantd 1
## link 6
## lit 2
## load 1
## lot 1
## ltentergt 2
## lthgt 1
## ltsgt 1
## main 1
## make 1
## marginbottom 1
## matchtd 1
## menu 1
## meta 17
## methodget 1
## methodpost 2
## mgutenbergorgwcaptchaquestionformatmobil 1
## mobile 1
## mobileurl 1
## modal 1
## money 2
## moneydonatea 1
## msgloadmore 1
## nameaudiobutton 1
## nameclassification 1
## namecmd 1
## namedescription 1
## namehelpbutton 1
## namehostedbuttonid 1
## nameimagebutton 1
## namekeywords 1
## namequery 1
## namerecaptcharesponsefield 1
## namereloadbutton 1
## namesubmit 1
## namesubmitbutton 1
## nametitle 1
## nametwittercard 1
## nametwittersite 1
## nameviewport 1
## need 2
## needcookieshide 1
## needjavascripthide 1
## new 1
## nofollow 1
## non 1
## nook 2
## norepeat 1
## notd 2
## numbers 1
## offers 4
## onclickrecaptchareload 1
## onclickrecaptchashowhelp 1
## onclickrecaptchaswitchtype 2
## onrecaptchaload 1
## onrecaptchaloaded 1
## oper 1
## pag 1
## pagemode 1
## paypal 1
## penter 1
## please 1
## press 1
## project 5
## propertyfbappid 1
## propertyogdescription 1
## propertyogimage 1
## propertyogsitename 1
## propertyogtitle 1
## propertyogtype 1
## propertyogurl 1
## public 1
## punctuation 1
## put 1
## pyou 1
## queryth 1
## quite 1
## quixotetd 1
## really 1
## recaptchacreate 1
## recaptchafocusresponsefield 1
## recaptchaonlyifincorrectsol 1
## recaptchaopt 1
## recaptchaoptions 1
## recaptcharesponsefield 1
## recaptchawidget 1
## relalternate 1
## relappletouchicon 1
## relcanonical 1
## releases 1
## relsearch 1
## relshortcut 1
## relstylesheet 1
## require 1
## resizable 1
## resolve 1
## results†1
## rowspan 2
## rowspanprefixesth 1
## screen 1
## script 6
## search 3
## seep 1
## separated 1
## sessionid 1
## shakespearetd 3
## sitemobilea 1
## smallalways 1
## spac 1
## spaces 1
## span 3
## srcjspgdesktoponejsscript 1
## srcwwwgooglecomrecaptchaapijsrecaptchaajaxjsscript 1
## srcwwwgutenbergorgpicspaypalenusgif 1
## stories 2
## storiestd 2
## style 4
## stylewidth 1
## sure 1
## tabindex 5
## tabl 7
## table 3
## targetblank 1
## tdabout 1
## tdaudio 1
## tdauthortd 1
## tdby 2
## tdcategorytd 1
## tdebook 3
## tdexact 1
## tdgroupingtd 1
## tdhamlet 1
## tdjuvenile 1
## tdlanguagetd 1
## tdlove 2
## tdnottd 1
## tdortd 1
## tdqui 1
## tdsubjecttd 1
## tdtitletd 1
## terms 2
## theme 1
## thesesmal 1
## thfindsth 1
## thisp 1
## thsuffixesth 1
## ththis 1
## titleare 1
## titlecaptchatitl 1
## titleexecute 1
## titlego 2
## titlelearn 1
## titleopds 1
## titleopen 1
## titleour 1
## titleread 1
## titlesearch 2
## titlesend 2
## titlestart 1
## today 1
## transparent 1
## tru 1
## try 1
## typeapplicationatomxmlprofileopdscatalog 1
## typeapplicationopensearchdescriptionxml 1
## typebutton 5
## typehidden 2
## typeimage 1
## typesubmit 2
## typetext 2
## typetextcss 3
## typetextjavascript 3
## typetextjavascriptcdata 1
## uidialogtitlebarclose 1
## urlpicsspritepng 1
## use 1
## usea 1
## used 1
## value 1
## valueget 3
## valuehelp 1
## valuesubmit 1
## valuesxclick 1
## valuexkalbzlypsn 1
## var 11
## verne 1
## wcdtd 1
## width 2
## words 1
## works 1
## xhtmlrdfa 1
## xmllangen 3
## xmllangenashakespearetd 1
## xmllangenath 1
## xmllangencatth 1
## xmllangenjane 1
## xmllangenjuvenile 1
## xmllangenlove 2
## xmllangenlth 1
## xmllangennth 1
## xmllangenquitd 1
## xmllangenshakespeare 1
## xmllangensshakespearetd 1
## xmllangensth 1
## xmllangentd 1
## xmllangenth 4
## xmllangentth 1
## xmllangenverne 1
## xmlnshttpwwwworgxhtml 1
## xxxx 1
# Print only the first thirty terms of the first document.
inspect(tdm[seq_len(30), 1])
## <<TermDocumentMatrix (terms: 30, documents: 1)>>
## Non-/sparse entries: 30/0
## Sparsity : 0%
## Maximal term length: 35
## Weighting : term frequency (tf)
##
## Docs
## Terms character(0)
## accepts 1
## accesskey 1
## accesskeyhhelpbutton 1
## accesskeym 1
## accesskeys 1
## actionebookssearch 1
## actionhttpswwwpaypalcomcgibinwebscr 1
## actionwcaptchaansw 1
## againp 1
## android 2
## another 1
## around 1
## ask 1
## audio 3
## austen 1
## austentd 3
## avoid 1
## background 1
## belowp 1
## better 1
## bodi 2
## book 1
## books 4
## button 3
## callback 1
## canonicalurl 1
## captcha 3
## captchap 1
## cataudiotd 1
## charsetutf 1
# Convert the term-document matrix to a base R matrix so that base functions
# can be applied to it.
matx1 <- as.matrix(tdm)

# Peek at the first ten counts.
matx1[seq_len(10)]
## [1] 1 1 1 1 1 1 1 1 1 2
# Sum each term's count across documents and rank the terms from most to
# least frequent. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
sort2 <- sort(rowSums(matx1), decreasing = TRUE)

# Show the ten most frequent terms; head() returns fewer entries instead of
# NA padding when the vocabulary has fewer than ten terms.
head(sort2, 10)
## div langen meta gutenberg var input tabl
## 24 23 17 12 11 10 7
## ebooks form free
## 6 6 6
# Put the ranked frequencies into a two-column table: the term and its count.
di1 <- data.frame(
  Word = names(sort2),
  Frequency = sort2
)

# Show the first ten rows of the table.
di1[seq_len(10), ]
## Word Frequency
## div div 24
## langen langen 23
## meta meta 17
## gutenberg gutenberg 12
## var var 11
## input input 10
## tabl tabl 7
## ebooks ebooks 6
## form form 6
## free free 6
# Draw a word cloud of at most 100 terms, coloured with six shades from the
# ColorBrewer "Dark2" palette.
wordcloud(
  words = di1$Word,
  freq = di1$Frequency,
  max.words = 100,
  colors = brewer.pal(n = 6, name = "Dark2")
)
library(wordcloud2)
library(wordcloud)