Section-1
§ 1.1
# Wrap the tweet text in a tm corpus, then lower-case every document.
# content_transformer() adapts the plain tolower() function for use in tm_map().
corpus <- Corpus(VectorSource(tw$Tweet))
corpus <- tm_map(corpus, FUN = content_transformer(tolower))
transformation drops documents
# Strip punctuation from every document in the corpus.
corpus <- tm_map(corpus, FUN = removePunctuation)
transformation drops documents
# Drop the English stop words from every document.
corpus <- tm_map(
  corpus,
  FUN = removeWords,
  words = stopwords("english")
)
transformation drops documents
# Stemming is left disabled so the terms stay as full words:
# corpus <- tm_map(corpus, stemDocument)
# Build the document-term matrix from the cleaned corpus.
dtm <- DocumentTermMatrix(corpus)
# Sparse-term pruning is left disabled here:
# dtm <- removeSparseTerms(dtm, 0.995); dtm
How many unique words are there across all the documents?
# Dimensions of the document-term matrix: documents (rows) by distinct terms (columns).
dim(dtm)
[1] 1181 3780
# Answer to 1.1: the second value reported by dim(dtm), i.e. the number of term columns.
print("3780")
[1] "3780"
§ 1.2 What is the most compelling rationale for skipping this step when visualizing text data?
# Answer to 1.2; "this step" is presumably stemming (the stemDocument call is commented out in 1.1).
print("It will be easier to read and understand the word cloud if it includes full words instead of just the word stems")
[1] "It will be easier to read and understand the word cloud if it includes full words instead of just the word stems"
Section-2
§ 2.1 Which function can we apply to allTweets to get a vector of the words in our dataset, which we’ll pass as the first argument to wordcloud()?
# Answer to 2.1; the wordcloud() calls in this notebook pass colnames(dtm) as the words.
print("colnames")
[1] "colnames"
§ 2.2 Which function should we apply to allTweets to obtain the frequency of each word across all tweets?
# Answer to 2.2.
# NOTE(review): the wordcloud() calls in this notebook use col_sums() rather than
# base colSums() on dtm — presumably slam::col_sums for the sparse matrix; confirm.
print("colSums")
[1] "colSums"
§ 2.3 What is the most common word across all the tweets (it will be the largest in the outputted word cloud)?
# Draw the word cloud: each term is sized by its total count across all tweets.
wordcloud(words = colnames(dtm), freq = col_sums(dtm), scale = c(2, 0.25))

# Answer to 2.3: the largest word in the cloud above.
print("apple")
[1] "apple"
§ 2.4 Create a word cloud with the updated corpus. What is the most common word in this new corpus (the largest word in the outputted word cloud)?
# Rebuild the corpus from the raw tweets for the second word cloud.
# tolower() is wrapped in content_transformer(), as in the first pipeline, so
# tm_map() keeps returning tm text documents instead of bare character strings
# (a bare tolower breaks DocumentTermMatrix() on corpus types that hold
# document objects).
corpus <- Corpus(VectorSource(tw$Tweet))
corpus <- tm_map(corpus, content_transformer(tolower))
transformation drops documents
# Strip punctuation from every document in the rebuilt corpus.
corpus <- tm_map(corpus, FUN = removePunctuation)
transformation drops documents
# Remove "apple" together with the English stop words, so that the next most
# frequent term becomes the largest word in the cloud.
corpus <- tm_map(
  corpus,
  FUN = removeWords,
  words = c("apple", stopwords("english"))
)
transformation drops documents
# Rebuild the document-term matrix from the updated corpus and redraw the cloud.
dtm <- DocumentTermMatrix(corpus)
wordcloud(words = colnames(dtm), freq = col_sums(dtm), scale = c(2, 0.25))

# Answer to 2.4: the largest word in the cloud above.
print("iphone")
[1] "iphone"
Section-3
Word Cloud A:
# Word Cloud A: all terms, with rot.per raised to 0.5.
wordcloud(
  words = colnames(dtm),
  freq = col_sums(dtm),
  scale = c(3, 0.4),
  rot.per = 0.5
)

Word Cloud B:
# Word Cloud B: only terms with frequency >= 8, plotted in decreasing-frequency
# order. FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
wordcloud(
  words = colnames(dtm),
  freq = col_sums(dtm),
  scale = c(3, 0.4),
  min.freq = 8,
  random.order = FALSE
)

Word Cloud C:
# Word Cloud C: keep only the rows of dtm whose tweet has Avg <= -1, then draw
# the cloud with the four darkest shades of the 9-colour "Purples" palette.
dtm1 <- dtm[tw$Avg <= -1, ]
wordcloud(
  words = colnames(dtm1),
  freq = col_sums(dtm1),
  scale = c(3, 0.4),
  colors = brewer.pal(9, "Purples")[6:9]
)

Word Cloud D:
# Word Cloud D: terms with frequency >= 8, plotted in decreasing-frequency
# order, with colours picked at random from the four darkest "Purples" shades.
# TRUE/FALSE are spelled out because the shorthands T and F are ordinary
# variables that can be reassigned.
wordcloud(
  words = colnames(dtm),
  freq = col_sums(dtm),
  scale = c(3, 0.4),
  min.freq = 8,
  random.order = FALSE,
  random.color = TRUE,
  colors = brewer.pal(9, "Purples")[6:9]
)

§ 3.1 Which word cloud is based only on the negative tweets (tweets with Avg value -1 or less)?
# Answer to 3.1: Word Cloud C is drawn from dtm1, the rows of dtm where tw$Avg <= -1.
print("Word Cloud C")
[1] "Word Cloud C"
§ 3.2 Only one word cloud was created without modifying parameters min.freq or max.words. Which word cloud is this?
# Answer to 3.2.
# NOTE(review): among the calls shown in this notebook, Word Cloud C also leaves
# min.freq and max.words unset; the answer presumably refers to the quiz's
# original images — confirm.
print("Word Cloud A")
[1] "Word Cloud A"
§ 3.3 Which word clouds were created with parameter random.order set to FALSE?
# Answer to 3.3: the calls for Word Clouds B and D are the ones that set random.order to FALSE.
print("Word Cloud B , Word Cloud D")
[1] "Word Cloud B , Word Cloud D"
§ 3.4 Which word cloud was built with a non-default value for parameter rot.per?
# Answer to 3.4: the call for Word Cloud A is the only one that passes rot.per (0.5).
print("Word Cloud A")
[1] "Word Cloud A"
§ 3.5 For which word cloud was the parameter random.color set to TRUE?
# Answer to 3.5: the call for Word Cloud D is the only one that sets random.color to TRUE.
print("Word Cloud D")
[1] "Word Cloud D"
Section-4
§ 4.1 Which color palette would be most appropriate for use in a word cloud for which we want to use color to indicate word frequency?
# Show every RColorBrewer palette so a suitable one can be chosen by eye.
library(RColorBrewer)
display.brewer.all()

# Answer to 4.1, restored from the notebook's embedded .Rmd source, where this
# print() follows display.brewer.all() but is missing from the rendered text.
print("YlOrRd")
§ 4.2 Which RColorBrewer palette name would be most appropriate to use when preparing an image for a document that must be in grayscale?
# Preview seven shades of the "Greys" palette.
display.brewer.pal(n = 7, name = "Greys")

# Answer to 4.2.
print("Greys")
[1] "Greys"
§ 4.3 Which of the following commands addresses this issue by removing the first 4 elements of the 9-color palette of blue colors? Select all that apply.
# Positive indices: keep elements 5 through 9 of the nine-colour "Blues" palette.
brewer.pal(9, "Blues")[c(5,6,7,8,9)]
[1] "#6BAED6" "#4292C6" "#2171B5" "#08519C" "#08306B"
# Negative indices: drop elements 1 through 4, which leaves the same five colours.
brewer.pal(9, "Blues")[c(-1,-2,-3,-4)]
[1] "#6BAED6" "#4292C6" "#2171B5" "#08519C" "#08306B"
LS0tDQp0aXRsZTogIkFTNy0zIFZpc3VhbGl6aW5nIFRleHQgRGF0YSBVc2luZyBXb3JkIENsb3VkcyINCmF1dGhvcjogIumZs+ato+isgCBsb3Vpc2FuMTIzIDIwMTgvMDcvMjkiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIyMjICBTZWN0aW9uLTENCl9fwqcgMS4xX18gDQpgYGB7cn0NCmNvcnB1cyA9IENvcnB1cyhWZWN0b3JTb3VyY2UodHckVHdlZXQpKQ0KY29ycHVzID0gdG1fbWFwKGNvcnB1cywgY29udGVudF90cmFuc2Zvcm1lcih0b2xvd2VyKSkNCmNvcnB1cyA9IHRtX21hcChjb3JwdXMsIHJlbW92ZVB1bmN0dWF0aW9uKQ0KY29ycHVzID0gdG1fbWFwKGNvcnB1cywgcmVtb3ZlV29yZHMsIHN0b3B3b3JkcygiZW5nbGlzaCIpKQ0KIyBjb3JwdXMgPSB0bV9tYXAoY29ycHVzLCBzdGVtRG9jdW1lbnQpDQpkdG0gPSBEb2N1bWVudFRlcm1NYXRyaXgoY29ycHVzKQ0KIyBkdG0gPSByZW1vdmVTcGFyc2VUZXJtcyhkdG0sIDAuOTk1KTsgZHRtDQpgYGANCg0KSG93IG1hbnkgdW5pcXVlIHdvcmRzIGFyZSB0aGVyZSBhY3Jvc3MgYWxsIHRoZSBkb2N1bWVudHM/DQpgYGB7cn0NCmRpbShkdG0pDQpwcmludCgiMzc4MCIpDQpgYGANCg0KX1/CpyAxLjJfXyANCldoYXQgaXMgdGhlIG1vc3QgY29tcGVsbGluZyByYXRpb25hbGUgZm9yIHNraXBwaW5nIHRoaXMgc3RlcCB3aGVuIHZpc3VhbGl6aW5nIHRleHQgZGF0YT8NCmBgYHtyfQ0KcHJpbnQoIkl0IHdpbGwgYmUgZWFzaWVyIHRvIHJlYWQgYW5kIHVuZGVyc3RhbmQgdGhlIHdvcmQgY2xvdWQgaWYgaXQgaW5jbHVkZXMgZnVsbCB3b3JkcyBpbnN0ZWFkIG9mIGp1c3QgdGhlIHdvcmQgc3RlbXMiKQ0KYGBgDQoNCiMjIyMgIFNlY3Rpb24tMg0KX1/CpyAyLjFfXw0KV2hpY2ggZnVuY3Rpb24gY2FuIHdlIGFwcGx5IHRvIGFsbFR3ZWV0cyB0byBnZXQgYSB2ZWN0b3Igb2YgdGhlIHdvcmRzIGluIG91ciBkYXRhc2V0LCB3aGljaCB3ZSdsbCBwYXNzIGFzIHRoZSBmaXJzdCBhcmd1bWVudCB0byB3b3JkY2xvdWQoKT8NCmBgYHtyfQ0KcHJpbnQoImNvbG5hbWVzIikNCmBgYA0KDQpfX8KnIDIuMl9fDQpXaGljaCBmdW5jdGlvbiBzaG91bGQgd2UgYXBwbHkgdG8gYWxsVHdlZXRzIHRvIG9idGFpbiB0aGUgZnJlcXVlbmN5IG9mIGVhY2ggd29yZCBhY3Jvc3MgYWxsIHR3ZWV0cz8NCmBgYHtyfQ0KcHJpbnQoImNvbFN1bXMiKQ0KYGBgDQoNCl9fwqcgMi4zX18NCldoYXQgaXMgdGhlIG1vc3QgY29tbW9uIHdvcmQgYWNyb3NzIGFsbCB0aGUgdHdlZXRzIChpdCB3aWxsIGJlIHRoZSBsYXJnZXN0IGluIHRoZSBvdXRwdXR0ZWQgd29yZCBjbG91ZCk/DQpgYGB7cn0NCndvcmRjbG91ZChjb2xuYW1lcyhkdG0pLCBjb2xfc3VtcyhkdG0pLCBzY2FsZT1jKDIsIDAuMjUpKQ0KcHJpbnQoImFwcGxlIikNCmBgYA0KDQpfX8KnIDIuNF9fDQpDcmVhdGUgYSB3b3JkIGNsb3VkIHdpdGggdGhlIHVwZGF0ZWQgY29ycHVzLiBXaGF0IGlzIHRoZSBtb3N0IGNvbW1vbiB3b3JkIGluIHRoaXMgbmV3IGNvcnB1
cyAodGhlIGxhcmdlc3Qgd29yZCBpbiB0aGUgb3V0cHV0dGVkIHdvcmQgY2xvdWQpPyANCmBgYHtyfQ0KY29ycHVzID0gQ29ycHVzKFZlY3RvclNvdXJjZSh0dyRUd2VldCkpDQpjb3JwdXMgPSB0bV9tYXAoY29ycHVzLCB0b2xvd2VyKQ0KY29ycHVzID0gdG1fbWFwKGNvcnB1cywgcmVtb3ZlUHVuY3R1YXRpb24pDQpjb3JwdXMgPSB0bV9tYXAoY29ycHVzLCByZW1vdmVXb3JkcywgDQogICAgICAgICAgICAgICAgYygnYXBwbGUnLHN0b3B3b3JkcygiZW5nbGlzaCIpKSApDQpkdG0gPSBEb2N1bWVudFRlcm1NYXRyaXgoY29ycHVzKQ0Kd29yZGNsb3VkKGNvbG5hbWVzKGR0bSksIGNvbF9zdW1zKGR0bSksIHNjYWxlPWMoMiwgMC4yNSkpDQpgYGANCmBgYHtyfQ0KcHJpbnQoImlwaG9uZSIpDQpgYGANCg0KDQojIyMjICBTZWN0aW9uLTMNCioqV29yZCBDbG91ZCBBOioqDQpgYGB7ciBmaWcuaGVpZ2h0PTYsIGZpZy53aWR0aD02fQ0Kd29yZGNsb3VkKGNvbG5hbWVzKGR0bSksIGNvbF9zdW1zKGR0bSksIHNjYWxlPWMoMywgMC40KSwNCiAgICAgICAgICByb3QucGVyPTAuNSkgDQpgYGANCg0KKipXb3JkIENsb3VkIEI6KioNCmBgYHtyIGZpZy5oZWlnaHQ9NCwgZmlnLndpZHRoPTR9DQp3b3JkY2xvdWQoY29sbmFtZXMoZHRtKSwgY29sX3N1bXMoZHRtKSwgc2NhbGU9YygzLCAwLjQpLA0KICAgICAgICAgIG1pbi5mcmVxPTgsIHJhbmRvbS5vcmRlcj1GKSAgICAgIyBCDQpgYGANCg0KKipXb3JkIENsb3VkIEM6KioNCmBgYHtyIGZpZy5oZWlnaHQ9NCwgZmlnLndpZHRoPTR9DQpkdG0xID0gZHRtW3R3JEF2ZyA8PSAtMSxdDQp3b3JkY2xvdWQoY29sbmFtZXMoZHRtMSksIGNvbF9zdW1zKGR0bTEpLCBzY2FsZT1jKDMsIDAuNCksDQogICAgICAgICAgY29sb3JzID0gYnJld2VyLnBhbCg5LCJQdXJwbGVzIilbNjo5XSApICMgQw0KYGBgDQoNCioqV29yZCBDbG91ZCBEOioqDQpgYGB7ciBmaWcuaGVpZ2h0PTQsIGZpZy53aWR0aD00fQ0Kd29yZGNsb3VkKGNvbG5hbWVzKGR0bSksIGNvbF9zdW1zKGR0bSksIHNjYWxlPWMoMywgMC40KSwNCiAgbWluLmZyZXE9OCwgcmFuZG9tLm9yZGVyPUYsIHJhbmRvbS5jb2xvcj1ULA0KICBjb2xvcnMgPSBicmV3ZXIucGFsKDksIlB1cnBsZXMiKVs2OjldICkgIyBEDQpgYGANCl9fwqcgMy4xX18NCldoaWNoIHdvcmQgY2xvdWQgaXMgYmFzZWQgb25seSBvbiB0aGUgbmVnYXRpdmUgdHdlZXRzICh0d2VldHMgd2l0aCBBdmcgdmFsdWUgLTEgb3IgbGVzcyk/DQpgYGB7cn0NCnByaW50KCJXb3JkIENsb3VkIEMiKQ0KYGBgDQoNCl9fwqcgMy4yX18NCk9ubHkgb25lIHdvcmQgY2xvdWQgd2FzIGNyZWF0ZWQgd2l0aG91dCBtb2RpZnlpbmcgcGFyYW1ldGVycyBtaW4uZnJlcSBvciBtYXgud29yZHMuIFdoaWNoIHdvcmQgY2xvdWQgaXMgdGhpcz8NCmBgYHtyfQ0KcHJpbnQoIldvcmQgQ2xvdWQgQSIpDQpgYGANCg0KX1/CpyAzLjNfXw0KV2hpY2ggd29yZCBjbG91ZHMgd2VyZSBjcmVhdGVkIHdpdGggcGFyYW1ldGVyIHJh
bmRvbS5vcmRlciBzZXQgdG8gRkFMU0U/DQpgYGB7cn0NCnByaW50KCJXb3JkIENsb3VkIEIgLCBXb3JkIENsb3VkIEQiKQ0KYGBgDQoNCl9fwqcgMy40X18NCldoaWNoIHdvcmQgY2xvdWQgd2FzIGJ1aWx0IHdpdGggYSBub24tZGVmYXVsdCB2YWx1ZSBmb3IgcGFyYW1ldGVyIHJvdC5wZXI/DQpgYGB7cn0NCnByaW50KCJXb3JkIENsb3VkIEEiKQ0KYGBgDQoNCl9fwqcgMy41X18NCkZvciB3aGljaCB3b3JkIGNsb3VkIHdhcyB0aGUgcGFyYW1ldGVyIHJhbmRvbS5jb2xvciBzZXQgdG8gVFJVRT8NCmBgYHtyfQ0KcHJpbnQoIldvcmQgQ2xvdWQgRCIpDQpgYGANCg0KIyMjIyAgU2VjdGlvbi00DQpfX8KnIDQuMV9fDQpXaGljaCBjb2xvciBwYWxldHRlIHdvdWxkIGJlIG1vc3QgYXBwcm9wcmlhdGUgZm9yIHVzZSBpbiBhIHdvcmQgY2xvdWQgZm9yIHdoaWNoIHdlIHdhbnQgdG8gdXNlIGNvbG9yIHRvIGluZGljYXRlIHdvcmQgZnJlcXVlbmN5Pw0KYGBge3J9DQpsaWJyYXJ5KFJDb2xvckJyZXdlcikNCmRpc3BsYXkuYnJld2VyLmFsbCgpDQpwcmludCgiWWxPclJkIikNCmBgYA0KDQpfX8KnIDQuMl9fDQpXaGljaCBSQ29sb3JCcmV3ZXIgcGFsZXR0ZSBuYW1lIHdvdWxkIGJlIG1vc3QgYXBwcm9wcmlhdGUgdG8gdXNlIHdoZW4gcHJlcGFyaW5nIGFuIGltYWdlIGZvciBhIGRvY3VtZW50IHRoYXQgbXVzdCBiZSBpbiBncmF5c2NhbGU/DQpgYGB7cn0NCmRpc3BsYXkuYnJld2VyLnBhbCg3LCAiR3JleXMiKQ0KcHJpbnQoIkdyZXlzIikNCmBgYA0KDQpfX8KnIDQuM19fDQpXaGljaCBvZiB0aGUgZm9sbG93aW5nIGNvbW1hbmRzIGFkZHJlc3NlcyB0aGlzIGlzc3VlIGJ5IHJlbW92aW5nIHRoZSBmaXJzdCA0IGVsZW1lbnRzIG9mIHRoZSA5LWNvbG9yIHBhbGV0dGUgb2YgYmx1ZSBjb2xvcnM/IFNlbGVjdCBhbGwgdGhhdCBhcHBseS4NCmBgYHtyfQ0KYnJld2VyLnBhbCg5LCAiQmx1ZXMiKVtjKDUsNiw3LDgsOSldDQpgYGANCg0KYGBge3J9DQpicmV3ZXIucGFsKDksICJCbHVlcyIpW2MoLTEsLTIsLTMsLTQpXSANCmBgYA0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=