Hi all! In this section, we will use the NLP, tm, SnowballC, ggplot2, RColorBrewer and wordcloud packages to make the analysis of free-text survey answers more interesting.
Special thanks to Yuhao of NUS ISS for teaching me this code.
Download music_survey.csv here
# Load the music survey responses. Free-text answers are kept as character
# strings rather than factors. The argument name is spelled out in full
# (`stringsAsFactors`) instead of relying on partial argument matching.
m <- read.csv("music_survey.csv", stringsAsFactors = FALSE)
# Normalize the free-text answers to one survey question and count how often
# each (stemmed) term occurs.
#
# Args:
#   q: vector of answers to a single survey question, one element per
#      respondent. Coerced to character; missing (NA) answers are ignored.
#
# Returns:
#   A data.frame with one row per term and two columns:
#     term  - the normalized term (character)
#     count - total number of occurrences across all answers (numeric)
#   Row names are the terms. A zero-row data.frame with the same columns is
#   returned when there are no answers to analyse.
#
# NOTE(review): punctuation is not stripped before tokenizing, so a token
# with trailing punctuation may be counted separately from the bare word -
# confirm whether tm::removePunctuation should be added to the pipeline.
f <- function(q) {
  # Missing answers carry no text, so drop them before building the corpus.
  q <- as.character(q)
  q <- q[!is.na(q)]
  if (length(q) == 0L) {
    return(data.frame(
      term = character(0),
      count = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # Use namespace-qualified calls instead of library() inside the function,
  # so calling f() does not alter the caller's search path. SnowballC is
  # checked as well because tm::stemDocument relies on it for stemming.
  for (pkg in c("tm", "SnowballC")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("Package '", pkg, "' is required by f().", call. = FALSE)
    }
  }

  # Answers -> corpus (one document per answer).
  corpus <- tm::VCorpus(tm::VectorSource(q))

  # Lower-case the text.
  corpus <- tm::tm_map(corpus, tm::content_transformer(tolower))

  # Remove numbers and stop words.
  corpus <- tm::tm_map(corpus, tm::removeNumbers)
  stop_words <- c(tm::stopwords("english"), "the", "and")
  corpus <- tm::tm_map(corpus, tm::removeWords, stop_words)

  # Word stemming.
  corpus <- tm::tm_map(corpus, tm::stemDocument)

  # Collapse the runs of whitespace left behind by the removals above.
  corpus <- tm::tm_map(corpus, tm::stripWhitespace)

  # Document-term matrix (documents in rows, terms in columns); the column
  # sums are the per-term frequencies over all answers.
  dtm <- as.matrix(tm::DocumentTermMatrix(corpus))
  freq <- colSums(dtm)

  # as.character() keeps the `term` column present even when no term survives
  # normalization (names(freq) is NULL in that case).
  data.frame(
    term = as.character(names(freq)),
    count = freq,
    stringsAsFactors = FALSE
  )
}
# Term frequencies for the two open questions:
#   freq1 / freq2 - full frequency tables for Q1 (liked most) and Q2 (liked least)
#   f1 / f2       - only the terms counted more than 15 times
freq1 <- f(m$Q1..What.do.you.like.most.about.this.portable.music.player.)
freq2 <- f(m$Q2..What.do.you.like.least.about.this.portable.music.player.)

f1 <- subset(freq1, count > 15)
f2 <- subset(freq2, count > 15)
# plots
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
# Horizontal bar charts of the frequent terms for Q1 (f1) and Q2 (f2).
# The plots are print()ed explicitly: a bare ggplot expression is drawn only
# by top-level auto-printing, so nothing would appear when this script is
# run through source().
print(
  ggplot(f1, aes(x = term, y = count)) +
    geom_bar(stat = "identity") +
    coord_flip()
)

print(
  ggplot(f2, aes(x = term, y = count)) +
    geom_bar(stat = "identity") +
    coord_flip()
)

# word clouds
library(RColorBrewer)
library(wordcloud)
# Six-colour "Dark2" palette shared by both word clouds.
d2 <- brewer.pal(n = 6, name = "Dark2")

# Word cloud of the Q1 terms, capped at 100 words.
wordcloud(
  words = freq1$term,
  freq = freq1$count,
  max.words = 100,
  rot.per = 0.6,
  colors = d2
)

# Word cloud of the Q2 terms, same settings.
wordcloud(
  words = freq2$term,
  freq = freq2$count,
  max.words = 100,
  rot.per = 0.6,
  colors = d2
)
