Page 1

Frame 1

# Build a cleaned text corpus from the publisher list, one document per line
# of the remote file.
# NOTE(review): assumes the tm package is attached elsewhere (e.g. in a setup
# chunk not visible here) -- confirm.
filePath <- "https://raw.githubusercontent.com/ryanmar814/datavizfinal/master/books_publisher.csv"
text <- readLines(filePath)
docs <- Corpus(VectorSource(text))

# Transformer that replaces every match of `pattern` with a single space.
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))

# Turn separator characters into spaces so the words on either side stay
# distinct tokens.
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|") # escaped: a bare "|" is regex alternation

# Normalise case, then drop digits and common English stop words.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removeWords, stopwords("english"))

# set stopwords? -- extra words removed on top of the English stop list.
docs <- tm_map(docs, removeWords, c("books", "publishing", "company"))
docs <- tm_map(docs, removePunctuation)
# docs <- tm_map(docs, stripWhitespace)

# Count how often each term occurs across the whole corpus and list the ten
# most frequent terms.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)

# Rows are terms, so the row sums are total counts per term.
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)

# Draw the word cloud from the term-frequency table `d`.
# NOTE(review): assumes the wordcloud and RColorBrewer packages are attached
# elsewhere -- confirm.
set.seed(1234) # fixed seed so repeated renders use the same random draws
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 5,
  max.words = 250,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)


This is a word cloud.

Frame 2

# Build a cleaned text corpus from the book titles, one document per line of
# the remote file.
# NOTE(review): assumes the tm package is attached elsewhere (e.g. in a setup
# chunk not visible here) -- confirm.
filePath <- "https://raw.githubusercontent.com/ryanmar814/datavizfinal/master/books_titles.csv"
text <- readLines(filePath)
docs <- Corpus(VectorSource(text))

# Transformer that replaces every match of `pattern` with a single space.
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))

# Turn separator characters into spaces so the words on either side stay
# distinct tokens.
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|") # escaped: a bare "|" is regex alternation

# Normalise case, then drop digits and common English stop words.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removeWords, stopwords("english"))

# set stopwords? -- extra words removed on top of the English stop list.
docs <- tm_map(docs, removeWords, c("part", "make", "made"))
docs <- tm_map(docs, removePunctuation)
# docs <- tm_map(docs, stripWhitespace)

# Count how often each term occurs across the whole corpus and list the ten
# most frequent terms.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)

# Rows are terms, so the row sums are total counts per term.
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)

# Draw the word cloud from the term-frequency table `d`.
# NOTE(review): assumes the wordcloud and RColorBrewer packages are attached
# elsewhere -- confirm.
set.seed(1234) # fixed seed so repeated renders use the same random draws
wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 5,
  max.words = 100,
  random.order = TRUE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)


This is also a word cloud.

rmarkdown::run