## Code to produce a wordcloud from Duncan Selbie's 2016 Friday messages

## Install (when missing) and attach the packages used by this script.
## The attach order is tm, tidytext, dplyr, SnowballC, wordcloud; dplyr
## masks filter/lag from stats and intersect/setdiff/setequal/union from base.
## Attaching wordcloud also attaches RColorBrewer, which supplies brewer.pal().
pkgs <- c("tm", "tidytext", "dplyr", "SnowballC", "wordcloud")
for (pkg in pkgs) {
  # requireNamespace() only checks availability; each package installs
  # itself (not some other package) when absent, and library() then fails
  # loudly if the install did not succeed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

## Import and clean text files
# Single definition of the corpus directory, used for the actual load.
cname <- file.path("~", "Documents/R_Projects/feeback/data/texts", "texts1")
docs <- Corpus(DirSource(cname))

# Keep documents 4 to 32 only.
# NOTE(review): assumes the directory holds at least 32 files - confirm.
docs <- docs[4:32]

# Replace "/", "@", "|" and "<" with a space. Wrapping gsub() in
# content_transformer() keeps every element a tm document instead of
# overwriting it with a bare character vector.
to_space <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
docs <- tm_map(docs, to_space, "[/@|<]")

# Lower-case first: stopwords("english") is all lower case, so removing
# stop words before lower-casing would leave "The", "And", ... in place.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)

## Create document term matrix
dtm <- DocumentTermMatrix(docs)

## Create a data frame with one row per document, keeping only a running
## document index and the document text
dstidy <- tidy(docs) %>%
  mutate(doc = row_number()) %>%
  select(doc, text)

## Count words: split the text into one word per row, drop the words listed
## in stop_words, then tally the rest (most frequent first)
dswords <- dstidy %>%
  unnest_tokens(word, text) %>%
  # Explicit key: avoids the "Joining, by = ..." message and prevents the
  # join from silently picking up any other shared column name.
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

## Create word cloud: words seen at least 10 times, at most 150 words
wordcloud(
  words = dswords$word,
  freq = dswords$n,
  min.freq = 10,
  max.words = 150,
  colors = brewer.pal(8, "Dark2")
)