This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Install necessary packages. Comment after installation

#install.packages('tm')
#install.packages('RColorBrewer')
#install.packages('wordcloud')

Include the packages.

library('tm')
## Loading required package: NLP
library('RColorBrewer')
library('wordcloud')

Process data

EntreleadershipData <- readRDS("Entreleadership.RDS")
Etweets <- EntreleadershipData$text


# swap out all non-alphanumeric characters
# Note that the definition of what constitutes a letter or a number or a punctuatution mark varies slightly depending upon your locale, so you may need to experiment a little to get exactly what you want.
# str_replace_all(tweets, "[^[:alnum:]]", " ")
# iconv(tweets, from = 'UTF-8', to = 'ASCII//TRANSLIT')
# Encoding(Etweets)  <- "UTF-8"

# Function to clean tweets
clean.text = function(x)
{
  # remove rt
  x = gsub("rt", "", x)
  # remove at
  x = gsub("@\\w+", "", x)
  # remove punctuation
  x = gsub("[[:punct:]]", "", x)
  # remove numbers
  x = gsub("[[:digit:]]", "", x)
  # remove links http
  x = gsub("http\\w+", "", x)
  # remove tabs
  x = gsub("[ |\t]{2,}", "", x)
  # remove blank spaces at the beginning
  x = gsub("^ ", "", x)
  # remove blank spaces at the end
  x = gsub(" $", "", x)
  # tolower
  x = tolower(x)
  return(x)
}

# clean tweets
Etweets = clean.text(Etweets)

# Word cloud section
corpus = Corpus(VectorSource(Etweets))

# create term-document matrix
tdm = TermDocumentMatrix(
  corpus,
  control = list(
    wordLengths=c(3,20),
    removePunctuation = TRUE,
    stopwords = c("the", "a", stopwords("english")),
    removeNumbers = TRUE, 
# tolower may cause trouble on Window because UTF-8 encoding, changed to FALSE  
    tolower = FALSE) )

# convert as matrix. It may consume near 1g of your RAM
tdm = as.matrix(tdm)

# get word counts in decreasing order
word_freqs = sort(rowSums(tdm), decreasing=TRUE) 

#check top 50 most mentioned words
head(word_freqs, 50)
##           people            rtthe    communication            great 
##               55               48               44               36 
##             team             will          caliber            goals 
##               34               30               29               29 
##        intensity            match          leaders            right 
##               29               29               25               23 
##           right…       determines             gold              hat 
##               22               22               22               22 
##         received              way             wear            from… 
##               22               22               22               21 
## <U+0001F40E>entreleadership          doesn’t            event             it’s 
##               20               19               15               15 
##             move         business  entreleadership           action 
##               15               14               14               14 
##          changes          emotion            moves      necessarily 
##               14               13               13               13 
##               p…             dont       everything             miss 
##               13               13               13               13 
##     weekspodcast            light              get                … 
##               13               13               12               12 
##         action”…            gears     conversation          rtsolid 
##               12               12               12               12 
##             rtdo        challenge   rt“information              amp 
##               12               11               11               10 
##   intentionality         building 
##               10                9
#remove the top words which do not generate insights such as "the", "a", "and", etc.
#word_freqs = word_freqs[-(1:5)]  #Here [-(1:5)] is 1st-5th words in the list we want to remove 
#commented because in this case we want to keep the first to fifth words in the list
#create a data frame with words and their frequencies
dm = data.frame(word=names(word_freqs), freq=word_freqs)

#Plot corpus in a clored graph; need RColorBrewer package

wordcloud(head(dm$word, 50), head(dm$freq, 50), random.order=FALSE, colors=brewer.pal(8, "Dark2"))

#check top 50 most mentioned words
head(word_freqs, 50)
##           people            rtthe    communication            great 
##               55               48               44               36 
##             team             will          caliber            goals 
##               34               30               29               29 
##        intensity            match          leaders            right 
##               29               29               25               23 
##           right…       determines             gold              hat 
##               22               22               22               22 
##         received              way             wear            from… 
##               22               22               22               21 
## <U+0001F40E>entreleadership          doesn’t            event             it’s 
##               20               19               15               15 
##             move         business  entreleadership           action 
##               15               14               14               14 
##          changes          emotion            moves      necessarily 
##               14               13               13               13 
##               p…             dont       everything             miss 
##               13               13               13               13 
##     weekspodcast            light              get                … 
##               13               13               12               12 
##         action”…            gears     conversation          rtsolid 
##               12               12               12               12 
##             rtdo        challenge   rt“information              amp 
##               12               11               11               10 
##   intentionality         building 
##               10                9
#------------------------------------Wait until data runs first time then parse it

Bigram

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)      # data manipulation & plotting
## -- Attaching packages ---------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0     v readr   1.3.1
## v tibble  2.0.1     v purrr   0.3.0
## v tidyr   0.8.2     v stringr 1.3.1
## v ggplot2 3.1.0     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks stats::lag()
library(stringr)        # text cleaning and regular expressions
library(tidytext)       # provides additional text mining functions


titles <- c("Entreleadership")

books <- list(Etweets)
  
series <- tibble()

# for(i in seq_along(titles)) {
for(i in 1) {
        
        clean <- tibble(chapter = seq_along(books[[i]]),
                        text = books[[i]]) %>%
          # Number of gram
             unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
             mutate(book = titles[i]) %>%
             select(book, everything())

        series <- rbind(series, clean)
}

# series

series %>%
        count(bigram, sort = TRUE)
## # A tibble: 1,138 x 2
##    bigram            n
##    <chr>         <int>
##  1 of your          59
##  2 your team        32
##  3 in the           30
##  4 caliber of       29
##  5 intensity of     29
##  6 match the        29
##  7 should match     29
##  8 team should      29
##  9 the intensity    29
## 10 your goals       29
## # ... with 1,128 more rows
series %>%
        separate(bigram, c("word1", "word2"), sep = " ") %>%
        filter(!word1 %in% stop_words$word,
               !word2 %in% stop_words$word) %>%
        count(word1, word2, sort = TRUE)
## # A tibble: 245 x 3
##    word1       word2               n
##    <chr>       <chr>           <int>
##  1 rtthe       caliber            27
##  2 goals       entreleadership    22
##  3 wear        determines         22
##  4 rtthe       hat                21
##  5 move        people             14
##  6 action      it’s               13
##  7 doesn’t     necessarily        13
##  8 dont        miss               13
##  9 information doesn’t            13
## 10 it’s        emotion            13
## # ... with 235 more rows
#Mega test case, let's see what happens
#run search on specific terms brought up in data


index = grep("small", Etweets)
Etweets[index]
##  [1] "this was how all the multinational firms staedthey all staed small never despise small beginnings just do…"
##  [2] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>er despise small beginnings just do…"
##  [3] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>er despise small beginnings just do…"
##  [4] "if your a small business looking for the best payroll accounting tax and bookkeeping solu…"                
##  [5] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"        
##  [6] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"        
##  [7] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"        
##  [8] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"        
##  [9] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"        
## [10] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"        
## [11] "had a great time speaking to oversmall business owners at ourday event a few weeks ago…"                   
## [12] "dave ramsey entreleadership good book for small business sta ups"
index2 = grep("communication", Etweets)
Etweets[index2]
##  [1] "not all communication is goodis sharing the communication that will kill your team"                                    
##  [2] "one of the best podcasts on effectivecommunication i have yet listened to great episode byon ho…"                      
##  [3] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
##  [4] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
##  [5] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
##  [6] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
##  [7] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
##  [8] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
##  [9] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [10] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [11] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [12] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [13] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [14] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [15] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [16] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [17] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [18] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [19] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [20] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [21] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [22] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [23] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from…"                       
## [24] "the hat you wear determines the way your communication will be received\n\nthis is gold from"                          
## [25] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind…"
## [26] "communication what do you want your audience to know how do you want them to feel and what do you want them to do…"    
## [27] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [28] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind…"
## [29] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind…"
## [30] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind…"
## [31] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind…"
## [32] "communication is the grease in the gears you can have great gears in your company but it will still freeze up gr…"     
## [33] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [34] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [35] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [36] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [37] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [38] "such a very true statement mr ramsey communication is the key to success america <U+0001F44D><U+0001F511><U+0001F60A>s great…"
## [39] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [40] "rtgreat leaders and great communication go hand in hand find out how to master this skill this week with …"            
## [41] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [42] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [43] "rtgreat leaders and great communication go hand in hand find out how to master this skill this week with …"            
## [44] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [45] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [46] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p…"               
## [47] "solid communication changes everything leaders dont miss this weekspodcast great conversatio…"                         
## [48] "rtgreat leaders and great communication go hand in hand find out how to master this skill this week with …"            
## [49] "great leaders and great communication go hand in hand find out how to master this skill this week with…"
index3 = grep("people", Etweets)
Etweets[index3]
##  [1] "“the caliber of your team should match the intensity of your goals <U+0001F40E>”entreleadership\n\nright people in the right…"
##  [2] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [3] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [4] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [5] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [6] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [7] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [8] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
##  [9] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [10] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [11] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [12] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [13] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [14] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [15] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [16] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [17] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [18] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [19] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [20] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [21] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right…"
## [22] "the caliber of your team should match the intensity of your goals <U+0001F40E><U+0001F40E>entreleadership\n\nright people in the right…"
## [23] "leadership is the a of giving people a platform for spreading ideas that workseth godin"                            
## [24] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [25] "rti disagree it’s easy to tell people what they want to hear it requires a strong leader to move people t…"         
## [26] "i disagree it’s easy to tell people what they want to hear it requires a strong leader to move p…"                  
## [27] "ninety percent of leadership is the ability to communicate something people wantdianne feinstein"                   
## [28] "people often say motivation doesnt last well neither does bathing thats why we recommend it daily zig ziglar"       
## [29] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [30] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [31] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [32] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [33] "“information doesn’t necessarily move people to action it’s emotion that moves people to action”\n—…"               
## [34] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [35] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [36] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [37] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [38] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [39] "rt“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                
## [40] "“information doesn’t necessarily move people to action it’s emotion that moves people to action”…"                  
## [41] "if you want to study successful people then study how they think not what they do so many golden nuggets in this e…"
index4 = grep("small business", Etweets)
Etweets[index4]
## [1] "if your a small business looking for the best payroll accounting tax and bookkeeping solu…"        
## [2] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"
## [3] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"
## [4] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"
## [5] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"
## [6] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"
## [7] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"
## [8] "had a great time speaking to oversmall business owners at ourday event a few weeks ago…"           
## [9] "dave ramsey entreleadership good book for small business sta ups"
index5 = grep("business", Etweets)
Etweets[index5]
##  [1] "if your a small business looking for the best payroll accounting tax and bookkeeping solu…"                       
##  [2] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"               
##  [3] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"               
##  [4] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"               
##  [5] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"               
##  [6] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"               
##  [7] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou…"               
##  [8] "had a great time speaking to oversmall business owners at ourday event a few weeks ago…"                          
##  [9] "no that is awesomerunning a business on our principles have you checked out"                                      
## [10] "entreleadershipyears of practical business wisdom from the trenches dave ramsey …"                                
## [11] "grow the business manage the growth"                                                                              
## [12] "hi all if youre looking for a good podcast to help understand and hone practical business skills check out entre…"
## [13] "dave ramsey entreleadership good book for small business sta ups"                                                 
## [14] "need a kick in your business butt i just signed up for daveramsey entreleadershipday challenge daily tips f…"

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.