This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
Install necessary packages. Comment after installation
#install.packages('tm')
#install.packages('RColorBrewer')
#install.packages('wordcloud')
Include the packages.
library('tm')
## Loading required package: NLP
library('RColorBrewer')
library('wordcloud')
Process data
EntreleadershipData <- readRDS("Entreleadership.RDS")
Etweets <- EntreleadershipData$text
# swap out all non-alphanumeric characters
# Note that the definition of what constitutes a letter or a number or a punctuatution mark varies slightly depending upon your locale, so you may need to experiment a little to get exactly what you want.
# str_replace_all(tweets, "[^[:alnum:]]", " ")
# iconv(tweets, from = 'UTF-8', to = 'ASCII//TRANSLIT')
# Encoding(Etweets) <- "UTF-8"
# Function to clean tweets
clean.text = function(x)
{
# remove rt
x = gsub("rt", "", x)
# remove at
x = gsub("@\\w+", "", x)
# remove punctuation
x = gsub("[[:punct:]]", "", x)
# remove numbers
x = gsub("[[:digit:]]", "", x)
# remove links http
x = gsub("http\\w+", "", x)
# remove tabs
x = gsub("[ |\t]{2,}", "", x)
# remove blank spaces at the beginning
x = gsub("^ ", "", x)
# remove blank spaces at the end
x = gsub(" $", "", x)
# tolower
x = tolower(x)
return(x)
}
# clean tweets
Etweets = clean.text(Etweets)
# Word cloud section
corpus = Corpus(VectorSource(Etweets))
# create term-document matrix
tdm = TermDocumentMatrix(
corpus,
control = list(
wordLengths=c(3,20),
removePunctuation = TRUE,
stopwords = c("the", "a", stopwords("english")),
removeNumbers = TRUE,
# tolower may cause trouble on Window because UTF-8 encoding, changed to FALSE
tolower = FALSE) )
# convert as matrix. It may consume near 1g of your RAM
tdm = as.matrix(tdm)
# get word counts in decreasing order
word_freqs = sort(rowSums(tdm), decreasing=TRUE)
#check top 50 most mentioned words
head(word_freqs, 50)
## people rtthe communication great
## 55 48 44 36
## team will caliber goals
## 34 30 29 29
## intensity match leaders right
## 29 29 25 23
## right
determines gold hat
## 22 22 22 22
## received way wear from
## 22 22 22 21
## <U+0001F40E>entreleadership doesnt event its
## 20 19 15 15
## move business entreleadership action
## 15 14 14 14
## changes emotion moves necessarily
## 14 13 13 13
## p
dont everything miss
## 13 13 13 13
## weekspodcast light get
## 13 13 12 12
## action
gears conversation rtsolid
## 12 12 12 12
## rtdo challenge rtinformation amp
## 12 11 11 10
## intentionality building
## 10 9
#remove the top words which do not generate insights such as "the", "a", "and", etc.
#word_freqs = word_freqs[-(1:5)] #Here [-(1:5)] is 1st-5th words in the list we want to remove
#commented because in this case we want to keep the first to fifth words in the list
#create a data frame with words and their frequencies
dm = data.frame(word=names(word_freqs), freq=word_freqs)
#Plot corpus in a clored graph; need RColorBrewer package
wordcloud(head(dm$word, 50), head(dm$freq, 50), random.order=FALSE, colors=brewer.pal(8, "Dark2"))
#check top 50 most mentioned words
head(word_freqs, 50)
## people rtthe communication great
## 55 48 44 36
## team will caliber goals
## 34 30 29 29
## intensity match leaders right
## 29 29 25 23
## right
determines gold hat
## 22 22 22 22
## received way wear from
## 22 22 22 21
## <U+0001F40E>entreleadership doesnt event its
## 20 19 15 15
## move business entreleadership action
## 15 14 14 14
## changes emotion moves necessarily
## 14 13 13 13
## p
dont everything miss
## 13 13 13 13
## weekspodcast light get
## 13 13 12 12
## action
gears conversation rtsolid
## 12 12 12 12
## rtdo challenge rtinformation amp
## 12 11 11 10
## intentionality building
## 10 9
#------------------------------------Wait until data runs first time then parse it
Bigram
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse) # data manipulation & plotting
## -- Attaching packages ---------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v readr 1.3.1
## v tibble 2.0.1 v purrr 0.3.0
## v tidyr 0.8.2 v stringr 1.3.1
## v ggplot2 3.1.0 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr) # text cleaning and regular expressions
library(tidytext) # provides additional text mining functions
titles <- c("Entreleadership")
books <- list(Etweets)
series <- tibble()
# for(i in seq_along(titles)) {
for(i in 1) {
clean <- tibble(chapter = seq_along(books[[i]]),
text = books[[i]]) %>%
# Number of gram
unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
mutate(book = titles[i]) %>%
select(book, everything())
series <- rbind(series, clean)
}
# series
series %>%
count(bigram, sort = TRUE)
## # A tibble: 1,138 x 2
## bigram n
## <chr> <int>
## 1 of your 59
## 2 your team 32
## 3 in the 30
## 4 caliber of 29
## 5 intensity of 29
## 6 match the 29
## 7 should match 29
## 8 team should 29
## 9 the intensity 29
## 10 your goals 29
## # ... with 1,128 more rows
series %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word,
!word2 %in% stop_words$word) %>%
count(word1, word2, sort = TRUE)
## # A tibble: 245 x 3
## word1 word2 n
## <chr> <chr> <int>
## 1 rtthe caliber 27
## 2 goals entreleadership 22
## 3 wear determines 22
## 4 rtthe hat 21
## 5 move people 14
## 6 action its 13
## 7 doesnt necessarily 13
## 8 dont miss 13
## 9 information doesnt 13
## 10 its emotion 13
## # ... with 235 more rows
#Mega test case, let's see what happens
#run search on specific terms brought up in data
index = grep("small", Etweets)
Etweets[index]
## [1] "this was how all the multinational firms staedthey all staed small never despise small beginnings just do
"
## [2] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>er despise small beginnings just do
"
## [3] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>er despise small beginnings just do
"
## [4] "if your a small business looking for the best payroll accounting tax and bookkeeping solu
"
## [5] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [6] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [7] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [8] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [9] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [10] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [11] "had a great time speaking to oversmall business owners at ourday event a few weeks ago
"
## [12] "dave ramsey entreleadership good book for small business sta ups"
index2 = grep("communication", Etweets)
Etweets[index2]
## [1] "not all communication is goodis sharing the communication that will kill your team"
## [2] "one of the best podcasts on effectivecommunication i have yet listened to great episode byon ho
"
## [3] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [4] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [5] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [6] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [7] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [8] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [9] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [10] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [11] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [12] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [13] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [14] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [15] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [16] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [17] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [18] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [19] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [20] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [21] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [22] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [23] "rtthe hat you wear determines the way your communication will be received\n\nthis is gold from
"
## [24] "the hat you wear determines the way your communication will be received\n\nthis is gold from"
## [25] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind
"
## [26] "communication what do you want your audience to know how do you want them to feel and what do you want them to do
"
## [27] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [28] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind
"
## [29] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind
"
## [30] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind
"
## [31] "rtcommunication is the grease in the gears you can have great gears in your company but it will still freeze up grind
"
## [32] "communication is the grease in the gears you can have great gears in your company but it will still freeze up gr
"
## [33] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [34] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [35] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [36] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [37] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [38] "such a very true statement mr ramsey communication is the key to success america <U+0001F44D><U+0001F511><U+0001F60A>s great
"
## [39] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [40] "rtgreat leaders and great communication go hand in hand find out how to master this skill this week with
"
## [41] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [42] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [43] "rtgreat leaders and great communication go hand in hand find out how to master this skill this week with
"
## [44] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [45] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [46] "rtsolid communication changes everything leaders dont miss this weekspodcast great conversation with p
"
## [47] "solid communication changes everything leaders dont miss this weekspodcast great conversatio
"
## [48] "rtgreat leaders and great communication go hand in hand find out how to master this skill this week with
"
## [49] "great leaders and great communication go hand in hand find out how to master this skill this week with
"
index3 = grep("people", Etweets)
Etweets[index3]
## [1] "the caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [2] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [3] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [4] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [5] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [6] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [7] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [8] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [9] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [10] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [11] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [12] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [13] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [14] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [15] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [16] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [17] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [18] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [19] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [20] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [21] "rtthe caliber of your team should match the intensity of your goals <U+0001F40E>entreleadership\n\nright people in the right
"
## [22] "the caliber of your team should match the intensity of your goals <U+0001F40E><U+0001F40E>entreleadership\n\nright people in the right
"
## [23] "leadership is the a of giving people a platform for spreading ideas that workseth godin"
## [24] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [25] "rti disagree its easy to tell people what they want to hear it requires a strong leader to move people t
"
## [26] "i disagree its easy to tell people what they want to hear it requires a strong leader to move p
"
## [27] "ninety percent of leadership is the ability to communicate something people wantdianne feinstein"
## [28] "people often say motivation doesnt last well neither does bathing thats why we recommend it daily zig ziglar"
## [29] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [30] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [31] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [32] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [33] "information doesnt necessarily move people to action its emotion that moves people to action\n
"
## [34] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [35] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [36] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [37] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [38] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [39] "rtinformation doesnt necessarily move people to action its emotion that moves people to action
"
## [40] "information doesnt necessarily move people to action its emotion that moves people to action
"
## [41] "if you want to study successful people then study how they think not what they do so many golden nuggets in this e
"
index4 = grep("small business", Etweets)
Etweets[index4]
## [1] "if your a small business looking for the best payroll accounting tax and bookkeeping solu
"
## [2] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [3] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [4] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [5] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [6] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [7] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [8] "had a great time speaking to oversmall business owners at ourday event a few weeks ago
"
## [9] "dave ramsey entreleadership good book for small business sta ups"
index5 = grep("business", Etweets)
Etweets[index5]
## [1] "if your a small business looking for the best payroll accounting tax and bookkeeping solu
"
## [2] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [3] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [4] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [5] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [6] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [7] "rthad a great time speaking to oversmall business owners at ourday event a few weeks ago check ou
"
## [8] "had a great time speaking to oversmall business owners at ourday event a few weeks ago
"
## [9] "no that is awesomerunning a business on our principles have you checked out"
## [10] "entreleadershipyears of practical business wisdom from the trenches dave ramsey
"
## [11] "grow the business manage the growth"
## [12] "hi all if youre looking for a good podcast to help understand and hone practical business skills check out entre
"
## [13] "dave ramsey entreleadership good book for small business sta ups"
## [14] "need a kick in your business butt i just signed up for daveramsey entreleadershipday challenge daily tips f
"
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.