This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
Install the necessary packages (uncomment the lines below), then comment them out again after installation.
# install.packages('tm')
# install.packages('RColorBrewer')
# install.packages('wordcloud')
# install.packages('tidytext')
# install.packages('dplyr')
# install.packages("readr")
# install.packages("plyr")
# install.packages("stringr")
# install.packages("stringi")
# install.packages('plotly')
Include the packages.
library('tm')
## Loading required package: NLP
library('RColorBrewer')
library('wordcloud')
library('readr')
library('tidytext')
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("plyr")
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library("stringr")
library("stringi")
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Process data
# Load the two saved tweet data sets and keep only their text column.
entrepreneurshipData <- readRDS("entrepreneurship.RDS")
BreneData <- readRDS("BreneBrown.RDS")
Etweets <- entrepreneurshipData$text
Btweets <- BreneData$text

# Read the word lists; scan() skips everything after a ';' on a line.
money.words <- scan("moneyWords.txt", what = "character", comment.char = ";")
fear.words <- scan("fearWords.txt", what = "character", comment.char = ";")
pos.words <- scan("positive-words.txt", what = "character", comment.char = ";")
neg.words <- scan("negative-words.txt", what = "character", comment.char = ";")
# Clean data
# Normalise raw tweet text before scoring / text mining.
#
# Args:
#   x: character vector of tweets.
# Returns:
#   A character vector of the same length as `x` with retweet markers, links,
#   @mentions, punctuation and digits removed, whitespace collapsed to single
#   spaces and leading/trailing spaces trimmed. Letter case is left unchanged.
clean.text <- function(x) {
  # Remove the retweet marker only when it stands alone as a word. Matching a
  # bare "rt" also deletes the letters inside ordinary words ("heart" became
  # "hea", "worth" became "wor") and misses the upper-case "RT" marker.
  x <- gsub("\\b[Rr][Tt]\\b", "", x)
  # Remove links while their punctuation is still intact, so the whole URL
  # (and nothing else) is dropped.
  x <- gsub("http\\S*", "", x)
  # Remove @mentions.
  x <- gsub("@\\w+", "", x)
  # Remove punctuation.
  x <- gsub("[[:punct:]]", "", x)
  # Remove numbers.
  x <- gsub("[[:digit:]]", "", x)
  # Collapse every run of whitespace (spaces, tabs, newlines) to ONE space.
  # Replacing the run with "" glues the neighbouring words together.
  x <- gsub("[[:space:]]+", " ", x)
  # Trim the single blank that may remain at either end.
  x <- gsub("^ | $", "", x)
  # tolower is intentionally not applied here
  # x <- tolower(x)
  x
}
# Run both tweet vectors through clean.text(), overwriting the raw text.
Etweets <- clean.text(Etweets)
Btweets <- clean.text(Btweets)
# Count, for every sentence, how many of its words appear in a dictionary.
#
# Args:
#   sentences: vector of texts to score (one score per element).
#   dict:      character vector of dictionary terms; words are lower-cased
#              before being compared against it.
#   .progress: name of the plyr progress bar, forwarded to laply().
# Returns:
#   A data.frame with one row per sentence and the columns
#     score - number of word occurrences found in `dict`
#     text  - the input sentences (character input stays character instead of
#             being converted to a factor on R < 4.0).
score.topic <- function(sentences, dict, .progress = 'none') {
  # gsub() and tolower() are vectorised, so normalise all sentences in one
  # pass instead of once per sentence inside the loop.
  cleaned <- gsub('[[:punct:]]', '', sentences)
  cleaned <- gsub('[[:cntrl:]]', '', cleaned)
  cleaned <- gsub('\\d+', '', cleaned)
  cleaned <- tolower(cleaned)

  # str_split() (stringr) returns one character vector of words per sentence.
  word.lists <- str_split(cleaned, '\\s+')

  # "l" + "a" + "ply": list in, simple array of scores out (plyr).
  scores <- laply(word.lists, function(words, dict) {
    # %in% yields TRUE/FALSE per word; sum() counts the TRUEs.
    sum(words %in% dict)
  }, dict, .progress = .progress)

  data.frame(score = scores, text = sentences, stringsAsFactors = FALSE)
}
# Score the Btweets texts against the money dictionary.
topic.scores <- score.topic(Btweets, money.words, .progress = "none")
# topic.scores <- score.topic(Etweets, fear.words, .progress = "none")

# Rows with a non-zero score are the tweets that mention the topic.
topic.mentioned <- subset(topic.scores, score != 0)
N <- nrow(topic.scores)
Nmentioned <- nrow(topic.mentioned)

# Two-row summary table feeding the pie chart.
dftemp <- data.frame(
  topic = c("Mentioned", "Not Mentioned"),
  number = c(Nmentioned, N - Nmentioned)
)

# Pie chart with the axis decorations switched off.
p <- layout(
  plot_ly(data = dftemp, labels = ~topic, values = ~number, type = "pie"),
  title = "Pie Chart of Tweets Mentioning money",
  xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
  yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
)
p
# Score every sentence as (#positive words) - (#negative words).
#
# Args:
#   sentences: vector of texts to score (one score per element).
#   pos.words: character vector of positive terms.
#   neg.words: character vector of negative terms.
#   .progress: name of the plyr progress bar, forwarded to laply().
# Returns:
#   A data.frame with one row per sentence and the columns
#     score - positive matches minus negative matches (words are lower-cased
#             before being compared against the dictionaries)
#     text  - the input sentences (character input stays character instead of
#             being converted to a factor on R < 4.0).
score.sentiment <- function(sentences, pos.words, neg.words, .progress = 'none') {
  # gsub() and tolower() are vectorised, so normalise all sentences in one
  # pass instead of once per sentence inside the loop.
  cleaned <- gsub('[[:punct:]]', '', sentences)
  cleaned <- gsub('[[:cntrl:]]', '', cleaned)
  cleaned <- gsub('\\d+', '', cleaned)
  cleaned <- tolower(cleaned)

  # str_split() (stringr) returns one character vector of words per sentence.
  word.lists <- str_split(cleaned, '\\s+')

  # "l" + "a" + "ply": list in, simple array of scores out (plyr).
  scores <- laply(word.lists, function(words, pos.words, neg.words) {
    # %in% yields TRUE/FALSE per word; sum() counts the TRUEs.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, pos.words, neg.words, .progress = .progress)

  data.frame(score = scores, text = sentences, stringsAsFactors = FALSE)
}
# Sentiment score per Btweets text, computed from the positive/negative word lists.
sentiment.scores <- score.sentiment(
  Btweets,
  pos.words,
  neg.words,
  .progress = "none"
)

# Histogram of the score distribution.
score <- sentiment.scores$score
p <- plot_ly(x = ~score, type = "histogram")
p
# Word cloud of the terms used in negatively scored tweets.
# library() stops with an error when a package is missing; require() would
# only return FALSE and let a later call fail with a less helpful message.
library(tm)
library(wordcloud)
library(RColorBrewer)

# Texts whose sentiment score is below zero.
negativeTweets <- subset(sentiment.scores, score < 0)$text
corpus <- Corpus(VectorSource(negativeTweets))
# corpus = Corpus(VectorSource(cmail))

# create term-document matrix
# NOTE(review): tolower = FALSE keeps the original case while the stop-word
# list is lower case, so capitalised forms such as "The" presumably survive
# the stop-word filter - confirm this is intended.
tdm <- TermDocumentMatrix(
  corpus,
  control = list(
    wordLengths = c(3, 20),
    removePunctuation = TRUE,
    stopwords = c("the", "a", stopwords("english")),
    removeNumbers = TRUE,
    tolower = FALSE
  )
)

# convert as matrix
tdm <- as.matrix(tdm)

# get word counts in decreasing order
word_freqs <- sort(rowSums(tdm), decreasing = TRUE)
#word_freqs = word_freqs[-(1:12)]

# create a data frame with words and their frequencies
dm <- data.frame(word = names(word_freqs), freq = word_freqs)

# Plot the 100 most frequent terms in a colored cloud; needs the RColorBrewer package
wordcloud(head(dm$word, 100), head(dm$freq, 100), random.order = FALSE, colors = brewer.pal(8, "Dark2"))

# check top 50 most mentioned words
head(word_freqs, 50)
## imperfect also vulnerable truth brave
## 79 73 72 68 66
## amp afraid change doesnt sometimes
## 63 59 58 56 56
## RTYes wor
Im shame youre
## 53 53 48 48 46
## emotions arena hea love interested
## 43 41 41 40 40
## Not getting ass kicked often
## 39 38 37 37 37
## ability RTWithout lead manage selfawareness
## 35 34 34 34 34
## unknowingly is
vulnerability live wohy
## 34 33 32 32 30
## occasion people in
mandate hard
## 30 29 29 29 28
## RTI struggle The
wired
## 28 27 26 25 22
## fear will numb can hold
## 21 21 21 21 21
# Show the negatively scored tweets that contain a given keyword (here
# "afraid"). Replace the keyword to inspect other terms as needed.
index <- grep("afraid", negativeTweets)
negativeTweets[index]
## [1] Its not failure were afraid ofits shame amp blame <U+0001F61E> \nBeing vulnerablecourage BreneBrown says it best TED
## [2] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [3] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [4] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [5] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [6] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [7] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [8] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [9] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [10] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [11] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [12] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [13] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [14] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [15] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [16] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [17] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [18] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [19] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [20] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [21] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [22] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [23] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [24] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [25] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [26] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [27] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [28] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [29] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [30] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [31] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [32] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [33] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [34] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [35] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [36] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [37] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [38] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [39] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [40] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [41] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [42] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [43] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [44] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [45] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [46] Yes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave an
## [47] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [48] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [49] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [50] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [51] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [52] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [53] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [54] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [55] RTYes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave and wor
## [56] Yes I am imperfect and vulnerable and sometimes afraid but that doesnt change the truth that I am also brave an
## [57] Reading it tonight Im afraid to move forward But cant bare to stand still
## [58] RTMen walk this tightrope where any sign of weakness illicits shame amp so theyre afraid to make themselves vulnerable for fe
## [59] RTMen walk this tightrope where any sign of weakness illicits shame amp so theyre afraid to make themselves vulnerable for fe
## 2346 Levels: \n
... Yowza
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.