library(rtweet)
library(tidyverse)
── Attaching packages ─────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0 ✔ purrr 0.2.5
✔ tibble 2.0.0 ✔ dplyr 0.7.8
✔ tidyr 0.8.2 ✔ stringr 1.3.1
✔ readr 1.3.1 ✔ forcats 0.3.0
── Conflicts ────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ purrr::flatten() masks rtweet::flatten()
✖ dplyr::lag() masks stats::lag()
library(tidytext)
library(DT)
library(plotly)
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(wordcloud2)
# Request up to 5000 tweets from the account's timeline via rtweet.
# The handle must be a single-line string: the original literal had a line
# break inside the quotes, which made the newline part of the screen name.
# NOTE(review): the handle looks like it may be truncated -- confirm the exact
# account name before re-running.
breastfeeding_tweets <- get_timeline("@BreastfeedingM", n = 5000)
# Split the tweet text into individual word tokens (column `word`) and keep
# only the author's screen name next to each token.
breastfeeding_words <- select(
  unnest_tokens(breastfeeding_tweets, word, text),
  screen_name,
  word
)
# Frequency of every token, most frequent first (stop words still included).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
breastfeeding_words %>%
  count(word, sort = TRUE)
# Token frequencies after dropping the stop words returned by get_stopwords().
# The join key is stated explicitly so the join does not depend on whichever
# column names the two tables happen to share.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"
# Token frequencies without stop words and without the URL fragments
# ("https", "t.co") that shortened links leave behind after tokenising.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  filter(
    word != "https",
    word != "t.co"
  ) %>%
  count(word, sort = TRUE)
Joining, by = "word"
# Word cloud of the 200 most frequent tokens (ties at the cut-off are kept),
# after removing stop words and URL fragments.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  filter(
    word != "https",
    word != "t.co"
  ) %>%
  count(word, sort = TRUE) %>%
  top_n(200, n) %>% # rank explicitly by the count column
  wordcloud2(size = 1)
Joining, by = "word"
Selecting by n
# Load the Bing sentiment lexicon and display it; the surrounding parentheses
# make the assigned value visible so it is printed as well as stored.
(bing <- get_sentiments("bing"))
# Count how often each lexicon word occurs in the tweets, together with its
# Bing sentiment label. Tokens that are not in the lexicon are dropped by the
# inner join. The join key is given explicitly.
breastfeeding_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE)
Joining, by = "word"
# Bar chart of the ten most frequent words per Bing sentiment label.
# "breastfeeding" is excluded before counting.
breastfeeding_words %>%
  inner_join(bing, by = "word") %>%
  filter(word != "breastfeeding") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, n) %>% # top ten by count within each sentiment; ties are kept
  ungroup() %>%
  mutate(word = reorder(word, n)) %>% # order the bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "breastfeeding words positive vs negative ",
    x = NULL
  ) +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

# Bar chart of the five most frequent words per NRC sentiment category.
# The lexicon is loaded here with get_sentiments("nrc"): the original joined
# against an object named `nrc` that is never created in this file, so the
# chunk only worked if `nrc` already existed in the session.
# NOTE(review): recent tidytext versions fetch this lexicon through the
# textdata package and may prompt for a download -- confirm on first run.
breastfeeding_words %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  filter(word != "breast") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(5, n) %>% # top five by count within each category; ties are kept
  ungroup() %>%
  mutate(word = reorder(word, n)) %>% # order the bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Breastfeeding verbage",
    x = NULL
  ) +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

# Preview the bigrams (two-word sequences) in the tweet text, discarding any
# bigram in which either word is a stop word.
breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  # split each bigram so both halves can be checked against the stop-word list
  separate(words, into = c("word1", "word2"), sep = " ") %>%
  filter(!(word1 %in% stop_words$word | word2 %in% stop_words$word)) %>%
  # glue the surviving pairs back into a single column
  unite(words, word1, word2, sep = " ")
NA
# Extra tokens to drop in addition to stop words: URL fragments left behind
# by shortened links.
remove_words <- c("https", "t.co")

# Preview the bigrams with stop words and URL fragments removed.
breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  # split each bigram temporarily so each half can be filtered
  separate(words, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word, # drop if the first word is a stop word
    !word2 %in% stop_words$word # drop if the second word is a stop word
  ) %>%
  filter(
    !word1 %in% remove_words, # drop if either word is in remove_words
    !word2 %in% remove_words
  ) %>%
  unite(words, word1, word2, sep = " ") # put the pair back together
# Extra tokens to drop in addition to stop words: URL fragments left behind
# by shortened links.
remove_words <- c("https", "t.co")

# Build the cleaned bigram table (one row per bigram occurrence, column
# `words`). The assignment is written up front with `<-` instead of a trailing
# `->`, so the name of the result is visible at the start of the pipeline.
breastfeeding_bigrams <- breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  # split each bigram temporarily so each half can be filtered
  separate(words, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word, # drop if the first word is a stop word
    !word2 %in% stop_words$word # drop if the second word is a stop word
  ) %>%
  filter(
    !word1 %in% remove_words, # drop if either word is in remove_words
    !word2 %in% remove_words
  ) %>%
  unite(words, word1, word2, sep = " ") # put the pair back together
# Frequency of every cleaned bigram, most frequent first.
breastfeeding_bigrams %>%
  count(words, sort = TRUE)
# Word cloud of the 100 most frequent bigrams (ties at the cut-off are kept).
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  top_n(100, n) %>% # rank explicitly by the count column
  wordcloud2(size = .5)
Selecting by n
# First words of interest. These need to be lowercase to match the tokens.
first_word <- c("wine", "breastfeeding")

# For each chosen first word, total how often every second word follows it.
# The total is computed with an explicit summarise() instead of a second
# count() followed by rename(total = nn): the auto-generated `nn` column name
# depends on the installed dplyr version, so that rename can fail.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  separate(words, c("word1", "word2"), sep = " ") %>% # separate the two words
  filter(word1 %in% first_word) %>% # keep first words from our list
  group_by(word1, word2) %>%
  summarise(total = sum(n)) %>%
  ungroup() %>%
  arrange(desc(total))
# First words of interest. These need to be lowercase to match the tokens.
# "benefits" was previously misspelled "benifits", which only matches tweets
# containing the same misspelling.
first_word <- c("benefits", "breastfeeding")

# Plot the five most frequent second words for each chosen first word.
# The total is computed with an explicit summarise() instead of a second
# count() followed by rename(total = nn): the auto-generated `nn` column name
# depends on the installed dplyr version, so that rename can fail.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  separate(words, c("word1", "word2"), sep = " ") %>% # separate the two words
  filter(word1 %in% first_word) %>% # keep first words from our list
  group_by(word1, word2) %>%
  summarise(total = sum(n)) %>%
  ungroup() %>%
  arrange(desc(total)) %>%
  # fix the factor order so the most frequent word is drawn at the top
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5, total) %>% # top five by total within each first word; ties kept
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() + # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Breastfeeding") +
  facet_wrap(~word1, scales = "free") +
  coord_flip()
Selecting by total

# Histogram of token lengths in characters, one bar per length.
ggplot(
  mutate(breastfeeding_words, word_length = nchar(word)),
  aes(word_length)
) +
  geom_histogram(binwidth = 1)

LS0tCnRpdGxlOiAiYnJlYXN0ZmVlZGluZyB0ZXh0IGFuYWx5c2lzIGZpbmFsIHByb2plY3QiCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAotLS0KCmBgYHtyfQpsaWJyYXJ5KHJ0d2VldCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodGlkeXRleHQpCmxpYnJhcnkoRFQpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KHdvcmRjbG91ZDIpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3R3ZWV0cyA8LSBnZXRfdGltZWxpbmUoIkBCcmVhc3RmZWVkaW5nTQoiLCBuID0gNTAwMCkKYGBgCgoKYGBge3J9CmJyZWFzdGZlZWRpbmdfd29yZHMgPC0gYnJlYXN0ZmVlZGluZ190d2VldHMgJT4lIAogIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkgJT4lIAogIHNlbGVjdChzY3JlZW5fbmFtZSwgd29yZCkKCmBgYAoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ193b3JkcyAlPiUKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkKYGBgCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgZmlsdGVyKCF3b3JkID09ICJodHRwcyIsCiAgICAgICAgICF3b3JkID09ICJ0LmNvIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgZmlsdGVyKCF3b3JkID09ICJodHRwcyIsCiAgICAgICAgICF3b3JkID09ICJ0LmNvIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFQpICU+JQogIHRvcF9uKDIwMCkgJT4lCiAgd29yZGNsb3VkMihzaXplID0gMSkKYGBgCgoKCmBgYHtyfQpiaW5nIDwtIGdldF9zZW50aW1lbnRzKCJiaW5nIikKYmluZwpgYGAKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKGJpbmcpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKQpgYGAKYGBge3J9CmJyZWFzdGZlZWRpbmdfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGZpbHRlcighd29yZCA9PSAiYnJlYXN0ZmVlZGluZyIpICU+JQogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpJT4lCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUKICB0b3BfbigxMCkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuLCBmaWxsID0gc2VudGltZW50KSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh5ID0gImJyZWFzdGZlZWRpbmcgd29y
ZHMgcG9zaXRpdmUgdnMgbmVnYXRpdmUgIiwKICAgICAgIHggPSBOVUxMKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgoKCgoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKG5yYykgJT4lIAogIGZpbHRlcighd29yZCA9PSAiYnJlYXN0IikgJT4lCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSklPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDUpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJCcmVhc3RmZWVkaW5nIHZlcmJhZ2UiLAogICAgICAgeCA9IE5VTEwpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ190d2VldHMgJT4lCiAgc2VsZWN0KHRleHQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoaXMgc2VsZWN0cyBqdXN0IHRoZSB0ZXh0IG9mIHRoZSB0d2VldHMKICB1bm5lc3RfdG9rZW5zKHdvcmRzLCB0ZXh0LCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAgICAjIHNlcGFyYXRlIHRoZW0gdGVtcG9yYXJpbHkKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgZmlyc3Qgd29yZCBpcyBhIHN0b3Agd29yZAogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBpZiBzZWNvbmQgd29yZCBpcyBhIHN0b3Agd29yZCAgIAogIHVuaXRlKHdvcmRzLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgICAgICAgICAgICAgICAgICAgICAgICAjIHB1dCB0aGVtIGJhY2sgdG9nZXRoZXIKCmBgYAoKCmBgYHtyfQpyZW1vdmVfd29yZHMgPSBjKCJodHRwcyIsICJ0LmNvIikKCmJyZWFzdGZlZWRpbmdfdHdlZXRzICU+JQogIHNlbGVjdCh0ZXh0KSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5uZXN0X3Rva2Vucyh3b3JkcywgdGV4dCwgdG9rZW4gPSAibmdyYW1zIiwgbiA9IDIpICU+JSAKICBzZXBhcmF0ZSh3b3JkcywgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgICAgICAgICAgIyBzZXBhcmF0ZSB0aGVtIHRlbXBvcmFyaWx5CiAgZmlsdGVyKCF3b3JkMSAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICMgcmVtb3ZlIGlmIGZpcnN0IHdv
cmQgaXMgYSBzdG9wIHdvcmQKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgc2Vjb25kIHdvcmQgaXMgYSBzdG9wIHdvcmQgICAKICBmaWx0ZXIoIXdvcmQxICVpbiUgcmVtb3ZlX3dvcmRzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSB0d28gbGluZXMgcmVtb3ZlIG91ciByZW1vdmVfd29yZHMKICBmaWx0ZXIoIXdvcmQyICVpbiUgcmVtb3ZlX3dvcmRzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5pdGUod29yZHMsIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAgICAgICAgICAgICAgICAgICAgICAgICMgcHV0IHRoZW0gYmFjayB0b2dldGhlcgpgYGAKCgpgYGB7cn0KcmVtb3ZlX3dvcmRzID0gYygiaHR0cHMiLCAidC5jbyIpCgpicmVhc3RmZWVkaW5nX3R3ZWV0cyAlPiUKICBzZWxlY3QodGV4dCkgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIHVubmVzdF90b2tlbnMod29yZHMsIHRleHQsIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKSAlPiUgCiAgc2VwYXJhdGUod29yZHMsIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lICAgICAgICAgICMgc2VwYXJhdGUgdGhlbSB0ZW1wb3JhcmlseQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBpZiBmaXJzdCB3b3JkIGlzIGEgc3RvcCB3b3JkCiAgZmlsdGVyKCF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICMgcmVtb3ZlIGlmIHNlY29uZCB3b3JkIGlzIGEgc3RvcCB3b3JkICAgCiAgZmlsdGVyKCF3b3JkMSAlaW4lIHJlbW92ZV93b3JkcykgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICMgdGhlc2UgdHdvIGxpbmVzIHJlbW92ZSBvdXIgcmVtb3ZlX3dvcmRzCiAgZmlsdGVyKCF3b3JkMiAlaW4lIHJlbW92ZV93b3JkcykgJT4lICAgICAgICAgICAgICAgICAgICAgICAgIAogIHVuaXRlKHdvcmRzLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgLT4gYnJlYXN0ZmVlZGluZ19iaWdyYW1zICAgICAgICAgICAgICAgICAgICAgICAjIHB1dCB0aGVtIGJhY2sgdG9nZXRoZXIKYGBgCgoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAKICBjb3VudCh3b3Jkcywgc29ydCA9IFQpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX2JpZ3JhbXMgJT4lIAogIGNvdW50KHdvcmRzLCBzb3J0ID0gVCkgJT4lCiAgdG9wX24oMTAwKSAlPiUKICB3b3JkY2xvdWQyKHNpemUgPSAuNSkKYGBgCgoKYGBge3J9CmZpcnN0X3dvcmQgPC0gYygid2luZSIsICJicmVhc3RmZWVkaW5nIikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSBuZWVkIHRvIGJlIGxvd2VyY2FzZQoKYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAgICAgICAgICAgICAKICBjb3VudCh3
b3Jkcywgc29ydCA9IFRSVUUpICU+JQogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAjIHNlcGFyYXRlIHRoZSB0d28gd29yZHMKICBmaWx0ZXIod29yZDEgJWluJSBmaXJzdF93b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICMgZmluZCBmaXJzdCB3b3JkcyBmcm9tIG91ciBsaXN0CiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pCmBgYAoKCgpgYGB7cn0KZmlyc3Rfd29yZCA8LSBjKCJiZW5pZml0cyIsICJicmVhc3RmZWVkaW5nIikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSBuZWVkIHRvIGJlIGxvd2VyY2FzZQoKYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAgICAgICAgICAgICAKICBjb3VudCh3b3Jkcywgc29ydCA9IFRSVUUpICU+JQogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAjIHNlcGFyYXRlIHRoZSB0d28gd29yZHMKICBmaWx0ZXIod29yZDEgJWluJSBmaXJzdF93b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICMgZmluZCBmaXJzdCB3b3JkcyBmcm9tIG91ciBsaXN0CiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pICU+JQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lICAgICAjIHB1dCB0aGUgd29yZHMgaW4gb3JkZXIKICBncm91cF9ieSh3b3JkMSkgJT4lIAogIHRvcF9uKDUpICU+JSAKICBnZ3Bsb3QoYWVzKHdvcmQyLCB0b3RhbCwgZmlsbCA9IHdvcmQxKSkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgIwogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHNldCB0aGUgY29sb3IgcGFsZXR0ZQogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gTlVMTCwgdGl0bGUgPSAiQnJlYXN0ZmVlZGluZyIpICsKICBmYWNldF93cmFwKH53b3JkMSwgc2NhbGVzID0gImZyZWUiKSArCiAgY29vcmRfZmxpcCgpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JQogIG11dGF0ZSh3b3JkX2xlbmd0aCA9IG5jaGFyKHdvcmQpKSAlPiUgCiAgZ2dwbG90KGFlcyh3b3JkX2xlbmd0aCkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEpCmBgYAoKCgoKCg==