library(rtweet)
library(tidyverse)
library(tidytext)
library(DT)
library(plotly)
library(wordcloud2)
# Fetch the account's timeline (n = 5000 tweets requested).
# The handle is written as a single-line string: the original literal was
# split across two lines, which made a newline character part of the
# screen name sent to the API.
# NOTE(review): "@BreastfeedingM" may itself be a truncated handle -- confirm.
breastfeeding_tweets <- get_timeline("@BreastfeedingM", n = 5000)
# Split the `text` column of every tweet into single-word tokens (one row per
# token, stored in `word`) and keep only the author and the token.
breastfeeding_words <- select(
  unnest_tokens(breastfeeding_tweets, output = word, input = text),
  screen_name,
  word
)
# Frequency of every token, most frequent first.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
breastfeeding_words %>%
  count(word, sort = TRUE)
# Token frequencies after dropping stop words.
# The join key is named explicitly so the result does not depend on which
# columns the two tables happen to share (and no "Joining, by" message is
# emitted).
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE)
# Console output from the original run: Joining, by = "word"
# Token frequencies after dropping stop words and the URL fragments that
# tokenising tweet links leaves behind ("https", "t.co").
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%   # explicit join key
  filter(word != "https", word != "t.co") %>%   # same test as `!word == ...`
  count(word, sort = TRUE)
# Console output from the original run: Joining, by = "word"
# Word cloud of the 200 most frequent tokens (ties included), after removing
# stop words and URL fragments.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%   # explicit join key
  filter(word != "https", word != "t.co") %>%
  count(word, sort = TRUE) %>%
  # Rank by the count column explicitly instead of relying on top_n()'s
  # "use the last column" default.
  top_n(200, wt = n) %>%
  wordcloud2(size = 1)
# Console output from the original run: Joining, by = "word"
# Console output from the original run: Selecting by n
# Load the "bing" sentiment lexicon through tidytext and show it.
bing <- get_sentiments(lexicon = "bing")
bing
# Count how often each lexicon word occurs in the tweets, together with its
# sentiment label. Tokens that are not in the lexicon are dropped by the
# inner join.
breastfeeding_words %>%
  inner_join(bing, by = "word") %>%   # explicit join key
  count(word, sentiment, sort = TRUE)
# Console output from the original run: Joining, by = "word"
# Bar chart of the 10 most frequent words per bing sentiment, one facet per
# sentiment.
breastfeeding_words %>%
  inner_join(bing, by = "word") %>%           # explicit join key
  filter(word != "breastfeeding") %>%         # exclude this word from the chart
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, wt = n) %>%                       # rank by count explicitly
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%         # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "breastfeeding words positive vs negative ",
    x = NULL
  ) +
  coord_flip() +
  theme_minimal()
# Console output from the original run: Joining, by = "word"
# Console output from the original run: Selecting by n

# Bar chart of the 5 most frequent words per NRC sentiment category.
#
# `nrc` was used without ever being defined in this file, so the chunk only
# ran when the object happened to exist in the session. It is loaded here the
# same way `bing` is.
# NOTE(review): get_sentiments("nrc") may need the `textdata` package and a
# one-time download/licence prompt -- confirm in the target environment.
nrc <- get_sentiments("nrc")

breastfeeding_words %>%
  inner_join(nrc, by = "word") %>%            # explicit join key
  filter(word != "breast") %>%                # exclude this word from the chart
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(5, wt = n) %>%                        # rank by count explicitly
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%         # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Breastfeeding verbage",
    x = NULL
  ) +
  coord_flip() +
  theme_minimal()
# Console output from the original run: Joining, by = "word"
# Console output from the original run: Selecting by n

# Preview of the tweet bigrams with stop words removed (result is printed,
# not stored).
breastfeeding_tweets %>%
  select(text) %>%                                        # tweet text only
  unnest_tokens(words, text, token = "ngrams", n = 2) %>% # two-word tokens
  # Split each bigram so both halves can be checked against the stop list.
  separate(words, into = c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,   # first word must not be a stop word
    !word2 %in% stop_words$word    # second word must not be a stop word
  ) %>%
  unite(words, word1, word2, sep = " ")                   # re-join the halves
# Extra tokens to drop in addition to the stop words: URL fragments produced
# when tweet links are tokenised. Assigned with `<-` rather than `=`.
remove_words <- c("https", "t.co")

# Preview of the bigrams with stop words and `remove_words` filtered out
# (result is printed, not stored).
breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split temporarily
  filter(
    !word1 %in% stop_words$word,   # first word is not a stop word
    !word2 %in% stop_words$word,   # second word is not a stop word
    !word1 %in% remove_words,      # neither half is a URL fragment
    !word2 %in% remove_words
  ) %>%
  unite(words, word1, word2, sep = " ")                 # put them back together
# Extra tokens to drop in addition to the stop words: URL fragments produced
# when tweet links are tokenised.
remove_words <- c("https", "t.co")

# Build the cleaned bigram table used by the later chunks: one row per
# two-word sequence, excluding any bigram in which either word is a stop word
# or one of `remove_words`. The assignment is written up front with `<-`
# instead of a trailing `->`, so the name being defined is visible.
breastfeeding_bigrams <- breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split temporarily
  filter(
    !word1 %in% stop_words$word,   # first word is not a stop word
    !word2 %in% stop_words$word,   # second word is not a stop word
    !word1 %in% remove_words,      # neither half is a URL fragment
    !word2 %in% remove_words
  ) %>%
  unite(words, word1, word2, sep = " ")                 # put them back together
# Frequency of every bigram, most frequent first.
breastfeeding_bigrams %>%
  count(words, sort = TRUE)
# Word cloud of the 100 most frequent bigrams (ties included).
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  top_n(100, wt = n) %>%   # rank by the count column explicitly
  wordcloud2(size = .5)
# Console output from the original run: Selecting by n
# Leading words of interest. These need to be lowercase to match the tokens.
first_word <- c("wine", "breastfeeding")

# For every bigram that starts with one of `first_word`, total how often each
# following word occurs.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # separate the two words
  filter(word1 %in% first_word) %>%                     # keep listed first words
  # Name the weighted count directly. The original renamed an auto-generated
  # `nn` column, which only exists on old dplyr releases; newer ones call the
  # column `n`, so `rename(total = nn)` fails there.
  count(word1, word2, wt = n, sort = TRUE, name = "total")
# Leading words of interest. These need to be lowercase to match the tokens.
# "benefits" was misspelled "benifits" in the original, so it could never
# match a correctly spelled token.
first_word <- c("benefits", "breastfeeding")

# Bar chart, one facet per leading word, of the 5 words that most often
# follow it.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # separate the two words
  filter(word1 %in% first_word) %>%                     # keep listed first words
  # Name the weighted count directly instead of renaming an auto-generated
  # `nn` column, which does not exist on dplyr >= 1.0.
  count(word1, word2, wt = n, sort = TRUE, name = "total") %>%
  # Rows are sorted by `total`, so reversing the unique values fixes the bar
  # order on the flipped axis.
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5, wt = total) %>%                              # explicit ranking column
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() +                              # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Breastfeeding") +
  facet_wrap(~word1, scales = "free") +
  coord_flip()
# Console output from the original run: Selecting by total

# Histogram of token lengths (characters per word), one bar per length.
ggplot(
  mutate(breastfeeding_words, word_length = nchar(word)),
  aes(word_length)
) +
  geom_histogram(binwidth = 1)

LS0tCnRpdGxlOiAiYnJlYXN0ZmVlZGluZyBmaW5hbCB0ZXh0IGFuYWx5c2lzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKLS0tCgpgYGB7cn0KbGlicmFyeShydHdlZXQpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KERUKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeSh3b3JkY2xvdWQyKQpgYGAKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3R3ZWV0cyA8LSBnZXRfdGltZWxpbmUoIkBCcmVhc3RmZWVkaW5nTQoiLCBuID0gNTAwMCkKYGBgCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ193b3JkcyA8LSBicmVhc3RmZWVkaW5nX3R3ZWV0cyAlPiUgCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0KSAlPiUgCiAgc2VsZWN0KHNjcmVlbl9uYW1lLCB3b3JkKQoKYGBgCgoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JQogIGNvdW50KHdvcmQsIHNvcnQgPSBUKQpgYGAKCgoKYGBge3J9CmJyZWFzdGZlZWRpbmdfd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkKYGBgCgoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgZmlsdGVyKCF3b3JkID09ICJodHRwcyIsCiAgICAgICAgICF3b3JkID09ICJ0LmNvIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgZmlsdGVyKCF3b3JkID09ICJodHRwcyIsCiAgICAgICAgICF3b3JkID09ICJ0LmNvIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFQpICU+JQogIHRvcF9uKDIwMCkgJT4lCiAgd29yZGNsb3VkMihzaXplID0gMSkKYGBgCgoKYGBge3J9CmJpbmcgPC0gZ2V0X3NlbnRpbWVudHMoImJpbmciKQpiaW5nCmBgYAoKYGBge3J9CmJyZWFzdGZlZWRpbmdfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKGJpbmcpICU+JSAKICBmaWx0ZXIoIXdvcmQgPT0gImJyZWFzdGZlZWRpbmciKSAlPiUKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSU+JQogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oMTApICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJicmVhc3RmZWVk
aW5nIHdvcmRzIHBvc2l0aXZlIHZzIG5lZ2F0aXZlICIsCiAgICAgICB4ID0gTlVMTCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKG5yYykgJT4lIAogIGZpbHRlcighd29yZCA9PSAiYnJlYXN0IikgJT4lCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSklPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDUpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJCcmVhc3RmZWVkaW5nIHZlcmJhZ2UiLAogICAgICAgeCA9IE5VTEwpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgoKCgoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ190d2VldHMgJT4lCiAgc2VsZWN0KHRleHQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoaXMgc2VsZWN0cyBqdXN0IHRoZSB0ZXh0IG9mIHRoZSB0d2VldHMKICB1bm5lc3RfdG9rZW5zKHdvcmRzLCB0ZXh0LCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAgICAjIHNlcGFyYXRlIHRoZW0gdGVtcG9yYXJpbHkKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgZmlyc3Qgd29yZCBpcyBhIHN0b3Agd29yZAogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBpZiBzZWNvbmQgd29yZCBpcyBhIHN0b3Agd29yZCAgIAogIHVuaXRlKHdvcmRzLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgICAgICAgICAgICAgICAgICAgICAgICAjIHB1dCB0aGVtIGJhY2sgdG9nZXRoZXIKYGBgCgoKCmBgYHtyfQpyZW1vdmVfd29yZHMgPSBjKCJodHRwcyIsICJ0LmNvIikKCmJyZWFzdGZlZWRpbmdfdHdlZXRzICU+JQogIHNlbGVjdCh0ZXh0KSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5uZXN0X3Rva2Vucyh3b3JkcywgdGV4dCwgdG9rZW4gPSAibmdyYW1zIiwgbiA9IDIpICU+JSAKICBzZXBhcmF0ZSh3b3JkcywgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgICAgICAgICAgIyBzZXBhcmF0ZSB0aGVtIHRlbXBvcmFyaWx5CiAgZmlsdGVyKCF3b3JkMSAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICMgcmVtb3ZlIGlmIGZp
cnN0IHdvcmQgaXMgYSBzdG9wIHdvcmQKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgc2Vjb25kIHdvcmQgaXMgYSBzdG9wIHdvcmQgICAKICBmaWx0ZXIoIXdvcmQxICVpbiUgcmVtb3ZlX3dvcmRzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSB0d28gbGluZXMgcmVtb3ZlIG91ciByZW1vdmVfd29yZHMKICBmaWx0ZXIoIXdvcmQyICVpbiUgcmVtb3ZlX3dvcmRzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5pdGUod29yZHMsIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAgICAgICAgICAgICAgICAgICAgICAgICMgcHV0IHRoZW0gYmFjayB0b2dldGhlcgpgYGAKCgoKYGBge3J9CnJlbW92ZV93b3JkcyA9IGMoImh0dHBzIiwgInQuY28iKQoKYnJlYXN0ZmVlZGluZ190d2VldHMgJT4lCiAgc2VsZWN0KHRleHQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICB1bm5lc3RfdG9rZW5zKHdvcmRzLCB0ZXh0LCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAgICAjIHNlcGFyYXRlIHRoZW0gdGVtcG9yYXJpbHkKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgZmlyc3Qgd29yZCBpcyBhIHN0b3Agd29yZAogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBpZiBzZWNvbmQgd29yZCBpcyBhIHN0b3Agd29yZCAgIAogIGZpbHRlcighd29yZDEgJWluJSByZW1vdmVfd29yZHMpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoZXNlIHR3byBsaW5lcyByZW1vdmUgb3VyIHJlbW92ZV93b3JkcwogIGZpbHRlcighd29yZDIgJWluJSByZW1vdmVfd29yZHMpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAKICB1bml0ZSh3b3Jkcywgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpIC0+IGJyZWFzdGZlZWRpbmdfYmlncmFtcyAgICAgICAgICAgICAgICAgICAgICAgIyBwdXQgdGhlbSBiYWNrIHRvZ2V0aGVyCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX2JpZ3JhbXMgJT4lIAogIGNvdW50KHdvcmRzLCBzb3J0ID0gVCkKYGBgCgoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAKICBjb3VudCh3b3Jkcywgc29ydCA9IFQpICU+JQogIHRvcF9uKDEwMCkgJT4lCiAgd29yZGNsb3VkMihzaXplID0gLjUpCmBgYAoKCgoKYGBge3J9CmZpcnN0X3dvcmQgPC0gYygid2luZSIsICJicmVhc3RmZWVkaW5nIikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSBuZWVkIHRvIGJlIGxvd2VyY2FzZQoKYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAgICAgICAgICAgICAK
ICBjb3VudCh3b3Jkcywgc29ydCA9IFRSVUUpICU+JQogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAjIHNlcGFyYXRlIHRoZSB0d28gd29yZHMKICBmaWx0ZXIod29yZDEgJWluJSBmaXJzdF93b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICMgZmluZCBmaXJzdCB3b3JkcyBmcm9tIG91ciBsaXN0CiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pCmBgYAoKCmBgYHtyfQpmaXJzdF93b3JkIDwtIGMoImJlbmlmaXRzIiwgImJyZWFzdGZlZWRpbmciKSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoZXNlIG5lZWQgdG8gYmUgbG93ZXJjYXNlCgpicmVhc3RmZWVkaW5nX2JpZ3JhbXMgJT4lICAgICAgICAgICAgIAogIGNvdW50KHdvcmRzLCBzb3J0ID0gVFJVRSkgJT4lCiAgc2VwYXJhdGUod29yZHMsIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lICAgICAgICMgc2VwYXJhdGUgdGhlIHR3byB3b3JkcwogIGZpbHRlcih3b3JkMSAlaW4lIGZpcnN0X3dvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgIyBmaW5kIGZpcnN0IHdvcmRzIGZyb20gb3VyIGxpc3QKICBjb3VudCh3b3JkMSwgd29yZDIsIHd0ID0gbiwgc29ydCA9IFRSVUUpICU+JSAKICByZW5hbWUodG90YWwgPSBubikgJT4lCiAgbXV0YXRlKHdvcmQyID0gZmFjdG9yKHdvcmQyLCBsZXZlbHMgPSByZXYodW5pcXVlKHdvcmQyKSkpKSAlPiUgICAgICMgcHV0IHRoZSB3b3JkcyBpbiBvcmRlcgogIGdyb3VwX2J5KHdvcmQxKSAlPiUgCiAgdG9wX24oNSkgJT4lIAogIGdncGxvdChhZXMod29yZDIsIHRvdGFsLCBmaWxsID0gd29yZDEpKSArICAgICAgICAgICAgICAgICAgICAgICAgICAjCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgc2V0IHRoZSBjb2xvciBwYWxldHRlCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGxhYnMoeCA9IE5VTEwsIHkgPSBOVUxMLCB0aXRsZSA9ICJCcmVhc3RmZWVkaW5nIikgKwogIGZhY2V0X3dyYXAofndvcmQxLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkKYGBgCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ193b3JkcyAlPiUKICBtdXRhdGUod29yZF9sZW5ndGggPSBuY2hhcih3b3JkKSkgJT4lIAogIGdncGxvdChhZXMod29yZF9sZW5ndGgpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAxKQpgYGAKCgoKCgoKCgoKCgoKCgoKCgo=