library(rtweet)
library(tidyverse)
library(tidytext)
library(DT)
library(plotly)
library(wordcloud2)
# Download up to 5,000 tweets from the account's timeline.
# The handle has to be a single-line string: the previous literal had a line
# break before the closing quote, so the newline became part of the screen
# name passed to get_timeline().
# NOTE(review): "BreastfeedingM" may itself be a truncated handle -- confirm
# the intended account name.
breastfeeding_tweets <- get_timeline("@BreastfeedingM", n = 5000)
# Reshape the tweets to one token per row (tokens from `text` go into a new
# `word` column), then keep only the author and the token.
breastfeeding_words <- select(
  unnest_tokens(breastfeeding_tweets, word, text),
  screen_name,
  word
)
# Frequency of every token, most frequent first.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
breastfeeding_words %>%
  count(word, sort = TRUE)
# Token frequencies with stop words removed.
# Naming the join key documents the intent and silences the
# 'Joining, by = "word"' message dplyr prints when it has to guess the key.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE)
# Token frequencies with stop words and the URL fragments "https" / "t.co"
# removed.
# The join key is explicit, so dplyr no longer prints its
# 'Joining, by = "word"' message.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  filter(word != "https", word != "t.co") %>%
  count(word, sort = TRUE)
# Word cloud of the 200 most frequent tokens (stop words and URL fragments
# removed).
# Both the join key and the ranking column are named explicitly: the implicit
# forms depend on column names/order and print 'Joining, by = "word"' and
# 'Selecting by n' messages.
breastfeeding_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  filter(word != "https", word != "t.co") %>%
  count(word, sort = TRUE) %>%
  top_n(200, n) %>%          # ties at the cut-off are all kept
  wordcloud2(size = 1)
# Fetch the "bing" sentiment lexicon and show it.
bing <- get_sentiments(lexicon = "bing")
bing
# Count the tokens that appear in the bing lexicon, per word and sentiment.
# The explicit key replaces dplyr's guessed join (and its
# 'Joining, by = "word"' message).
breastfeeding_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE)
# Bar chart of the ten most frequent bing-lexicon words per sentiment,
# one facet per sentiment.
# The join key and the top_n() ranking column are explicit so the result does
# not depend on column order and no 'Joining, by' / 'Selecting by n' messages
# are printed.
breastfeeding_words %>%
  inner_join(bing, by = "word") %>%
  filter(word != "breastfeeding") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%                       # top ten within each sentiment
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%    # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "breastfeeding words positive vs negative ",
    x = NULL
  ) +
  coord_flip() +
  theme_minimal()

# Bar chart of the five most frequent nrc-lexicon words per sentiment.
#
# `nrc` was used here without ever being created, so the chunk stopped with
# an object-not-found error; load the lexicon first.
# NOTE(review): recent tidytext versions obtain this lexicon through the
# textdata package and may prompt for a download on first use -- confirm the
# package is installed where this runs.
nrc <- get_sentiments("nrc")

breastfeeding_words %>%
  inner_join(nrc, by = "word") %>%       # explicit key, no guessing message
  filter(word != "breast") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(5, n) %>%                        # rank explicitly by the count
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%    # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Breastfeeding verbage",
    x = NULL
  ) +
  coord_flip() +
  theme_minimal()

# Two-word sequences (bigrams) from the tweet text, keeping only pairs in
# which neither word is a stop word.
breastfeeding_tweets %>%
  select(text) %>%                       # keep only the tweet text
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  # A text with fewer than two tokens yields an NA bigram; without this
  # filter it survives the stop-word checks and unite() turns it into the
  # literal string "NA NA".
  filter(!is.na(words)) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split temporarily
  filter(
    !word1 %in% stop_words$word,         # first word is not a stop word
    !word2 %in% stop_words$word          # second word is not a stop word
  ) %>%
  unite(words, word1, word2, sep = " ")  # put the pair back together
# Extra tokens to drop from bigrams: fragments left over from shortened URLs.
remove_words <- c("https", "t.co")

# Bigrams with stop words and the `remove_words` tokens excluded.
breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  # A text with fewer than two tokens yields an NA bigram; drop it so it does
  # not end up as the literal string "NA NA" after unite().
  filter(!is.na(words)) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split temporarily
  filter(
    !word1 %in% stop_words$word,         # first word is not a stop word
    !word2 %in% stop_words$word,         # second word is not a stop word
    !word1 %in% remove_words,            # neither word is a URL fragment
    !word2 %in% remove_words
  ) %>%
  unite(words, word1, word2, sep = " ")  # put the pair back together
# Extra tokens to drop from bigrams: fragments left over from shortened URLs.
remove_words <- c("https", "t.co")

# Cleaned bigrams, one per row in a `words` column, stored for the counts and
# plots that follow. The assignment is written at the top with `<-` instead
# of a trailing `->`, so it is obvious that this pipeline creates an object.
breastfeeding_bigrams <- breastfeeding_tweets %>%
  select(text) %>%
  unnest_tokens(words, text, token = "ngrams", n = 2) %>%
  # A text with fewer than two tokens yields an NA bigram; drop it so "NA NA"
  # never shows up in the bigram counts.
  filter(!is.na(words)) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split temporarily
  filter(
    !word1 %in% stop_words$word,         # first word is not a stop word
    !word2 %in% stop_words$word,         # second word is not a stop word
    !word1 %in% remove_words,            # neither word is a URL fragment
    !word2 %in% remove_words
  ) %>%
  unite(words, word1, word2, sep = " ")  # put the pair back together
# Frequency of every cleaned bigram, most frequent first.
# `TRUE` is spelled out because `T` can be reassigned.
breastfeeding_bigrams %>%
  count(words, sort = TRUE)
# Word cloud of the 100 most frequent bigrams.
# The ranking column is passed to top_n() explicitly, so the selection does
# not rely on `n` being the last column and the 'Selecting by n' message is
# no longer printed.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  top_n(100, n) %>%          # ties at the cut-off are all kept
  wordcloud2(size = .5)
# Words to look up as the first half of a bigram.
# These need to be lowercase.
first_word <- c("wine", "breastfeeding")

# For each chosen first word, tabulate the words that follow it.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split the pair
  filter(word1 %in% first_word) %>%                     # keep chosen words
  # Name the weighted count directly. The old `rename(total = nn)` relied on
  # count() calling its output `nn`, which current dplyr no longer does here
  # (the column is `n`), so the rename failed.
  count(word1, word2, wt = n, sort = TRUE, name = "total")
# Words to look up as the first half of a bigram.
# These need to be lowercase. "benifits" was misspelled and therefore could
# only match tweets containing the same misspelling.
first_word <- c("benefits", "breastfeeding")

# Bar chart of the five most frequent follow-up words for each first word.
breastfeeding_bigrams %>%
  count(words, sort = TRUE) %>%
  separate(words, c("word1", "word2"), sep = " ") %>%   # split the pair
  filter(word1 %in% first_word) %>%                     # keep chosen words
  # Name the weighted count directly; `rename(total = nn)` fails on current
  # dplyr because the column is called `n`, not `nn`.
  count(word1, word2, wt = n, sort = TRUE, name = "total") %>%
  # Fix the factor order so the most frequent word ends up on top after
  # coord_flip().
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5, total) %>%        # explicit ranking column, no message printed
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() +                              # colour palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Breastfeeding") +
  facet_wrap(~word1, scales = "free") +
  coord_flip()

# Histogram of token lengths in characters, one bar per length.
ggplot(
  mutate(breastfeeding_words, word_length = nchar(word)),
  aes(x = word_length)
) +
  geom_histogram(binwidth = 1)

LS0tCnRpdGxlOiAiYnJlYXN0ZmVlZGluZyBmaW5hbCB0ZXh0IGFuYWx5c2lzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKLS0tCgpgYGB7cn0KbGlicmFyeShydHdlZXQpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KERUKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeSh3b3JkY2xvdWQyKQpgYGAKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3R3ZWV0cyA8LSBnZXRfdGltZWxpbmUoIkBCcmVhc3RmZWVkaW5nTQoiLCBuID0gNTAwMCkKYGBgCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ193b3JkcyA8LSBicmVhc3RmZWVkaW5nX3R3ZWV0cyAlPiUgCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0ZXh0KSAlPiUgCiAgc2VsZWN0KHNjcmVlbl9uYW1lLCB3b3JkKQoKYGBgCgoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JQogIGNvdW50KHdvcmQsIHNvcnQgPSBUKQpgYGAKCgoKYGBge3J9CmJyZWFzdGZlZWRpbmdfd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkKYGBgCgoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgZmlsdGVyKCF3b3JkID09ICJodHRwcyIsCiAgICAgICAgICF3b3JkID09ICJ0LmNvIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgZmlsdGVyKCF3b3JkID09ICJodHRwcyIsCiAgICAgICAgICF3b3JkID09ICJ0LmNvIikgJT4lCiAgY291bnQod29yZCwgc29ydCA9IFQpICU+JQogIHRvcF9uKDIwMCkgJT4lCiAgd29yZGNsb3VkMihzaXplID0gMSkKYGBgCgoKYGBge3J9CmJpbmcgPC0gZ2V0X3NlbnRpbWVudHMoImJpbmciKQpiaW5nCmBgYAoKYGBge3J9CmJyZWFzdGZlZWRpbmdfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKGJpbmcpICU+JSAKICBmaWx0ZXIoIXdvcmQgPT0gImJyZWFzdGZlZWRpbmciKSAlPiUKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSU+JQogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oMTApICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJicmVhc3RmZWVk
aW5nIHdvcmRzIHBvc2l0aXZlIHZzIG5lZ2F0aXZlICIsCiAgICAgICB4ID0gTlVMTCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKG5yYykgJT4lIAogIGZpbHRlcighd29yZCA9PSAiYnJlYXN0IikgJT4lCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSklPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDUpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJCcmVhc3RmZWVkaW5nIHZlcmJhZ2UiLAogICAgICAgeCA9IE5VTEwpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgoKCgoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ190d2VldHMgJT4lCiAgc2VsZWN0KHRleHQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoaXMgc2VsZWN0cyBqdXN0IHRoZSB0ZXh0IG9mIHRoZSB0d2VldHMKICB1bm5lc3RfdG9rZW5zKHdvcmRzLCB0ZXh0LCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAgICAjIHNlcGFyYXRlIHRoZW0gdGVtcG9yYXJpbHkKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgZmlyc3Qgd29yZCBpcyBhIHN0b3Agd29yZAogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBpZiBzZWNvbmQgd29yZCBpcyBhIHN0b3Agd29yZCAgIAogIHVuaXRlKHdvcmRzLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgICAgICAgICAgICAgICAgICAgICAgICAjIHB1dCB0aGVtIGJhY2sgdG9nZXRoZXIKYGBgCgoKCmBgYHtyfQpyZW1vdmVfd29yZHMgPSBjKCJodHRwcyIsICJ0LmNvIikKCmJyZWFzdGZlZWRpbmdfdHdlZXRzICU+JQogIHNlbGVjdCh0ZXh0KSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5uZXN0X3Rva2Vucyh3b3JkcywgdGV4dCwgdG9rZW4gPSAibmdyYW1zIiwgbiA9IDIpICU+JSAKICBzZXBhcmF0ZSh3b3JkcywgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgICAgICAgICAgIyBzZXBhcmF0ZSB0aGVtIHRlbXBvcmFyaWx5CiAgZmlsdGVyKCF3b3JkMSAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICMgcmVtb3ZlIGlmIGZp
cnN0IHdvcmQgaXMgYSBzdG9wIHdvcmQKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgc2Vjb25kIHdvcmQgaXMgYSBzdG9wIHdvcmQgICAKICBmaWx0ZXIoIXdvcmQxICVpbiUgcmVtb3ZlX3dvcmRzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSB0d28gbGluZXMgcmVtb3ZlIG91ciByZW1vdmVfd29yZHMKICBmaWx0ZXIoIXdvcmQyICVpbiUgcmVtb3ZlX3dvcmRzKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgCiAgdW5pdGUod29yZHMsIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAgICAgICAgICAgICAgICAgICAgICAgICMgcHV0IHRoZW0gYmFjayB0b2dldGhlcgpgYGAKCgoKYGBge3J9CnJlbW92ZV93b3JkcyA9IGMoImh0dHBzIiwgInQuY28iKQoKYnJlYXN0ZmVlZGluZ190d2VldHMgJT4lCiAgc2VsZWN0KHRleHQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICB1bm5lc3RfdG9rZW5zKHdvcmRzLCB0ZXh0LCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAgICAjIHNlcGFyYXRlIHRoZW0gdGVtcG9yYXJpbHkKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgIyByZW1vdmUgaWYgZmlyc3Qgd29yZCBpcyBhIHN0b3Agd29yZAogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBpZiBzZWNvbmQgd29yZCBpcyBhIHN0b3Agd29yZCAgIAogIGZpbHRlcighd29yZDEgJWluJSByZW1vdmVfd29yZHMpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoZXNlIHR3byBsaW5lcyByZW1vdmUgb3VyIHJlbW92ZV93b3JkcwogIGZpbHRlcighd29yZDIgJWluJSByZW1vdmVfd29yZHMpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAKICB1bml0ZSh3b3Jkcywgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpIC0+IGJyZWFzdGZlZWRpbmdfYmlncmFtcyAgICAgICAgICAgICAgICAgICAgICAgIyBwdXQgdGhlbSBiYWNrIHRvZ2V0aGVyCmBgYAoKCmBgYHtyfQpicmVhc3RmZWVkaW5nX2JpZ3JhbXMgJT4lIAogIGNvdW50KHdvcmRzLCBzb3J0ID0gVCkKYGBgCgoKCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAKICBjb3VudCh3b3Jkcywgc29ydCA9IFQpICU+JQogIHRvcF9uKDEwMCkgJT4lCiAgd29yZGNsb3VkMihzaXplID0gLjUpCmBgYAoKCgoKYGBge3J9CmZpcnN0X3dvcmQgPC0gYygid2luZSIsICJicmVhc3RmZWVkaW5nIikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSBuZWVkIHRvIGJlIGxvd2VyY2FzZQoKYnJlYXN0ZmVlZGluZ19iaWdyYW1zICU+JSAgICAgICAgICAgICAK
ICBjb3VudCh3b3Jkcywgc29ydCA9IFRSVUUpICU+JQogIHNlcGFyYXRlKHdvcmRzLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAjIHNlcGFyYXRlIHRoZSB0d28gd29yZHMKICBmaWx0ZXIod29yZDEgJWluJSBmaXJzdF93b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICMgZmluZCBmaXJzdCB3b3JkcyBmcm9tIG91ciBsaXN0CiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pCmBgYAoKCmBgYHtyfQpmaXJzdF93b3JkIDwtIGMoImJlbmlmaXRzIiwgImJyZWFzdGZlZWRpbmciKSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHRoZXNlIG5lZWQgdG8gYmUgbG93ZXJjYXNlCgpicmVhc3RmZWVkaW5nX2JpZ3JhbXMgJT4lICAgICAgICAgICAgIAogIGNvdW50KHdvcmRzLCBzb3J0ID0gVFJVRSkgJT4lCiAgc2VwYXJhdGUod29yZHMsIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lICAgICAgICMgc2VwYXJhdGUgdGhlIHR3byB3b3JkcwogIGZpbHRlcih3b3JkMSAlaW4lIGZpcnN0X3dvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgIyBmaW5kIGZpcnN0IHdvcmRzIGZyb20gb3VyIGxpc3QKICBjb3VudCh3b3JkMSwgd29yZDIsIHd0ID0gbiwgc29ydCA9IFRSVUUpICU+JSAKICByZW5hbWUodG90YWwgPSBubikgJT4lCiAgbXV0YXRlKHdvcmQyID0gZmFjdG9yKHdvcmQyLCBsZXZlbHMgPSByZXYodW5pcXVlKHdvcmQyKSkpKSAlPiUgICAgICMgcHV0IHRoZSB3b3JkcyBpbiBvcmRlcgogIGdyb3VwX2J5KHdvcmQxKSAlPiUgCiAgdG9wX24oNSkgJT4lIAogIGdncGxvdChhZXMod29yZDIsIHRvdGFsLCBmaWxsID0gd29yZDEpKSArICAgICAgICAgICAgICAgICAgICAgICAgICAjCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgc2V0IHRoZSBjb2xvciBwYWxldHRlCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGxhYnMoeCA9IE5VTEwsIHkgPSBOVUxMLCB0aXRsZSA9ICJCcmVhc3RmZWVkaW5nIikgKwogIGZhY2V0X3dyYXAofndvcmQxLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkKYGBgCgpgYGB7cn0KYnJlYXN0ZmVlZGluZ193b3JkcyAlPiUKICBtdXRhdGUod29yZF9sZW5ndGggPSBuY2hhcih3b3JkKSkgJT4lIAogIGdncGxvdChhZXMod29yZF9sZW5ndGgpKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSAxKQpgYGAKCgoKCgoKCgoKCgoKCgoKCgo=