Load the rtweet and other needed R packages.

Two new packages are introduced later in this lesson: igraph and ggraph.

# load twitter library - the rtweet library is recommended now over twitteR
#install.packages("rtweet")
#install.packages("tidytext")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(rtweet)
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
## 
##     flatten
# plotting and pipes - tidyverse!
library(ggplot2)
library(dplyr)
# text mining library
library(tidytext)

Create a twitter developer account

https://cran.r-project.org/web/packages/rtweet/vignettes/auth.html

Store your API keys (use the keys from your own developer account).

Replace each placeholder with the matching value from your own developer account, and never publish real keys: api_key <- "YOUR_API_KEY", api_secret_key <- "YOUR_API_SECRET_KEY", access_token <- "YOUR_ACCESS_TOKEN", access_token_secret <- "YOUR_ACCESS_TOKEN_SECRET"

# Twitter API credentials.
#
# Secrets must not be written into the source file: anyone who can read the
# file (or its rendered output) can use them. Read them from environment
# variables instead, e.g. set in ~/.Renviron:
#   TWITTER_API_KEY=...
#   TWITTER_API_SECRET_KEY=...
#   TWITTER_ACCESS_TOKEN=...
#   TWITTER_ACCESS_TOKEN_SECRET=...
# Any key that was ever committed in plain text should be regenerated in the
# Twitter developer portal.
api_key <- Sys.getenv("TWITTER_API_KEY")
api_secret_key <- Sys.getenv("TWITTER_API_SECRET_KEY")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Sys.getenv() returns "" for an unset variable; flag that early so the
# problem is not first seen as an opaque authentication failure.
if (!all(nzchar(c(api_key, api_secret_key, access_token, access_token_secret)))) {
  warning(
    "One or more Twitter credentials are not set; define TWITTER_API_KEY, ",
    "TWITTER_API_SECRET_KEY, TWITTER_ACCESS_TOKEN and ",
    "TWITTER_ACCESS_TOKEN_SECRET (for example in ~/.Renviron).",
    call. = FALSE
  )
}

## Build the rtweet token from the app's consumer key/secret together with
## the access token/secret stored above.
token <- create_token(
  app             = "DATA110First",
  consumer_key    = api_key,
  consumer_secret = api_secret_key,
  access_token    = access_token,
  access_secret   = access_token_secret
)

Authorization in future R sessions

The create_token() function saves your token as an environment variable

Explore Networks of Words

words that occur together in tweets

Setting token = "ngrams" splits each tweet into runs of consecutive words, and n = 2 sets the length of each run to two words, i.e. word pairs (bigrams).

#library(devtools)
#install_github("dgrtwo/widyr")
library(widyr)

# Keep only the cleaned tweet text and split it into bigrams: one row per
# pair of consecutive words, stored in the `paired_words` column.
climate_tweets_paired_words <- unnest_tokens(
  dplyr::select(climate_tweets, stripped_text),
  output = paired_words,
  input = stripped_text,
  token = "ngrams",
  n = 2
)

# Frequency table of the bigrams, most common first.
count(climate_tweets_paired_words, paired_words, sort = TRUE)
## # A tibble: 25,941 x 2
##    paired_words         n
##    <chr>            <int>
##  1 greenhouse gas     706
##  2 climate change     533
##  3 gas emissions      465
##  4 greenhouse gases   314
##  5 of the             257
##  6 in the             172
##  7 of greenhouse      150
##  8 the climate        113
##  9 the greenhouse      94
## 10 to the              93
## # … with 25,931 more rows

Again, eliminate the stop words from the paired words

library(tidyr)
# Split every bigram into its two component words.
climate_tweets_separated_words <- separate(
  climate_tweets_paired_words,
  col = paired_words,
  into = c("word1", "word2"),
  sep = " "
)

# Keep only the pairs in which neither word is a stop word.
climate_tweets_filtered <- filter(
  climate_tweets_separated_words,
  !word1 %in% stop_words$word,
  !word2 %in% stop_words$word
)

# Bigram counts after stop-word removal, most frequent first.
climate_words_counts <- count(
  climate_tweets_filtered,
  word1, word2,
  sort = TRUE
)

head(climate_words_counts)
## # A tibble: 6 x 3
##   word1      word2          n
##   <chr>      <chr>      <int>
## 1 greenhouse gas          706
## 2 climate    change       533
## 3 gas        emissions    465
## 4 greenhouse gases        314
## 5 greenhouse effect        88
## 6 reduce     greenhouse    73

Finally, plot the data

# plotting packages
# install.packages("ggraph")
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(ggraph)

# Word network of the climate tweets: bigrams seen at least 24 times become
# graph edges (word1 -> word2) whose opacity and width scale with the count.
# NOTE(review): the original author flagged edge plotting as "currently
# broken" - confirm against the installed ggraph version.
ggraph(
  graph_from_data_frame(filter(climate_words_counts, n >= 24)),
  layout = "fr"
) +
  geom_edge_link(aes(edge_alpha = n, edge_width = n)) +
  geom_node_point(color = "blue4", size = 3) +
  geom_node_text(aes(label = name), vjust = 1.8, size = 3) +
  labs(
    title = "Word Network: Tweets using the hashtag - Climate and Greenhouse",
    subtitle = "Text mining twitter data ",
    x = "",
    y = ""
  )