# load twitter library - the rtweet library is recommended now over twitteR
library(rtweet)
# plotting and pipes - tidyverse!
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# text mining library
library(tidytext)
# plotting packages
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggraph)
# Request 10,000 tweets matching the #climatechange hashtag, passing
# lang = "en" and leaving retweets out of the results.
climate_tweets <- search_tweets(
  q = "#climatechange",
  n = 10000,
  include_rts = FALSE,
  lang = "en"
)
# Print the first few tweet texts to check them by eye (e.g. for emojis).
head(x = climate_tweets$text)
## [1] "Planet/Climate change takes millenials if not epochs of time #ClimateChange \nClimate Change is being used by the United Nations Sustainable Development Gang #UNSDG to push their Agenda21\n#cdnpoli #elxn43 #polcan\n \nhttps://t.co/jr919AHJwt\n\nI Choose CANADA!\nhttps://t.co/TV9gk0UBzC"
## [2] "@LeonardRoxon Planet/Climate change takes millenials if not epochs of time #ClimateChange \nClimate Change is being used by the United Nations Sustainable Development Gang to push their Agenda21\n#cdnpoli #elxn43 #polcan\n \nhttps://t.co/jr919AHJwt\n\nChoose CANADA \nhttps://t.co/TV9gk0UBzC"
## [3] "TURDeau has been convinced & convinced cdns he is doing it supposedly for a greater good. Scaring humanity into believing the PlanetChange LIE #ClimateChange Goal of achieving United Nations Sustainable Development Goals #UNSDG Gang by lowering CDN'S standard of living #cdnpoli https://t.co/5RlY5Rx8L0"
## [4] "Where r the jews in all this #unsdg 1 world, new world order, power currupts absolutely, big climate change lie, not planet change but ClimateChange? #ClimateChange I care about humanity more than u know, starts at homele$$ne$$ No excuse justify deceiving cdns #ppc drill water https://t.co/RmkndpnsXv"
## [5] "@Historian_Matt @theJagmeetSingh Told you jagmeet Singh is now a globalist new world order coolaid drinking PlanetChange #ClimateChange in our lifetime believer.\n #cdnpoli #elxn43 #prorep #EqualSenate #2termPM #ndp ppc2019 #PPC2019"
## [6] "@Tenenbrae @MandysTake TURDeau has been convinced & convinced cdns he is doing it supposedly for a greater good. Scaring humanity into believing the PlanetChange LIE #ClimateChange Goal of achieving United Nations Sustainable Development Goals #UNSDG Gang by lowering CDN'S standard of living #cdnpoli https://t.co/BKHPk0SrKU"
## Warning: Rate limit exceeded - 88
## [1] "I LAMENT @ScottMorrisonMP , I Lament Him Belittling And Demeaning #GretaThunberg , I Lament Him Belittling And Demeaning All Kids , I Lament Him For Not Showing Leadership On #ClimateChange , I Lament Him Not Being The PM Australia Needs \nhttps://t.co/rlxjhoGJGT\n#auspol #insiders"
## [2] "Why Conservatives Keep #Gaslighting About #ClimateChange . \nIn recent years, leaders of @GOP , @LiberalAus and @The_Nationals have become aware that denying the existence of global warming makes them look like idiots.\nhttps://t.co/gd0BkyTBrv\n#auspol #insiders"
## [3] "@ScottMorrisonMP Said At The UN #ClimateSummit He Was Doing His Part On #ClimateChange . Under @LiberalAus Australia Is Ranked 55 Out Of 60 In The Worst Performing Counties On #ClimateAction \nhttps://t.co/G35I1JkUvL"
## [4] "@KnuckleHead6971 @elyasgarad @shell_kon @BelindaJones68 @Greg_MarineLab @simonahac @MadamEarth @AngryTheInch @MortPhil @vanbadham @MsVeruca @Bloss55 @AOC @Noodles_Romanov @AngryGranny1 @GGeoffwitha @JacktheInsider @LaLegale @GOP @LiberalAus Why conservatives keep gaslighting about #climatechange \nIn recent years, leaders of @GOP and @LiberalAus and @The_Nationals have become aware that denying the existence of global warming makes them look like idiots\nhttps://t.co/gd0BkyTBrv"
## [5] "U.N. Head Antonio Guterres urges world leaders great ‘ambition and action’ on #climatechange, #SustainableDevelopment is needed, saying there is not time to loose in the face of climate change, rising inequality, increasing hatred and intolerance.\nhttps://t.co/iqRWJw9YzM"
## [6] "Experts And The UN Have Accused @ScottMorrisonMP Of Demeaning , Belittling And Talking Down To #GretaThunberg While Playing Completely Ignoring The Science And Facts On #ClimateChange To Parrot Trump\nhttps://t.co/EiLWRNU7vG\n#auspol #insiders"
# Find tweets that use "climate" and "change" together.
# The result is stored in climate_tweets, replacing the hashtag search result.
climate_tweets <- search_tweets(
  q = "climate+change",
  n = 10000,
  include_rts = FALSE,
  lang = "en"
)
## Warning: Rate limit exceeded - 88
## Warning: Rate limit exceeded
# Strip URLs from the tweet text and store the result in a new
# stripped_text column.
# The pattern matches "http" (which also covers "https") followed by any run
# of non-whitespace characters, so only the link itself is removed. A greedy
# "http.*" would also delete every word that follows the first link in a
# tweet, because "." keeps matching up to the end of the string.
climate_tweets$stripped_text <- gsub(
  "http[^[:space:]]*", "", climate_tweets$text
)
# R compares strings exactly, so the same word in a different case ("Dog" vs
# "dog") and a stray punctuation mark (",") each count as a separate value.
a_list_of_words <- c(
  "Dog", "dog", "dog",
  "cat", "cat",
  ","
)
unique(x = a_list_of_words)
## [1] "Dog" "dog" "cat" ","
## [1] "Dog" "dog" "cat" ","
# Split the URL-stripped tweet text into one word per row; tokenising
# removes punctuation and converts the words to lowercase.
climate_tweets_clean <- unnest_tokens(
  tbl = dplyr::select(climate_tweets, stripped_text),
  output = word,
  input = stripped_text
)
# Plot the 15 most frequent words as a horizontal bar chart.
# Stop words have not been removed at this point, so very common words are
# likely to dominate the chart -- notice any issues?
climate_tweets_clean %>%
  count(word, sort = TRUE) %>%
  top_n(15) %>%
  # order the bars by frequency rather than alphabetically
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  # coord_flip() draws x (the words) on the vertical axis and y (the counts)
  # on the horizontal one; labels are still assigned per aesthetic, so x must
  # carry the word label and y the count label.
  coord_flip() +
  labs(x = "Unique words",
       y = "Count",
       title = "Count of unique words found in tweets")
## Selecting by n

# Load the stop_words table (shipped with the tidytext package)
data(stop_words)
# Show its first six rows
head(stop_words, n = 6)
## # A tibble: 6 x 2
## word lexicon
## <chr> <chr>
## 1 a SMART
## 2 a's SMART
## 3 able SMART
## 4 about SMART
## 5 above SMART
## 6 according SMART
## # A tibble: 6 x 2
## word lexicon
## <chr> <chr>
## 1 a SMART
## 2 a's SMART
## 3 able SMART
## 4 about SMART
## 5 above SMART
## 6 according SMART
# Number of word rows before stop-word removal
nrow(x = climate_tweets_clean)
## [1] 211562
## [1] 264381
# Drop every row whose word also appears in the stop_words table
cleaned_tweet_words <- anti_join(climate_tweets_clean, stop_words)
## Joining, by = "word"
# The row count should now be smaller than before
nrow(x = cleaned_tweet_words)
## [1] 104715
## [1] 130578
# Horizontal bar chart of the 15 most frequent words once stop words are gone
cleaned_tweet_words %>%
  count(word, sort = TRUE) %>%
  top_n(15) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  xlab("Unique words") +
  ylab("Count") +
  ggtitle(
    label = "Count of unique words found in tweets",
    subtitle = "Stop words removed from the list"
  )
## Selecting by n

# library(devtools)
# install_github("dgrtwo/widyr")
library(widyr)
# Tokenise the URL-stripped tweet text into bigrams (n-grams with n = 2),
# one bigram per row in the paired_words column.
climate_tweets_paired_words <- unnest_tokens(
  tbl = dplyr::select(climate_tweets, stripped_text),
  output = paired_words,
  input = stripped_text,
  token = "ngrams",
  n = 2
)
# Tally each bigram, most frequent first
count(climate_tweets_paired_words, paired_words, sort = TRUE)
## # A tibble: 109,095 x 2
## paired_words n
## <chr> <int>
## 1 climate change 7367
## 2 change is 904
## 3 of the 561
## 4 about climate 544
## 5 on climate 544
## 6 the climate 537
## 7 in the 526
## 8 of climate 497
## 9 is a 465
## 10 change and 398
## # … with 109,085 more rows
## A tibble: 130,552 x 2
## paired_words n
## <chr> <int>
## 1 climate change 9164
## 2 change is 1140
## 3 of the 694
## 4 about climate 691
## 5 on climate 667
## 6 the climate 657
## 7 in the 649
## 8 of climate 626
## 9 is a 591
## 10 change and 498
## … with 130,542 more rows
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
##
## crossing
# Split each bigram into its two component words
climate_tweets_separated_words <- climate_tweets_paired_words %>%
  separate(paired_words, c("word1", "word2"), sep = " ")
# Keep only complete bigrams in which neither word is a stop word.
# Depending on the tidytext/tokenizers version, text with fewer than two
# words can yield an NA bigram. NA is never found in stop_words$word, so the
# stop-word filters alone would let it through and it would be counted as a
# word pair; drop it explicitly first.
climate_tweets_filtered <- climate_tweets_separated_words %>%
  filter(!is.na(word1), !is.na(word2)) %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word)
# new bigram counts:
climate_words_counts <- climate_tweets_filtered %>%
  count(word1, word2, sort = TRUE)
head(climate_words_counts)
## # A tibble: 6 x 3
## word1 word2 n
## <chr> <chr> <int>
## 1 climate change 7367
## 2 greta thunberg 230
## 3 global warming 227
## 4 fight climate 120
## 5 jason momoa 96
## 6 change deniers 95
## # A tibble: 6 x 3
## word1 word2 n
## <chr> <chr> <int>
## 1 climate change 9164
## 2 global warming 295
## 3 greta thunberg 282
## 4 fight climate 152
## 5 jason momoa 118
## 6 change deniers 112
library(igraph)
library(ggraph)
# Draw the word network for word pairs that occur at least 24 times.
# Only the nodes and their labels are drawn: the edge layer stays commented
# out because plotting graph edges is currently broken.
climate_words_counts %>%
  filter(n >= 24) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  # geom_edge_link(aes(edge_alpha = n, edge_width = n)) +
  geom_node_point(size = 3, color = "darkslategray4") +
  geom_node_text(aes(label = name), size = 3, vjust = 1.8) +
  ggtitle(
    label = "Word Network: Tweets using the hashtag - Climate Change",
    subtitle = "Text mining twitter data "
  ) +
  xlab("") +
  ylab("")
