library("rtweet")
library("ggplot2")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("tidytext")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.4 ✓ purrr 0.3.4
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
library("igraph")
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library("ggraph")
library("tidyr")
library("wordcloud2")
library("textdata")
appname <- "mytwitterapp"
twitter_token <- create_token(
  app = appname,
  consumer_key = "YOUR_CONSUMER_KEY",        # live credentials redacted: keys like these
  consumer_secret = "YOUR_CONSUMER_SECRET",  # should never appear in a shared script
  access_token = "YOUR_ACCESS_TOKEN",
  access_secret = "YOUR_ACCESS_SECRET")
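Hard-coding API keys risks leaking them whenever the script is shared; a safer sketch reads them from environment variables instead (the variable names below are assumptions, set in ~/.Renviron):
# hypothetical variable names; define them in ~/.Renviron, e.g. TWITTER_CONSUMER_KEY=...
twitter_token <- create_token(
  app = appname,
  consumer_key = Sys.getenv("TWITTER_CONSUMER_KEY"),
  consumer_secret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret = Sys.getenv("TWITTER_ACCESS_SECRET"))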
Q1: search for 200 recent tweets mentioning "Piedmont Park".
park_tweets_all <- search_tweets(q = "Piedmont Park", n = 200)
head(park_tweets_all, n = 10)
## # A tibble: 10 × 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 1262808354256084992 145454962… 2021-10-30 20:43:50 BonifacioB… "El Ja… Twitt…
## 2 1395865041056980993 145454457… 2021-10-30 20:23:45 739lsil "El Ja… Twitt…
## 3 233997042 145453876… 2021-10-30 20:00:41 put_up_or_… "I’ll … Twitt…
## 4 1381392917273710594 145452445… 2021-10-30 19:03:47 NellieC059… "Piedm… Twitt…
## 5 1192138537279348736 145447049… 2021-10-30 15:29:24 lappsga "It’s … Twitt…
## 6 1309321100 145445723… 2021-10-30 14:36:41 jballen5 "It’s … Twitt…
## 7 1309321100 145422429… 2021-10-29 23:11:05 jballen5 "#Scho… Twitt…
## 8 1020141119521378305 145445324… 2021-10-30 14:20:49 FAToomerPTA "It’s … Twitt…
## 9 1083564197055815681 145445142… 2021-10-30 14:13:36 Tish123456… "It’s … Twitt…
## 10 1083564197055815681 145422041… 2021-10-29 22:55:39 Tish123456… "#Scho… Twitt…
## # … with 84 more variables: display_text_width <dbl>, reply_to_status_id <chr>,
## # reply_to_user_id <chr>, reply_to_screen_name <chr>, is_quote <lgl>,
## # is_retweet <lgl>, favorite_count <int>, retweet_count <int>,
## # quote_count <int>, reply_count <int>, hashtags <list>, symbols <list>,
## # urls_url <list>, urls_t.co <list>, urls_expanded_url <list>,
## # media_url <list>, media_t.co <list>, media_expanded_url <list>,
## # media_type <list>, ext_media_url <list>, ext_media_t.co <list>, …
Q3: tally how many of the collected tweets are retweets.
table(park_tweets_all$is_retweet)
##
## FALSE TRUE
## 51 149
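Roughly three quarters of the sample are retweets; if only original tweets were wanted downstream, a one-line filter would do (a sketch, not applied in the original analysis):
park_originals <- dplyr::filter(park_tweets_all, !is_retweet)   # hypothetical: keeps the 51 originals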
Q4: drop every column that contains missing values, then tabulate tweet sources.
# keep only the columns that contain no missing values
park_tweets <- park_tweets_all[, sapply(park_tweets_all, Negate(anyNA)), drop = FALSE]
table(park_tweets$source)
##
## Buffer Hootsuite Inc. IFTTT Instagram
## 1 5 2 1
## Post Studio shareist True Anthem TweetDeck
## 1 1 3 2
## Twitter for Android Twitter for iPad Twitter for iPhone Twitter Web App
## 122 1 38 23
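Note that my_twts and its cleanedTxt column are never created in this section, and the word pairs shown further below suggest it comes from a separate, hashtag-based search rather than from park_tweets. A minimal sketch of how such an object could be built; the query and the cleaning step are assumptions:
my_twts <- search_tweets(q = "#myhashtag", n = 200) %>%   # hypothetical query
  mutate(cleanedTxt = gsub("https\\S*", "", text))        # assumed cleaning: strip URLs from the tweet text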
Q6: tokenize the cleaned tweet text into one word per row.
my_twts_clean <- my_twts %>%
dplyr::select(cleanedTxt) %>%
unnest_tokens(word, cleanedTxt)
nrow(my_twts_clean)
## [1] 1025
Q5 (in Step 5): plot the 15 most frequent words.
my_twts_clean %>%
  count(word, sort = TRUE) %>%
  top_n(15, n) %>%                     # naming the weighting column avoids the "Selecting by n" message
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(x = "words",
       y = "counts",
       title = "Figure 2-1: Unique word counts found in tweets, stop words included")

Q6 (continued): remove stop words from the tokens.
cleanTokens <- my_twts_clean %>% anti_join(stop_words, by = "word")   # spelling out the key suppresses the "Joining, by" message
nrow(cleanTokens)
## [1] 577
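wordcloud2 is loaded above but not used in this section; a minimal sketch of a word cloud built from the stop-word-free tokens:
cleanTokens %>%
  count(word, sort = TRUE) %>%
  wordcloud2()   # expects words in the first column and counts in the second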
Figure 4: build a word network from frequently co-occurring words.
library(widyr)
my_twts_ngram <- my_twts %>%
  dplyr::select(cleanedTxt) %>%
  unnest_tokens(paired_words, cleanedTxt, token = "ngrams", n = 3)   # note: n = 3 produces trigrams, not pairs
my_twts_ngram %>%
count(paired_words, sort = TRUE)
## # A tibble: 844 × 2
## paired_words n
## <chr> <int>
## 1 apa mla dissertation 6
## 2 college essays graduate 6
## 3 deadline goals school 6
## 4 dissertation thesis major 6
## 5 essayhelp papers help 6
## 6 essays graduate apa 6
## 7 grade essayhelp papers 6
## 8 graduate apa mla 6
## 9 graduation deadline goals 6
## 10 help graduation deadline 6
## # … with 834 more rows
library(tidyr)
my_twts_ngram <- my_twts_ngram %>%
  separate(paired_words, c("word1", "word2"), sep = " ")   # keeps only the first two words of each trigram
## Warning: Expected 2 pieces. Additional pieces discarded in 965 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
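The warning is a direct consequence of tokenizing trigrams (n = 3) and then splitting into only two pieces: the third word of every trigram is thrown away. A bigram variant (n = 2) yields true word pairs without discarding anything; a sketch:
my_twts_bigram <- my_twts %>%   # hypothetical bigram version
  dplyr::select(cleanedTxt) %>%
  unnest_tokens(paired_words, cleanedTxt, token = "ngrams", n = 2) %>%
  separate(paired_words, c("word1", "word2"), sep = " ")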
my_twts_filtered <- my_twts_ngram %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word)
my_words_counts <- my_twts_filtered %>%
  count(word1, word2, sort = TRUE)   # count each remaining word pair
head(my_twts_filtered)
## # A tibble: 6 × 2
## word1 word2
## <chr> <chr>
## 1 firehazard ucsb
## 2 multimillion wrongful
## 3 wrongful death
## 4 death student
## 5 student suicide
## 6 suicide lawsuits
my_words_counts %>%
  filter(n >= 2) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_width = n), edge_alpha = .6) +   # constant alpha belongs outside aes()
  geom_node_point(color = "darkslategray4", size = 3) +
  geom_node_text(aes(label = name), vjust = 1.8, size = 4) +
  labs(title = "Figure 4: Word Network: Tweets using my hashtag",
       subtitle = "Text mining Twitter data",
       x = "", y = "")
