packages <- c("RedditExtractoR", "anytime", "magrittr", "httr", "tidytext", "tidyverse", "igraph", "ggraph", "wordcloud2", "textdata", "sf", "tmap", "here")
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'igraph'
##
##
## The following objects are masked from 'package:lubridate':
##
## %--%, union
##
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
##
## The following object is masked from 'package:base':
##
## union
##
##
##
## Attaching package: 'textdata'
##
##
## The following object is masked from 'package:httr':
##
## cache_info
##
##
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
##
##
## Attaching package: 'tmap'
##
##
## The following object is masked from 'package:datasets':
##
## rivers
##
##
## here() starts at /Users/ryannation
library(sentimentr)
###How do Reddit users feel towards traffic?
traffic_threads <- find_thread_urls(keywords = "traffic",
sort_by = 'relevance',
period = 'all') %>%
drop_na()
## parsing URLs on page 1...
## parsing URLs on page 2...
## parsing URLs on page 3...
rownames(traffic_threads) <- NULL
colnames(traffic_threads)
## [1] "date_utc" "timestamp" "title" "text" "subreddit" "comments"
## [7] "url"
head(traffic_threads, 3) %>% knitr::kable()
| date_utc | timestamp | title | text | subreddit | comments | url |
|---|---|---|---|---|---|---|
| 2024-07-03 | 1720016123 | Drugs disguised as Taco Bell burritos found during Tennessee traffic stop | nottheonion | 218 | https://www.reddit.com/r/nottheonion/comments/1duff3g/drugs_disguised_as_taco_bell_burritos_found/ | |
| 2024-07-28 | 1722179181 | Trump Traffic Brigade | Just saw this in my town. There was a looong line of Trump supporters with their flags and whatnot driving slowly like a funeral procession near my house. Then they blocked traffic to allow these jerks to turn right all together. This man blocking traffic also was flipping off people he was blocking. Boomers vest said Trump Traffic Control. Really hoping it’s an indication that this was a Trump political career funeral procession. | BoomersBeingFools | 306 | https://www.reddit.com/r/BoomersBeingFools/comments/1ee9j43/trump_traffic_brigade/ |
| 2020-03-06 | 1583525566 | Ever wonder what creates traffic? This guy. | IdiotsInCars | 5638 | https://www.reddit.com/r/IdiotsInCars/comments/fejeaw/ever_wonder_what_creates_traffic_this_guy/ |
###In order to analyze how Reddit users feel towards traffic, we download data regarding traffic from particular threads that mention the word “traffic.”
data("stop_words")
replace_reg <- "http[s]?://[A-Za-z\\d/\\.]+|&|<|>"
traffic_clean <- traffic_threads %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
unnest_tokens(word, text, token = "words") %>%
anti_join(stop_words, by = "word") %>%
filter(str_detect(word, "[a-z]")) %>%
filter(word != "traffic")
traffic_clean %>%
count(word, sort = TRUE) %>%
wordcloud2()
###This word cloud shows some of the mpst frequent words. Many of them, such as “car, road, and move” we could expect. However sentiment analysis will most likely rate some of these words as positive or negative, which will help us answer our research question.
data("stop_words")
replace_reg <- "http[s]?://[A-Za-z\\d/\\.]+|&|<|>"
tri_grams <- traffic_threads %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
separate(trigram, into = c("word1", "word2", "word3"), sep = " ") %>%
filter(str_detect(word1, "^[a-z]+$"),
str_detect(word2, "^[a-z]+$"),
str_detect(word3, "^[a-z]+$")) %>%
filter(!word1 %in% stop_words$word,
!word2 %in% stop_words$word,
!word3 %in% stop_words$word) %>%
unite(trigram, word1, word2, word3, sep = " ")
tri_gram_freq <- tri_grams %>%
count(trigram, sort = TRUE)
print(tri_gram_freq)
## trigram n
## 1 common law copyright 2
## 2 absolutely mind blowing 1
## 3 adding subway lines 1
## 4 amazing public transport 1
## 5 austin highway infrastructure 1
## 6 austin police force 1
## 7 automatic headlights shut 1
## 8 autozone install brighter 1
## 9 bike lanes traffic 1
## 10 blocking boomers vest 1
## 11 board driver absolutely 1
## 12 brigading arise people 1
## 13 bright led headlights 1
## 14 bright summer day 1
## 15 browsing tinder matched 1
## 16 bumper traffic people 1
## 17 called rat running 1
## 18 career funeral procession 1
## 19 chaotic lawful excitement 1
## 20 chase center tonight 1
## 21 closed road ect 1
## 22 controlled environment parking 1
## 23 crawl walk run 1
## 24 dashed line exit 1
## 25 day urgent care 1
## 26 effect tech rips 1
## 27 entire austin highway 1
## 28 environment parking lot 1
## 29 event eventually makes 1
## 30 fine individual ran 1
## 31 flashlight automatically notches 1
## 32 fucking sun emits 1
## 33 gridlock traffic adding 1
## 34 hated freeway section 1
## 35 hov lane edit 1
## 36 hrlokiw nbsp wait 1
## 37 install brighter bulbs 1
## 38 involved traffic stops 1
## 39 lane road ran 1
## 40 las vegas convention 1
## 41 las vegas loop 1
## 42 lesson tldr dickhead 1
## 43 lines train lines 1
## 44 literal fucking sun 1
## 45 lol people hate 1
## 46 lot closed road 1
## 47 lumen brightness setting 1
## 48 lumen searchlight sitting 1
## 49 mind blowing impact 1
## 50 mirror minor damage 1
## 51 mock traffic stop 1
## 52 nbsp precisely op 1
## 53 officers involved traffic 1
## 54 parking lot closed 1
## 55 parking lot ran 1
## 56 people hate projects 1
## 57 phone charge makes 1
## 58 pissed im thinking 1
## 59 podcast wtf rant 1
## 60 political career funeral 1
## 61 powerful elon bad 1
## 62 previous provincial administrations 1
## 63 provincial government dragging 1
## 64 regular lanes yield 1
## 65 shown traffic piling 1
## 66 single issue voter 1
## 67 smooth brain drivers 1
## 68 subway lines train 1
## 69 super bright led 1
## 70 super duper love 1
## 71 traffic adding lanes 1
## 72 traffic move faster 1
## 73 traffic stop conducted 1
## 74 trafficking white van 1
## 75 trigger warning sa 1
## 76 trump political career 1
## 77 trump traffic control 1
## 78 tunnel nbsp precisely 1
## 79 upvotes lol people 1
## 80 urgent care vet 1
## 81 vegas convention center 1
## 82 watt hour battery 1
## 83 whatnot driving slowly 1
## 84 white van pulls 1
## 85 wing mirror minor 1
###As you can see, there is not a high enough frequency of trigram pairs to make comparison between the groups. However, the trigrams do still point to some ideas that we will discuss later on.
bi_grams <- traffic_threads %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
separate(bigram, into = c("word1", "word2"), sep = " ") %>%
filter(str_detect(word1, "^[a-z]+$"),
str_detect(word2, "^[a-z]+$")) %>%
filter(!word1 %in% stop_words$word,
!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ")
bi_gram_freq <- bi_grams %>%
count(bigram, sort = TRUE)
print(bi_gram_freq)
## bigram n
## 1 hov lane 4
## 2 police officer 3
## 3 traffic stop 3
## 4 bike lanes 2
## 5 chase center 2
## 6 closed road 2
## 7 common law 2
## 8 double white 2
## 9 funeral procession 2
## 10 gas station 2
## 11 las vegas 2
## 12 law copyright 2
## 13 low beams 2
## 14 parking lot 2
## 15 rat running 2
## 16 subway lines 2
## 17 absolute max 1
## 18 absolutely mind 1
## 19 accelerating quickly 1
## 20 acura crossover 1
## 21 add lanes 1
## 22 adding lanes 1
## 23 adding subway 1
## 24 affection growing 1
## 25 afternoon sun 1
## 26 amazing public 1
## 27 angry minions 1
## 28 arise people 1
## 29 asshole billionaire 1
## 30 asshole millionaire 1
## 31 austin highway 1
## 32 austin police 1
## 33 automatic headlights 1
## 34 automatically dim 1
## 35 automatically notches 1
## 36 autozone install 1
## 37 battery power 1
## 38 begrudge people 1
## 39 billionaire douchebag 1
## 40 blankets covering 1
## 41 blinding light 1
## 42 blocked traffic 1
## 43 blocking boomers 1
## 44 blocking traffic 1
## 45 blowing impact 1
## 46 board driver 1
## 47 boomers vest 1
## 48 brain drivers 1
## 49 brigading arise 1
## 50 bright led 1
## 51 bright summer 1
## 52 brighter bulbs 1
## 53 brightness setting 1
## 54 browsing tinder 1
## 55 bumper move 1
## 56 bumper traffic 1
## 57 bus lane 1
## 58 called police 1
## 59 called rat 1
## 60 calling people 1
## 61 capacity operation 1
## 62 captain smoothbrain 1
## 63 car traffic 1
## 64 care vet 1
## 65 career funeral 1
## 66 cars waving 1
## 67 carte blanche 1
## 68 catch bart 1
## 69 center tonight 1
## 70 change lanes 1
## 71 chaotic lawful 1
## 72 charge desk 1
## 73 charge makes 1
## 74 clipper card 1
## 75 closing properly 1
## 76 college campus 1
## 77 constant roll 1
## 78 controlled environment 1
## 79 convention center 1
## 80 cool civic 1
## 81 crawl walk 1
## 82 crazy accident 1
## 83 dashed line 1
## 84 day urgent 1
## 85 dipshit moron 1
## 86 discuss debrief 1
## 87 drive safe 1
## 88 driver absolutely 1
## 89 drivers education 1
## 90 driving home 1
## 91 driving slowly 1
## 92 duper love 1
## 93 ed class 1
## 94 effect tech 1
## 95 elon bad 1
## 96 elon rich 1
## 97 entire austin 1
## 98 entitled jackasses 1
## 99 environment parking 1
## 100 er tech 1
## 101 event eventually 1
## 102 eventually makes 1
## 103 excellent training 1
## 104 excessively flashing 1
## 105 explicit permission 1
## 106 extremely nervous 1
## 107 eyes forward 1
## 108 facial expression 1
## 109 fan subreddit 1
## 110 faster traffic 1
## 111 fine individual 1
## 112 flashlight automatically 1
## 113 flawed tunnel 1
## 114 freeway section 1
## 115 freight train 1
## 116 fucking airport 1
## 117 fucking sun 1
## 118 future iterations 1
## 119 future pt 1
## 120 gardiner expressway 1
## 121 gauge traffic 1
## 122 genuinely hurt 1
## 123 getgo elon 1
## 124 giant suv 1
## 125 glove box 1
## 126 gonna learn 1
## 127 government dragging 1
## 128 gridlock traffic 1
## 129 guy saved 1
## 130 hate projects 1
## 131 hated freeway 1
## 132 headlights shut 1
## 133 heavy stop 1
## 134 highway infrastructure 1
## 135 highways wider 1
## 136 hour battery 1
## 137 hrlokiw nbsp 1
## 138 im thinking 1
## 139 imaginable permutation 1
## 140 immediately overcome 1
## 141 impacts traffic 1
## 142 individual ran 1
## 143 install brighter 1
## 144 intellectual property 1
## 145 involved traffic 1
## 146 issue voter 1
## 147 lakeshore blvd 1
## 148 lane edit 1
## 149 lane road 1
## 150 lanes traffic 1
## 151 lanes yield 1
## 152 law involved 1
## 153 lawful excitement 1
## 154 led headlights 1
## 155 lesson tldr 1
## 156 line exit 1
## 157 lines train 1
## 158 literal fucking 1
## 159 ll figure 1
## 160 lol people 1
## 161 looong line 1
## 162 loop video 1
## 163 lot closed 1
## 164 lot ran 1
## 165 lumen brightness 1
## 166 lumen searchlight 1
## 167 lumen setting 1
## 168 magifying glass 1
## 169 major thoroughfare 1
## 170 millionaire douchebag 1
## 171 mind blowing 1
## 172 minor damage 1
## 173 minute congestion 1
## 174 mirror minor 1
## 175 mirrors bright 1
## 176 mock citation 1
## 177 mock traffic 1
## 178 money nbsp 1
## 179 motherfucker burn 1
## 180 move faster 1
## 181 multiple tunnels 1
## 182 nbsp claims 1
## 183 nbsp precisely 1
## 184 nbsp wait 1
## 185 nest clusterfuck 1
## 186 normal human 1
## 187 north india 1
## 188 obligation hear 1
## 189 officers involved 1
## 190 oncoming traffic 1
## 191 passenger seat 1
## 192 people hate 1
## 193 personal vehicle 1
## 194 personaly beleive 1
## 195 phone charge 1
## 196 phone texting 1
## 197 pissed im 1
## 198 podcast wtf 1
## 199 police force 1
## 200 police vehicle 1
## 201 political career 1
## 202 power button 1
## 203 powerful elon 1
## 204 precisely op 1
## 205 pretty badly 1
## 206 pretty bright 1
## 207 pretty noisy 1
## 208 previous provincial 1
## 209 provincial administrations 1
## 210 provincial government 1
## 211 public transport 1
## 212 public transportation 1
## 213 pulling compleetly 1
## 214 question possibly 1
## 215 raped trafficked 1
## 216 rear window 1
## 217 red light 1
## 218 reduce traffic 1
## 219 regular lanes 1
## 220 remove lanes 1
## 221 repairs coupled 1
## 222 rich scumbag 1
## 223 ridiculous ordeal 1
## 224 road ect 1
## 225 road ran 1
## 226 roasting insects 1
## 227 rush hour 1
## 228 safely drive 1
## 229 searchlight sitting 1
## 230 semi trucks 1
## 231 shown traffic 1
## 232 simple invest 1
## 233 single car 1
## 234 single issue 1
## 235 smooth brain 1
## 236 sort controversial 1
## 237 sorta pull 1
## 238 source ninjaonii 1
## 239 spread gossip 1
## 240 square foot 1
## 241 starts beeping 1
## 242 starts cpr 1
## 243 starts flashing 1
## 244 stations footage 1
## 245 stop conducted 1
## 246 stop lots 1
## 247 stoping dead 1
## 248 stopped breathing 1
## 249 stressful situation 1
## 250 stressful situations 1
## 251 stretcher jumps 1
## 252 summer day 1
## 253 sun emits 1
## 254 sun hitting 1
## 255 sun visor 1
## 256 super bright 1
## 257 super duper 1
## 258 tech rips 1
## 259 tinder matched 1
## 260 tinted windshield 1
## 261 tldr dickhead 1
## 262 traffic adding 1
## 263 traffic control 1
## 264 traffic douggie 1
## 265 traffic due 1
## 266 traffic move 1
## 267 traffic people 1
## 268 traffic piling 1
## 269 traffic sitting 1
## 270 traffic stops 1
## 271 trafficking white 1
## 272 train lines 1
## 273 transport system 1
## 274 trigger warning 1
## 275 trump political 1
## 276 trump supporters 1
## 277 trump traffic 1
## 278 trunk lid 1
## 279 tunnel nbsp 1
## 280 twenty minutes 1
## 281 typically drive 1
## 282 unauthorized dissemination 1
## 283 underground lanes 1
## 284 underwear laying 1
## 285 upvotes lol 1
## 286 urgent care 1
## 287 van legs 1
## 288 van pulls 1
## 289 vegas convention 1
## 290 vegas loop 1
## 291 verified proven 1
## 292 video speaks 1
## 293 wait till 1
## 294 walk run 1
## 295 wanna break 1
## 296 warning sa 1
## 297 watt hour 1
## 298 whatnot driving 1
## 299 whiney lil 1
## 300 white van 1
## 301 wing mirror 1
## 302 woman blue 1
## 303 wtf rant 1
traffic_clean <- traffic_threads %>%
filter(text != "" & !is.na(title)) %>%
mutate(element_id = row_number())
traffic_sentiment2 <- sentiment(traffic_clean$title)
aggregate_scores <- traffic_sentiment2 %>%
group_by(element_id) %>%
summarize(
avg_sentiment = mean(sentiment),
total_sentiment = sum(sentiment),
sentence_count = n(),
.groups = "drop")
final_traffic <- aggregate_scores %>%
left_join(traffic_clean, by = "element_id")
ten_results <- final_traffic %>%
select(title, avg_sentiment, total_sentiment, sentence_count) %>%
head(10)
print(ten_results)
## # A tibble: 10 × 4
## title avg_sentiment total_sentiment sentence_count
## <chr> <dbl> <dbl> <int>
## 1 "Trump Traffic Brigade " -0.202 -0.202 1
## 2 "Can\u0019t even see traffic be… 0 0 1
## 3 "Traffic data over 3 days in a … 0 0 1
## 4 "Traffic is bad because the Gar… -0.237 -0.237 1
## 5 "Yield to faster traffic doesn'… -0.190 -0.190 1
## 6 "Traffic Nightmare for One Podc… -0.335 -0.335 1
## 7 "Please turn your highbeams off… 0.333 0.333 1
## 8 "Guy behind me in traffic was b… -0.470 -0.470 1
## 9 "Trump Traffic Jam has East Ash… -0.367 -0.367 1
## 10 "A traffic stop should be a man… -0.302 -0.302 1
###I believe these ten example sentences are credible. Overall, they seem to be mostly negative. However, one positive result is using “please” and is asking people to turn their highbeams off in the presence of oncoming traffic. This is using polite language, while also contributing to driver safety. Additionally, other sentences tend to trend negative, without any inherent negative language. I believe this is due to terms such as “traffic”, “gridlock”, and “jam” have negative connotations.
final_results <- final_traffic %>%
mutate(sentiment_bin = cut(avg_sentiment, breaks = seq(-0.5, 0.5, by = 0.1),
include.lowest = TRUE))
sentiment_counts <- final_results %>%
group_by(sentiment_bin) %>%
summarize(count = n(), .groups = "drop")
ggplot(sentiment_counts, aes(x = sentiment_bin, y = count)) +
geom_bar(stat = "identity", fill = "skyblue", color = "black") +
labs(title = "Distribution of Sentiment Scores", x = "Sentiment Scores", y = "Count") + theme_minimal() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_x_discrete(drop = FALSE)
ggplot(final_traffic, aes(x = avg_sentiment, y = comments)) +
geom_point(color = "blue", size = 3, alpha = 0.6) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Sentiment Score vs. Post Engagement", x = "Sentiment Score", y = "Number of Comments") + theme_minimal() + theme(axis.text = element_text(size = 12), axis.title = element_text(size = 14))
## `geom_smooth()` using formula = 'y ~ x'
###After performing sentiment analysis, it appears that posters on
Reddit tend to view traffic in a more negative light. The first graph
shows that there is a higher number of posts with a negative score than
a higher score. Typically, posts tend to settle around a sentiment score
of -0.25. Additionally, there are more posts with a strongly negative
score (-0.5) rather than posts with a strongly positive score (0.5). The
second graph was examining if posts that are more negative or positive
increase discourse. As the scatterplot shows, there is a slightly
negative correlation between sentiment score and engagement. This means
that the more negative a post’s sentiment score is, the more likely an
individual is going to engage with it. Overall, the word clouds,
bigrams, and sentiment analyses lead us to believe that Reddit users do
not feel good about traffic, and will utilize the media site to vocalize
their disdain for traffic.