Keyword selected - Daylight Savings Time
On November 5, most of the population of North America and Europe set their clocks back an hour as daylight saving time ends. The change means disruptions in the sleep patterns of over 300 million people—the effects of which will doubtless be felt and seen nearly everywhere sparking debate once again in the U.S. over whether and how to end this practice.
Here, I aim to examine what peoples opinion about Daylight Savings Time has been.
## Selecting subreddits -----
# using keyword
threads_1 <- find_thread_urls(keywords = "daylight savings time",
sort_by = 'relevance',
period = 'all')
## parsing URLs on page 1...
## parsing URLs on page 2...
## parsing URLs on page 3...
colnames(threads_1)
## [1] "date_utc" "timestamp" "title" "text" "subreddit" "comments"
## [7] "url"
head(threads_1)
## date_utc timestamp
## 1 <NA> NA
## 2 2018-02-05 1517814613
## 3 2020-11-09 1604952389
## 4 2022-03-13 1647177714
## 5 2023-11-05 1699171872
## 6 2019-11-04 1572882943
## title
## 1 <NA>
## 2 Finland seeks to abolish Daylight Saving Time from the European Union
## 3 Fuck Daylight Savings
## 4 Write your reps: stay on daylight savings time
## 5 Daylight savings time sucks for night shift workers
## 6 Daylights savings time
## text
## 1 <NA>
## 2
## 3
## 4 Now is the perfect time to write your reps to support the "Sunshine Protection Act", making daylight savings time (the one we just switched to) the new permanent standard time.\n\nHouse: [H.R.69 - Sunshine Protection Act of 2021](https://www.congress.gov/bill/117th-congress/house-bill/69)\n\nSenate: [S.623 - Sunshine Protection Act of 2021](https://www.congress.gov/bill/117th-congress/senate-bill/623)\n\nContact info:\n\nHouse: [Find your representative](https://www.house.gov/representatives/find-your-representative)\n\nSenate: [Links to our senators contact pages](https://www.senate.gov/states/MA/intro.htm)
## 5 They need to abolish daylight savings time, us night shift workers get screwed over every year having to work an extra hour (we do get paid for it though)
## 6
## subreddit comments
## 1 <NA> NA
## 2 worldnews 610
## 3 WatchPeopleDieInside 1485
## 4 boston 169
## 5 antiwork 84
## 6 SeattleWA 111
## url
## 1 <NA>
## 2 https://www.reddit.com/r/worldnews/comments/7vd4ko/finland_seeks_to_abolish_daylight_saving_time/
## 3 https://www.reddit.com/r/WatchPeopleDieInside/comments/jr55pc/fuck_daylight_savings/
## 4 https://www.reddit.com/r/boston/comments/td6t5c/write_your_reps_stay_on_daylight_savings_time/
## 5 https://www.reddit.com/r/antiwork/comments/17o7apz/daylight_savings_time_sucks_for_night_shift/
## 6 https://www.reddit.com/r/SeattleWA/comments/drjih1/daylights_savings_time/
# The prvious commands showed multiple results that may be irrelevant for this study therefore we look at specific subreddits
subreddit_list <- RedditExtractoR::find_subreddits('daylight savings time')
## parsing URLs on page 1...
# Selecting a few subreddits that have most threads and are interesting and relevant to this study
interesting_subreddits <- c("NoStupidQuestions", "AskReddit", "politics", "unpopularopinion", "arizona", "AskAnAmerican","canada", "changemyview", "CasualConversation", "worldnews", "DST", "todayilearned", "news", "Seattle")
threads_3 <- find_thread_urls(keywords="daylight savings time",
subreddit = interesting_subreddits,
sort_by = 'relevance',
period = 'all')
## parsing URLs on page 1...
## parsing URLs on page 2...
## parsing URLs on page 3...
# Tokenization -----
words <- threads_3 %>%
unnest_tokens(output = word, input = text, token = "words")
words %>%
count(word, sort = TRUE) %>%
top_n(20) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(x = word, y = n)) +
geom_col() +
xlab(NULL) +
coord_flip() +
labs(x = "words",
y = "counts",
title = "Unique wordcounts")
## Selecting by n
# Filtering Stop Words -----
data("stop_words")
# view random 50 words
print(stop_words$word[sample(1:nrow(stop_words), 50)])
## [1] "took" "had" "upon" "new" "likely"
## [6] "in" "thanx" "brief" "himself" "should"
## [11] "mostly" "whom" "getting" "far" "contains"
## [16] "doesn't" "nobody" "afterwards" "please" "behind"
## [21] "greetings" "willing" "ever" "own" "least"
## [26] "another" "whither" "enough" "hence" "parting"
## [31] "available" "ending" "him" "i'm" "general"
## [36] "thank" "mean" "ordered" "about" "why's"
## [41] "the" "we" "should" "indeed" "nobody"
## [46] "where" "has" "where" "through" "largely"
replace_reg <- "http[s]?://[A-Za-z\\d/\\.]+|&|<|>"
words_clean <- threads_3 %>%
# drop URLs
mutate(text = str_replace_all(text, replace_reg, "")) %>%
# Tokenization (word tokens)
unnest_tokens(word, text, token = "words") %>%
# drop stop words
anti_join(stop_words, by = "word") %>%
# drop non-alphabet-only strings
filter(str_detect(word, "[a-z]"))
# Check the number of rows after removal of the stop words. There should be fewer words now
print(
glue::glue("Before: {nrow(words)}, After: {nrow(words_clean)}")
)
## Before: 6584, After: 2102
words_clean %>%
count(word, sort = TRUE) %>%
top_n(20, n) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(x = word, y = n)) +
geom_col() +
xlab(NULL) +
coord_flip() +
labs(x = "words",
y = "counts",
title = "Unique wordcounts")
# Word cloud after removing stop words
words_clean %>%
count(word, sort = TRUE) %>%
wordcloud2()
# Highlighting 20 most frequently appearing words
n <- 30
h <- runif(n, 0, 1) # any color
s <- runif(n, 0.6, 1) # vivid
v <- runif(n, 0.3, 0.7) # neither too dark or bright
df_hsv <- data.frame(h = h, s = s, v = v)
pal <- apply(df_hsv, 1, function(x) hsv(x['h'], x['s'], x['v']))
pal <- c(pal, rep("grey", 10000))
words_clean %>%
count(word, sort = TRUE) %>%
wordcloud2(color = pal,
minRotation = 0,
maxRotation = 0,
ellipticity = 0.8)
# Creating Tri-Gram
words_ngram <- threads_3 %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
select(text) %>%
unnest_tokens(output = paired_words,
input = text,
token = "ngrams",
n = 3)
words_ngram %>%
count(paired_words, sort = TRUE) %>%
head(30) %>%
knitr::kable()
| paired_words | n |
|---|---|
| NA | 77 |
| daylight savings time | 27 |
| an extra hour | 11 |
| daylight saving time | 10 |
| extra hour of | 8 |
| twice a year | 7 |
| a lot of | 6 |
| back an hour | 5 |
| in the us | 5 |
| of daylight savings | 5 |
| of the year | 5 |
| the time change | 5 |
| 2 00 am | 4 |
| an hour earlier | 4 |
| back and forth | 4 |
| back to 2 | 4 |
| get rid of | 4 |
| go back to | 4 |
| have more daylight | 4 |
| hour of sleep | 4 |
| i don t | 4 |
| i feel like | 4 |
| i live in | 4 |
| in the fall | 4 |
| in the morning | 4 |
| in the spring | 4 |
| in the winter | 4 |
| lose an hour | 4 |
| lot of people | 4 |
| make it permanent | 4 |
#separate the grouped words into two columns
words_ngram_pair <- words_ngram %>%
separate(paired_words, c("word1", "word2","word3"), sep = " ")
# filter rows where there are stop words under word 1,2,3 columns
words_ngram_pair_filtered <- words_ngram_pair %>%
# drop stop words
filter(!word1 %in% stop_words$word & !word2 %in% stop_words$word & !word3 %in% stop_words$word) %>%
# drop non-alphabet-only strings
filter(str_detect(word1, "[a-z]") & str_detect(word2, "[a-z]"))
# Filter out words that are not encoded in ASCII
# To see what's ASCCII, google 'ASCII table'
library(stringi)
## Warning: package 'stringi' was built under R version 4.1.2
words_ngram_pair_filtered %<>%
filter(stri_enc_isascii(word1) & stri_enc_isascii(word2) & stri_enc_isascii(word3))
# Sort the new Tri-gram (n=3) counts:
words_counts <- words_ngram_pair_filtered %>%
count(word1, word2, word3) %>%
arrange(desc(n))
head(words_counts, 10) %>%
knitr::kable()
| word1 | word2 | word3 | n |
|---|---|---|---|
| daylight | savings | time | 27 |
| daylight | saving | time | 10 |
| observe | daylight | savings | 4 |
| savings | time | permanent | 4 |
| follow | daylight | savings | 2 |
| follow | daylights | saving | 2 |
| 10pm | 6am | shift | 1 |
| 12am | 1am | 2am | 1 |
| 1am | 2am | 2am | 1 |
| 2am | 2am | 3 | 1 |
words_counts %>%
filter(n >= 1) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = .6, edge_width = n)) +
geom_node_point(color = "darkslategray4", size = 3) +
geom_node_text(aes(label = name), vjust = 0.5) +
labs(title = "Word Networks",
x = "", y = "")
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The most worthy tri-gram associates words like - loss, clocks, observation with daylight savings time. Some other relevant tri-grams showed associations between words like question, clock, and sunlight. This suggests that majority of the threads revolve around discussing the concept of daylight savings time and some express opinion regarding the annual change.
texts_1 <- threads_3$title
# syuzhet package
get_sentiment(texts_1, method='nrc')
## [1] 1 3 0 -1 1 2 1 3 0 1 1 1 1 0 3 1 1 1 1 1 1 1 0 1 3
## [26] 2 1 1 1 2 1 1 2 0 2 1 0 2 0 1 0 2 1 0 1 1 0 1 1 1
## [51] 1 2 0 1 2 0 2 1 1 1 2 1 2 1 1 1 2 1 2 3 1 1 0 0 1
## [76] 0 1 1 5 1 1 1 0 2 2 1 1 -1 2 0 2 1 1 1 1 -1 -2 0 1 1
## [101] 0 1 1 1 2 1 0 2 1 1 1 2 1 1 1 0 1 0 4 0 1 0 2 1 2
## [126] 2 1 1 -1 1 2 1 1 1 1 0 1 1 1 2 0 0 0 2 0 1 1 1 1 1
## [151] 1 1 1 1 1 1 -1 1 1 1 0 2 1 1 1 2 0 2 0 0 1 1 1 1 1
## [176] 2 1 1 1 1 1 1 1 2 3 1 1 0 1 0 1 1 2 1 0 1 0 1 1 2
## [201] 2 0 1 1 0 1 1 1 1 1 0 1 1 1 0 1 0 0 1 0 2 1 1 1 2
get_sentiment(texts_1, method='afinn')
## [1] 1 0 2 0 1 3 1 4 0 1 1 1 1 0 0 1 1 1 1 1 1 1 -1 1 1
## [26] 3 1 1 -2 1 1 1 1 0 1 -2 1 -1 0 1 -2 5 1 0 1 1 0 -1 -1 1
## [51] 0 1 0 1 1 0 6 1 -1 1 1 1 3 0 1 1 1 1 3 0 1 1 0 -1 1
## [76] 0 0 1 1 0 1 0 0 1 2 1 1 0 1 0 1 3 1 1 1 -4 0 -1 2 4
## [101] 0 1 1 1 3 3 -4 0 1 1 1 1 1 1 1 0 1 2 1 -2 1 1 1 1 1
## [126] 2 1 1 -3 1 1 1 1 1 1 -1 3 1 1 1 0 0 0 1 0 1 1 1 1 1
## [151] 1 1 1 1 1 1 -3 3 2 1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 4
## [176] -1 1 1 1 2 1 1 1 1 1 1 0 0 -1 1 0 1 0 1 1 4 -2 1 1 1
## [201] 1 0 1 1 0 1 1 1 0 1 1 4 1 1 0 1 0 2 1 1 1 1 1 1 1
get_sentiment(texts_1, method='syuzhet')
## [1] 0.50 2.30 0.80 -0.60 0.50 1.00 0.50 2.40 0.80 0.50 0.90 0.75
## [13] 0.50 0.80 0.90 0.50 0.50 0.50 0.50 0.50 0.50 0.50 -0.25 0.50
## [25] 1.40 1.00 0.50 0.50 0.50 1.30 0.50 0.50 0.75 0.55 1.10 0.15
## [37] -0.35 1.35 0.80 0.50 -0.25 1.75 0.50 0.80 0.50 0.50 0.95 0.35
## [49] 0.50 0.50 0.90 1.10 0.80 0.50 0.90 0.55 2.65 0.50 0.80 0.50
## [61] 0.50 0.50 1.70 1.05 1.30 0.50 0.50 0.75 1.00 0.50 0.50 0.50
## [73] 0.80 0.55 0.50 0.80 0.50 0.25 0.95 0.40 0.50 0.25 0.80 1.00
## [85] 1.10 0.50 0.50 0.55 0.50 0.80 1.70 1.55 1.10 0.50 0.50 0.00
## [97] 0.40 0.15 1.00 0.40 0.80 0.50 0.50 0.50 1.25 1.00 -0.75 0.65
## [109] 0.50 0.50 1.10 0.50 -0.30 0.50 0.75 0.80 0.25 1.30 2.30 -0.25
## [121] 0.50 0.00 0.90 0.50 1.10 -0.30 0.50 0.25 0.05 0.50 1.10 0.50
## [133] 0.50 0.50 0.50 -0.25 1.50 0.25 -0.10 1.10 0.80 0.80 0.55 1.00
## [145] 0.80 0.50 0.00 0.50 0.50 0.50 0.50 0.25 0.50 0.50 0.50 0.50
## [157] 0.50 1.00 1.30 0.50 0.40 0.40 0.50 0.50 0.50 0.75 -0.50 0.50
## [169] 0.55 0.80 0.50 0.50 0.50 0.50 1.00 3.60 0.50 0.50 0.50 1.30
## [181] 0.50 0.50 0.50 0.75 1.15 0.50 -0.55 0.00 0.00 0.25 -0.10 0.50
## [193] 1.10 0.50 -0.30 1.00 -0.10 1.80 0.25 0.85 0.75 0.55 0.25 0.50
## [205] 0.80 0.50 0.50 0.50 0.80 0.50 0.00 1.00 0.75 0.50 0.80 0.50
## [217] 0.80 1.30 0.25 0.25 0.75 0.50 0.50 0.50 1.10
get_sentiment(texts_1, method='bing')
## [1] 1 1 0 -1 1 2 1 1 0 1 1 2 1 0 2 1 1 1 1 1 1 1 0 1 2
## [26] 2 1 1 1 1 1 1 1 0 1 0 1 1 0 1 0 3 1 0 1 1 0 0 1 1
## [51] 2 1 0 1 1 0 2 1 0 1 1 1 2 0 2 1 1 2 1 1 1 1 0 0 1
## [76] 0 1 1 2 0 1 1 0 2 1 1 1 0 1 0 0 3 1 1 0 -1 0 0 2 1
## [101] 0 1 1 1 2 2 0 1 1 1 2 1 1 1 2 0 1 1 1 0 1 1 1 1 1
## [126] 1 1 1 -1 1 1 1 1 1 1 0 2 1 0 1 0 0 0 2 0 1 1 1 1 1
## [151] 1 1 1 1 1 1 1 2 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 1 2
## [176] 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 2 0 0 1 1
## [201] 1 0 1 1 0 1 1 1 0 1 0 2 2 1 0 1 0 1 1 0 1 1 1 1 1
get_nrc_sentiment(texts_1)
## anger anticipation disgust fear joy sadness surprise trust negative
## 1 0 0 0 0 0 0 0 0 0
## 2 0 3 0 1 2 1 0 3 1
## 3 0 0 0 0 0 0 0 0 0
## 4 1 1 1 2 0 3 1 0 2
## 5 0 1 0 0 0 0 0 0 0
## 6 0 0 0 0 1 0 0 1 0
## 7 0 2 0 0 0 0 0 0 0
## 8 0 1 0 0 1 0 0 1 0
## 9 0 1 0 0 0 0 0 0 0
## 10 0 1 0 0 0 0 0 0 0
## 11 0 1 0 0 0 1 0 0 1
## 12 0 1 0 0 0 0 0 1 0
## 13 0 1 0 0 0 0 0 0 0
## 14 0 1 0 0 0 0 0 0 0
## 15 0 3 0 0 2 0 1 2 0
## 16 0 0 0 0 0 1 0 0 0
## 17 0 1 0 0 0 0 0 0 0
## 18 0 0 0 0 0 0 0 0 0
## 19 0 1 0 0 0 0 0 0 0
## 20 0 1 0 0 0 0 0 0 0
## 21 0 1 0 0 0 0 0 0 0
## 22 0 0 0 0 0 0 0 0 0
## 23 0 1 0 1 0 0 1 0 1
## 24 0 0 0 0 0 0 0 0 0
## 25 0 1 0 0 0 0 0 0 0
## 26 0 1 0 0 0 0 0 0 0
## 27 0 0 0 0 0 0 0 0 0
## 28 0 1 0 0 0 0 0 0 0
## 29 0 1 0 0 0 0 0 0 0
## 30 0 1 0 1 0 0 0 0 0
## 31 0 1 0 0 0 0 0 0 0
## 32 0 1 0 0 0 0 0 1 0
## 33 0 1 0 0 0 1 0 0 0
## 34 0 1 0 0 0 0 0 0 0
## 35 0 1 0 0 0 0 0 0 0
## 36 1 2 1 0 0 0 0 0 1
## 37 0 1 0 0 0 0 1 0 1
## 38 0 2 0 0 1 0 0 1 0
## 39 0 1 0 0 0 0 0 0 0
## 40 0 0 0 0 0 0 0 0 0
## 41 0 0 0 1 0 1 0 0 1
## 42 0 1 0 0 0 0 0 0 0
## 43 0 1 0 0 0 0 0 0 0
## 44 0 0 0 0 0 0 0 0 0
## 45 0 1 0 0 0 0 0 0 0
## 46 0 1 0 0 0 0 0 0 0
## 47 0 1 0 0 0 1 0 1 1
## 48 1 3 1 2 1 1 1 1 2
## 49 0 2 0 0 0 0 0 0 0
## 50 0 1 0 0 0 0 0 0 0
## 51 0 1 0 0 0 2 0 0 1
## 52 0 2 0 1 1 0 0 1 0
## 53 0 1 0 0 0 0 0 0 0
## 54 0 1 0 0 0 0 0 0 0
## 55 0 2 0 0 0 0 0 1 0
## 56 0 1 0 0 0 0 0 1 0
## 57 0 3 0 0 1 0 1 2 0
## 58 0 1 0 0 0 0 0 0 0
## 59 0 1 0 0 0 0 0 1 0
## 60 0 1 0 0 0 0 0 0 0
## 61 0 0 0 0 0 0 0 0 0
## 62 0 1 0 0 0 0 0 0 0
## 63 1 1 0 0 0 0 0 1 0
## 64 0 1 0 0 0 0 0 0 0
## 65 0 1 0 0 0 0 0 0 0
## 66 0 1 0 0 0 0 0 0 0
## 67 0 1 0 0 0 0 0 1 0
## 68 0 1 0 0 0 0 0 0 0
## 69 0 0 0 0 1 0 0 1 0
## 70 0 1 0 0 0 0 0 1 0
## 71 0 1 0 0 0 0 0 0 0
## 72 0 1 0 0 0 0 0 0 0
## 73 0 1 0 0 0 0 0 0 0
## 74 1 0 0 0 0 0 0 0 1
## 75 0 0 0 0 0 0 0 0 0
## 76 0 1 0 0 0 0 0 0 0
## 77 0 1 0 0 0 0 0 0 0
## 78 0 1 0 0 0 0 0 0 0
## 79 1 3 1 0 1 1 1 4 1
## 80 0 0 0 0 0 0 0 0 0
## 81 0 0 0 0 0 0 0 0 0
## 82 0 0 0 0 0 0 0 0 0
## 83 0 1 0 0 0 0 0 0 0
## 84 0 0 0 0 0 0 0 0 0
## 85 0 2 0 0 1 0 1 1 0
## 86 0 1 0 0 0 0 0 0 0
## 87 0 1 0 0 0 0 0 0 0
## 88 1 1 1 1 0 1 0 0 1
## 89 0 2 0 0 1 0 1 1 0
## 90 0 0 0 0 0 0 0 0 0
## 91 0 1 0 0 0 0 0 0 0
## 92 0 1 0 0 0 0 0 0 0
## 93 0 1 0 0 0 1 0 0 1
## 94 0 1 0 0 0 0 0 0 0
## 95 0 1 0 0 0 0 0 0 0
## 96 1 1 1 1 0 1 0 0 1
## 97 0 2 1 1 0 0 0 0 2
## 98 0 1 0 1 0 0 0 0 1
## 99 0 1 0 0 0 0 0 0 0
## 100 0 1 0 0 0 1 0 0 0
## 101 0 1 0 0 0 0 0 0 0
## 102 0 1 0 0 0 0 0 0 0
## 103 0 0 0 0 0 0 0 0 0
## 104 0 0 0 0 0 0 0 0 0
## 105 0 1 0 0 0 0 0 0 0
## 106 0 0 0 0 0 0 0 0 0
## 107 0 1 0 0 0 1 1 0 1
## 108 0 2 0 0 1 0 0 1 1
## 109 0 1 0 0 0 0 0 0 0
## 110 0 0 0 0 0 0 0 0 0
## 111 0 1 0 0 0 0 0 0 0
## 112 0 1 0 0 0 0 0 0 0
## 113 0 0 0 0 0 0 0 0 0
## 114 0 1 0 0 0 0 0 0 0
## 115 0 0 0 1 0 0 0 0 0
## 116 0 1 0 0 0 0 0 0 0
## 117 0 0 0 0 0 0 0 0 0
## 118 0 1 0 0 0 0 0 0 0
## 119 0 2 0 0 1 0 0 1 0
## 120 1 0 1 1 0 1 0 0 1
## 121 0 1 0 0 0 0 0 0 0
## 122 0 1 0 1 0 0 0 0 1
## 123 0 1 0 0 0 0 0 0 0
## 124 0 0 0 0 0 0 0 0 0
## 125 0 2 0 1 1 0 0 1 0
## 126 0 1 0 0 0 0 0 0 0
## 127 0 1 0 0 0 0 0 0 0
## 128 0 0 0 0 0 0 0 0 0
## 129 1 1 1 1 0 1 0 0 1
## 130 0 1 0 0 0 0 0 0 0
## 131 0 2 0 1 1 0 0 1 0
## 132 0 1 0 0 0 0 0 0 0
## 133 0 1 0 0 0 0 0 0 0
## 134 0 1 0 0 0 0 0 0 0
## 135 0 2 0 0 0 0 0 0 0
## 136 1 1 0 0 0 1 0 0 1
## 137 0 1 0 0 0 0 0 0 0
## 138 0 1 0 0 0 0 0 0 0
## 139 0 1 0 0 0 0 0 0 0
## 140 0 1 0 0 0 0 0 0 0
## 141 0 1 0 0 0 0 0 0 0
## 142 0 1 0 0 0 0 0 0 0
## 143 0 1 0 0 0 0 0 0 0
## 144 0 1 0 0 0 0 0 0 0
## 145 0 1 0 1 0 0 0 0 0
## 146 0 1 0 0 0 0 0 0 0
## 147 0 1 0 0 0 0 0 0 0
## 148 0 1 0 0 0 0 0 0 0
## 149 0 1 0 0 0 0 0 0 0
## 150 0 1 0 0 0 0 0 0 0
## 151 0 1 0 0 0 0 0 0 0
## 152 0 1 0 0 0 0 0 0 0
## 153 0 1 0 0 0 0 0 0 0
## 154 0 1 0 0 0 0 0 0 0
## 155 0 0 0 0 0 0 0 0 0
## 156 0 1 0 1 0 0 0 0 0
## 157 0 1 1 0 0 0 0 0 2
## 158 0 1 0 0 0 0 0 0 0
## 159 0 1 0 0 0 0 0 0 0
## 160 0 1 0 0 0 0 0 0 0
## 161 0 1 0 0 0 0 0 0 0
## 162 0 1 0 0 1 0 0 1 0
## 163 0 1 0 0 0 0 0 0 0
## 164 0 1 0 0 0 0 0 0 0
## 165 0 1 0 0 0 0 0 0 0
## 166 0 1 0 0 0 0 0 1 0
## 167 0 1 0 1 0 0 0 0 1
## 168 0 1 0 0 0 0 0 0 0
## 169 0 1 0 0 0 0 0 0 0
## 170 0 1 0 0 0 0 0 0 0
## 171 0 1 0 0 0 0 0 0 0
## 172 0 1 0 0 0 0 0 0 0
## 173 0 0 0 0 0 0 0 0 0
## 174 0 0 0 0 0 0 0 0 0
## 175 0 2 0 1 0 0 0 1 1
## 176 0 1 0 1 0 1 0 1 0
## 177 0 1 0 0 0 0 0 0 0
## 178 0 1 0 0 0 0 0 0 0
## 179 0 1 0 0 0 0 0 0 0
## 180 0 1 0 0 0 0 0 0 0
## 181 0 1 0 0 0 0 0 0 0
## 182 0 1 0 0 0 0 0 0 0
## 183 0 1 0 0 0 0 0 0 0
## 184 0 1 0 0 0 0 0 1 0
## 185 0 1 0 0 0 0 0 0 0
## 186 0 0 0 0 0 0 0 0 0
## 187 0 1 0 0 0 0 0 0 0
## 188 0 1 0 1 0 0 0 0 1
## 189 1 1 1 1 0 1 0 0 1
## 190 0 0 0 0 0 1 0 0 1
## 191 0 1 0 0 0 0 0 0 0
## 192 0 1 0 0 0 0 0 0 0
## 193 0 1 0 0 0 0 0 0 0
## 194 0 1 0 0 0 0 0 0 0
## 195 1 1 0 0 0 0 0 0 1
## 196 0 1 0 0 0 0 0 0 0
## 197 0 2 0 1 0 0 0 0 1
## 198 0 1 0 0 0 0 0 1 0
## 199 0 1 0 0 0 0 0 0 0
## 200 0 2 0 0 1 0 1 1 0
## 201 0 0 0 0 0 0 0 0 0
## 202 0 1 0 0 0 0 1 0 0
## 203 0 0 0 0 0 0 0 0 0
## 204 0 1 1 0 0 0 0 1 0
## 205 0 1 0 0 0 0 0 0 0
## 206 0 0 0 0 0 0 0 0 0
## 207 0 1 0 0 0 0 0 0 0
## 208 0 1 0 0 0 0 0 0 0
## 209 0 1 0 0 0 0 0 0 0
## 210 0 1 0 0 0 0 0 0 0
## 211 0 0 0 0 0 0 0 0 1
## 212 0 1 0 0 0 0 0 0 0
## 213 0 1 0 0 0 0 0 0 0
## 214 0 1 0 0 0 0 0 0 0
## 215 0 1 0 1 0 0 0 0 0
## 216 0 1 0 0 0 0 0 0 0
## 217 0 0 0 0 0 0 0 0 0
## 218 0 1 0 0 0 0 0 0 0
## 219 0 1 0 0 0 0 0 0 0
## 220 1 2 1 1 0 1 1 0 1
## 221 0 1 0 0 0 0 0 1 0
## 222 0 0 0 0 0 0 0 0 0
## 223 0 0 0 0 0 0 0 0 0
## 224 0 0 0 0 0 0 0 0 0
## 225 0 1 0 0 0 0 0 0 0
## positive
## 1 1
## 2 4
## 3 0
## 4 1
## 5 1
## 6 2
## 7 1
## 8 2
## 9 0
## 10 1
## 11 2
## 12 1
## 13 1
## 14 0
## 15 3
## 16 1
## 17 1
## 18 1
## 19 1
## 20 1
## 21 1
## 22 1
## 23 1
## 24 1
## 25 3
## 26 2
## 27 1
## 28 1
## 29 1
## 30 2
## 31 1
## 32 1
## 33 2
## 34 0
## 35 2
## 36 2
## 37 1
## 38 2
## 39 0
## 40 1
## 41 1
## 42 2
## 43 1
## 44 0
## 45 1
## 46 1
## 47 1
## 48 3
## 49 1
## 50 1
## 51 2
## 52 2
## 53 0
## 54 1
## 55 2
## 56 0
## 57 2
## 58 1
## 59 1
## 60 1
## 61 2
## 62 1
## 63 2
## 64 1
## 65 1
## 66 1
## 67 2
## 68 1
## 69 2
## 70 3
## 71 1
## 72 1
## 73 0
## 74 1
## 75 1
## 76 0
## 77 1
## 78 1
## 79 6
## 80 1
## 81 1
## 82 1
## 83 0
## 84 2
## 85 2
## 86 1
## 87 1
## 88 0
## 89 2
## 90 0
## 91 1
## 92 1
## 93 2
## 94 1
## 95 1
## 96 0
## 97 0
## 98 1
## 99 1
## 100 1
## 101 0
## 102 1
## 103 1
## 104 1
## 105 2
## 106 1
## 107 1
## 108 3
## 109 1
## 110 1
## 111 1
## 112 2
## 113 1
## 114 1
## 115 1
## 116 0
## 117 1
## 118 0
## 119 4
## 120 1
## 121 1
## 122 1
## 123 2
## 124 1
## 125 2
## 126 2
## 127 1
## 128 1
## 129 0
## 130 1
## 131 2
## 132 1
## 133 1
## 134 1
## 135 1
## 136 1
## 137 1
## 138 1
## 139 1
## 140 2
## 141 0
## 142 0
## 143 0
## 144 2
## 145 0
## 146 1
## 147 1
## 148 1
## 149 1
## 150 1
## 151 1
## 152 1
## 153 1
## 154 1
## 155 1
## 156 1
## 157 1
## 158 1
## 159 0
## 160 1
## 161 0
## 162 2
## 163 1
## 164 1
## 165 1
## 166 2
## 167 1
## 168 2
## 169 0
## 170 0
## 171 1
## 172 1
## 173 1
## 174 1
## 175 2
## 176 2
## 177 1
## 178 1
## 179 1
## 180 1
## 181 1
## 182 1
## 183 1
## 184 2
## 185 3
## 186 1
## 187 1
## 188 1
## 189 2
## 190 1
## 191 1
## 192 1
## 193 2
## 194 1
## 195 1
## 196 1
## 197 1
## 198 1
## 199 1
## 200 2
## 201 2
## 202 0
## 203 1
## 204 1
## 205 0
## 206 1
## 207 1
## 208 1
## 209 1
## 210 1
## 211 1
## 212 1
## 213 1
## 214 1
## 215 0
## 216 1
## 217 0
## 218 0
## 219 1
## 220 1
## 221 2
## 222 1
## 223 1
## 224 1
## 225 2
# Handeling negations
# by sentence
sentiment(texts_1)
## element_id sentence_id word_count sentiment
## 1: 1 1 2 0.3535534
## 2: 2 1 11 0.4974937
## 3: 2 2 34 0.2143732
## 4: 3 1 8 0.2828427
## 5: 4 1 7 -0.2267787
## ---
## 252: 221 1 7 0.3401680
## 253: 222 1 7 0.1889822
## 254: 223 1 4 0.2500000
## 255: 224 1 9 0.1666667
## 256: 225 1 12 -0.1443376
# by string (a group of sentences)
reddit_sentiment_dictionary <- sentiment_by(texts_1)
# 10 sample texts alongside their sentiment scores
sentiment_attributes(texts_1[1])$Attributes
## Attribute Count Rate
## 1: negator 0 0.0
## 2: amplifier 0 0.0
## 3: de-amplifier 0 0.0
## 4: adversative 0 0.0
## 5: negative 0 0.0
## 6: positive 1 0.5
## 7: polarized 1 0.5
sentiment_attributes(texts_1[2])$Attributes
## Attribute Count Rate
## 1: negator 0 0.0000000
## 2: amplifier 0 0.0000000
## 3: de-amplifier 0 0.0000000
## 4: adversative 0 0.0000000
## 5: negative 0 0.0000000
## 6: positive 8 0.1666667
## 7: polarized 8 0.1666667
sentiment_attributes(texts_1[3])$Attributes
## Attribute Count Rate
## 1: negator 0 0.000
## 2: amplifier 0 0.000
## 3: de-amplifier 0 0.000
## 4: adversative 0 0.000
## 5: negative 0 0.000
## 6: positive 1 0.125
## 7: polarized 1 0.125
sentiment_attributes(texts_1[4])$Attributes
## Attribute Count Rate
## 1: negator 1 0.01587302
## 2: amplifier 0 0.00000000
## 3: de-amplifier 0 0.00000000
## 4: adversative 0 0.00000000
## 5: negative 3 0.04761905
## 6: positive 2 0.03174603
## 7: polarized 5 0.07936508
sentiment_attributes(texts_1[5])$Attributes
## Attribute Count Rate
## 1: negator 0 0.00000000
## 2: amplifier 0 0.00000000
## 3: de-amplifier 0 0.00000000
## 4: adversative 0 0.00000000
## 5: negative 0 0.00000000
## 6: positive 1 0.09090909
## 7: polarized 1 0.09090909
sentiment_attributes(texts_1[6])$Attributes
## Attribute Count Rate
## 1: negator 1 0.06666667
## 2: amplifier 0 0.00000000
## 3: de-amplifier 0 0.00000000
## 4: adversative 1 0.06666667
## 5: negative 0 0.00000000
## 6: positive 1 0.06666667
## 7: polarized 1 0.06666667
sentiment_attributes(texts_1[7])$Attributes
## Attribute Count Rate
## 1: negator 0 0.00000000
## 2: amplifier 0 0.00000000
## 3: de-amplifier 0 0.00000000
## 4: adversative 1 0.07692308
## 5: negative 0 0.00000000
## 6: positive 1 0.07692308
## 7: polarized 1 0.07692308
sentiment_attributes(texts_1[8])$Attributes
## Attribute Count Rate
## 1: negator 0 0.00000000
## 2: amplifier 1 0.08333333
## 3: de-amplifier 0 0.00000000
## 4: adversative 0 0.00000000
## 5: negative 0 0.00000000
## 6: positive 2 0.16666667
## 7: polarized 2 0.16666667
sentiment_attributes(texts_1[9])$Attributes
## Attribute Count Rate
## 1: negator 0 0.0
## 2: amplifier 0 0.0
## 3: de-amplifier 0 0.0
## 4: adversative 0 0.0
## 5: negative 0 0.0
## 6: positive 1 0.2
## 7: polarized 1 0.2
sentiment_attributes(texts_1[10])$Attributes
## Attribute Count Rate
## 1: negator 0 0.00000000
## 2: amplifier 0 0.00000000
## 3: de-amplifier 0 0.00000000
## 4: adversative 0 0.00000000
## 5: negative 0 0.00000000
## 6: positive 1 0.07692308
## 7: polarized 1 0.07692308
sentiment_attributes(texts_1[1:10])$Attributes
## Attribute Count Rate
## 1: negator 2 0.010526316
## 2: amplifier 1 0.005263158
## 3: de-amplifier 0 0.000000000
## 4: adversative 2 0.010526316
## 5: negative 3 0.015789474
## 6: positive 19 0.100000000
## 7: polarized 22 0.115789474
reddit_sentiment <- read_csv(here('/Users/apple/Desktop/FALL 2023/2. IUA/Assignments/Major_4/reddit_bert2.csv')) %>%
drop_na('bert_label')
## New names:
## Rows: 225 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): date_utc, title, text, subreddit, url, bert_label dbl (4): ...1,
## timestamp, comments, bert_score
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
BERT: 1 star (negative) vs. 5 stars (positive)
bert_example <- reddit_sentiment %>%
filter(bert_label_numeric %in% c(1,5)) %>%
group_by(bert_label) %>%
arrange(desc(bert_score)) %>%
slice_head(n = 10) %>%
ungroup()
# 1 star
bert_example %>% filter(bert_label_numeric == 1) %>% pull(title_text) %>% print()
## [1] "What do we need to do to end daylight savings in the US?. Seriously most people hate it. What actually needs to happen to get rid of it? A petition or something to get the ball rolling?"
## [2] "Could the president just declare we will no longer do daylight saving time?. What would it take to make this twice a year time change crap go away?"
## [3] "What is the likelyhood of the government ever officially ending daylight savings time. I, as many people do, personally hate it. As it\031s need is pretty outdated, is it possible it would be eliminated in the next 100 years/ever?"
## [4] "What can I, a US citizen, do to help get Daylight savings time cancelled?. Not even joking. I've been changing my clock twice a year for 29 years, to no go God damn benefit. Do I need to call my senator? What do I do?\n\nEDIT: Yes, I'm an idiot. I meant making DST permanent."
## [5] "Can someone explain Daylight Savings Time to me? It makes absolutely no sense to me.. "
## [6] "Why haven't we gotten rid of Daylight Savings Time in the US? How long until we end this ridiculous practice?. "
## [7] "Why haven't we ended daylight savings?. It seems pointless and it sucks for the majority of people."
## [8] "Is it true Arizona doesn\031t observe daylights saving time?. I read online that Hawaii and Arizona don\031t observe DST. Is this true? How would this work since everyone else in the US time changes and time zones????"
## [9] "Is there no way for the public to push the government to stop Daylight Saving Time?. There was the bill passed last year, but that didn't seem to change anything for both U.S. or Canada. I could be wrong, but I have yet to meet a single person who appreciates or thinks they benefit from the time change, so why is something so hated by the majority of the public not being changed at all? Can't we all just mass petition or protest for either DST to be permanent or to abolish it?"
## [10] "Daylight Savings Time. Why are we still changing clocks? We go back to standard time tomorrow and everyone is going to be miserable this week (personally I\031m going to be SAD until March). Car accidents are going to go up, heart attacks, and just general people being moody without knowing why.\n\nI understand DST sucks for people on the back end of time zones this time of year like in Indiana, but why don\031t we just create another time zone in America and forget this clock changing nonsense?"
# 5 star
bert_example %>% filter(bert_label_numeric == 5) %>% pull(title_text) %>% print()
## [1] "I hate daylight saving times. Why can't we move the clock back 30 minutes and be done with this forever?. Wow.. thank you for the award"
## [2] "What are you doing to celebrate daylight savings time?. "
## [3] "What's your opinion on Daylight Savings Time?. It's coming up this month...and fast."
## [4] "What's daylight savings time for. "
## [5] "What's the point of daylight savings time. "
## [6] "What do you think about Daylight Savings Time?. It's coming up later this month...and fast."
## [7] "How do hospitals and airports deal with daylight savings time?. "
## [8] "What do you think about Daylight Savings Time?. "
## [9] "My friend says Benjamin Franklin invented daylight savings time, is it true?. "
Dictionary method: negative vs. positive
sentimentr_example <- reddit_sentiment %>%
mutate(sentimentr_abs = abs(sentiment_dict),
sentimentr_binary = case_when(sentiment_dict > 0 ~ 'positive',
TRUE ~ 'negative')) %>%
group_by(sentimentr_binary) %>%
arrange(desc(sentimentr_abs)) %>%
slice_head(n = 10) %>%
ungroup() %>%
arrange(sentiment_dict)
# negative
sentimentr_example %>% filter(sentimentr_binary == 'negative') %>% pull(title_text) %>% print()
## [1] "Why won't they just make daylight savings permanent?. It's annoying that theres been so many attempts at making daylight savings permanent but congress will just not approve it for some reason, it getting dark at 5 pm is depressing and doesn't benefit any body."
## [2] "Are businesses required to change their clocks for daylight savings time?. Like obviously there is the motivation of being aligned with everybody else for coordination, but are there any punitive consequences stopping everybody from just change their clocks back failed state style?"
## [3] "Why haven't we ended daylight savings?. It seems pointless and it sucks for the majority of people."
## [4] "Is the US making daylight savings permanent? I heard we did but googling isn't helpful.. "
## [5] "What do we need to do to end daylight savings in the US?. Seriously most people hate it. What actually needs to happen to get rid of it? A petition or something to get the ball rolling?"
## [6] "what would the consequences be if Daylight Savings never existed. Title"
## [7] "What is the likelyhood of the government ever officially ending daylight savings time. I, as many people do, personally hate it. As it\031s need is pretty outdated, is it possible it would be eliminated in the next 100 years/ever?"
## [8] "Could the president just declare we will no longer do daylight saving time?. What would it take to make this twice a year time change crap go away?"
## [9] "If they end up getting rid of Daylight Savings Time, does that mean it won\031t get dark so early?. Would it still get dark as early around this time of year if they didn\031t change the clocks? Sorry, the whole DST concept confuses me and I\031m starting to go insane at how depressing this time of the year is."
## [10] "Why isn't Daylight Saving Time a gradual change? Small increments forwards and backwards throughout the year?. For example, couldn't you jump forward/backwards 10 minutes on the 1st of every month and get the same benefits of DST without the drawback of the big sudden change?"
# positive
sentimentr_example %>% filter(sentimentr_binary == 'positive') %>% pull(title_text) %>% print()
## [1] "Does anyone actually like daylight savings?. "
## [2] "If Prince George has twins, and one is born at 0126 and then because of daylight savings time his next child is born at 0107; who\031s the the heir to the throne?. "
## [3] "about twins Birth after Daylight savings time ends?. if someone gives Birth one is Born at 1:59 AM right before daylight savings time ends the other is Born shortly after 2:00 AM when daylight savings time has ended which one is considered the older twin?"
## [4] "Why do we still have daylight saving time? If you're in favor of it, convince me why it's good. "
## [5] "What is the purpose of Daylight Saving Time?. Not only is it difficult to get a straight answer to what the *historical* purpose was, but regardless, now in 2022 - what benefit does it have?"
## [6] "In daylight savings time, which time of the year is more correct?. "
## [7] "Why do we change the clocks twice a year for daylight savings time and have such short days in the winter when we can just have it set an hour ahead of what it would normally be all year?. Like I understand the purpose of daylight savings is to have more daylight to enjoy in summer, but that makes winter days so short, which is especially noticeable in colder climates further from the equator, so why don't they just have it an hour ahead all year so we can also have more daylight in winter?"
## [8] "Is there still a reason to have Daylight Savings Time?. I remember being told it was created to save energy during WW1, but is there a modern reason to keep it besides tradition?"
## [9] "My friend says Benjamin Franklin invented daylight savings time, is it true?. "
## [10] "As a concerned US citizen, what practical steps can I take to contribute to the movement to end Daylight Saving Time and promote a consistent time system?. "
The analysis of number of threads per sentiment category suggests that majority of the threads shows that most people have negative sentiments regarding daylight savings time. Threads that revolved around discussing why DST is necessary and whether it should be discontinued had a more negative score as compared to threads that asked questions regarding the concept of DST.
reddit_sentiment %>%
ggplot(aes(x = bert_label)) +
geom_bar(fill = "white") +
dark_theme_gray()
Since there were more threads with negative sentiments, I looked at the words that were most commonly appearing negative words in negative threads. The word cloud shows that “change”, “switch”, “darkness”, “rid” were some of the most commonly appearing negative words. These findings conform with hypothesis that the study began with, that DST is associated with disruption in daily activities and there has been a debate over ending this practice. Through the analysis of Reddit data I also wanted to find out more about what factors drives people to have negative sentiments around DST. The word cloud reveals that “change”, “darkness” and “sleep” are some of the factors that drive negative sentiments towards DST.
data("stop_words")
replace_reg <- "http[s]?://[A-Za-z\\d/\\.]+|&|<|>"
reddit_sentiment_clean <- reddit_sentiment %>%
mutate(title_text = str_replace_all(title_text, replace_reg, "")) %>%
unnest_tokens(word, title_text, token = "words") %>%
anti_join(stop_words, by = "word") %>%
filter(str_detect(word, "[a-z]")) %>%
filter(!word %in% c('daylight','savings','time'))
reddit_sentiment_clean_negative <- reddit_sentiment_clean %>%
filter(bert_label_numeric %in% c(1,2))
reddit_sentiment_clean_positive <- reddit_sentiment_clean %>%
filter(bert_label_numeric %in% c(4,5))
reddit_sentiment_clean_negative_unique <- reddit_sentiment_clean_negative %>%
anti_join(reddit_sentiment_clean_positive, by = 'word')
reddit_sentiment_clean_positive_unique <- reddit_sentiment_clean_positive %>%
anti_join(reddit_sentiment_clean_negative, by = 'word')
n <- 20
h <- runif(n, 0, 1) # any color
s <- runif(n, 0.6, 1) # vivid
v <- runif(n, 0.3, 0.7) # neither too dark or bright
df_hsv <- data.frame(h = h, s = s, v = v)
pal <- apply(df_hsv, 1, function(x) hsv(x['h'], x['s'], x['v']))
pal <- c(pal, rep("grey", 10000))
reddit_sentiment_clean_negative_unique %>%
count(word, sort = TRUE) %>%
wordcloud2(color = pal,
minRotation = 0,
maxRotation = 0,
ellipticity = 0.8)