# Packages this analysis depends on
packages <- c(
  "RedditExtractoR", "anytime", "magrittr", "httr", "tidytext",
  "tidyverse", "igraph", "ggraph", "wordcloud2", "textdata", "here",
  "syuzhet", "sentimentr", "ggplot2", "ggthemes"
)
# Install whichever of them are missing from the local library
installed_packages <- packages %in% rownames(installed.packages())
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}
# Attach every package; invisible() hides the list that lapply() returns
invisible(lapply(packages, library, character.only = TRUE))
I want to observe how sentiment toward crypto changed after the crypto market plummeted.
# Search Reddit for threads matching the keyword
# (sorted by relevance, period = "month")
threads_1 <- find_thread_urls(
  keywords = "crypto",
  sort_by = "relevance",
  period = "month"
)
# Discard rows with a missing value in any column, then reset the row names
threads_1 <- drop_na(threads_1)
rownames(threads_1) <- NULL
# Sanitize every character column of the thread table
threads_1 <- threads_1 %>%
  mutate(across(where(is.character), function(col) {
    col <- str_replace_all(col, "\\|", "/") # vertical bars -> slashes
    col <- str_replace_all(col, "\\n", " ") # newlines -> single spaces
    str_squish(col) # collapse repeated whitespace and trim both ends
  }))
# Inspect the column names and the first three rows
colnames(threads_1)
knitr::kable(head(threads_1, n = 3))
# Save the sanitized threads to disk
write.csv(threads_1, file = "reddit.csv", row.names = FALSE)
# Load the data back from the saved file
threads_1 <- read_csv(file = "reddit.csv")
## Rows: 247 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): title, text, subreddit, url
## dbl (2): timestamp, comments
## date (1): date_utc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Word tokenization: split each post's text into word tokens
words <- threads_1 %>%
  unnest_tokens(output = word, input = text, token = "words")
# Bar chart of the 20 most frequent raw tokens (stop words are still included)
words %>%
  count(word, sort = TRUE) %>%
  # Rank explicitly by the count column instead of letting top_n() guess it;
  # slice_max() keeps ties, as top_n() did
  slice_max(n, n = 20) %>%
  # Order the factor by frequency so the bars are sorted
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "words",
    y = "counts",
    title = "Unique wordcounts"
  )
## Selecting by n
# load list of stop words - from the tidytext package
data("stop_words")
# view 100 randomly chosen stop words
print(sample(stop_words$word, 100))
## [1] "whereby" "wouldn't" "hers" "themselves" "hello"
## [6] "between" "it" "here's" "anything" "everything"
## [11] "d" "que" "goes" "orders" "t"
## [16] "having" "second" "wanted" "old" "sure"
## [21] "anyone" "third" "however" "only" "anybody"
## [26] "turn" "they" "why" "we've" "haven't"
## [31] "much" "so" "first" "mean" "selves"
## [36] "wherein" "men" "nd" "myself" "these"
## [41] "her" "differ" "thanks" "new" "face"
## [46] "yourselves" "l" "th" "while" "went"
## [51] "latterly" "where's" "know" "entirely" "he"
## [56] "anyhow" "opens" "differently" "better" "thru"
## [61] "shouldn't" "fact" "anyway" "it'll" "greatest"
## [66] "into" "those" "appear" "consider" "early"
## [71] "etc" "then" "ought" "must" "asks"
## [76] "works" "which" "we" "had" "whoever"
## [81] "sees" "right" "been" "looking" "me"
## [86] "not" "instead" "man" "likely" "or"
## [91] "lest" "by" "s" "almost" "turned"
## [96] "a" "really" "thing" "nowhere" "welcome"
# Regex for text to strip before tokenizing:
#   * whole URLs - everything after the scheme up to the next whitespace or
#     closing parenthesis, so query strings, underscores, hyphens and
#     percent-escapes are removed too instead of leaking into the tokens
#   * stray "&", "<" and ">" characters
# NOTE(review): the last three alternatives may originally have been the HTML
# entities for these characters - confirm against the raw text.
replace_reg <- "https?://[^\\s)]+|&|<|>"
words_clean <- threads_1 %>%
  # drop URLs and stray markup characters
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  # Tokenization (word tokens)
  unnest_tokens(word, text, token = "words") %>%
  # drop stop words
  anti_join(stop_words, by = "word") %>%
  # keep only tokens that contain at least one letter (drops pure numbers)
  filter(str_detect(word, "[a-z]"))
# Check the number of rows after removal of the stop words.
# There should be fewer words now
print(
  glue::glue("Before: {nrow(words)}, After: {nrow(words_clean)}")
)
## Before: 27788, After: 11306
# Bar chart of the 20 most frequent tokens after cleaning
words_clean %>%
  count(word, sort = TRUE) %>%
  # slice_max() supersedes top_n(); ties are kept, as before
  slice_max(n, n = 20) %>%
  # Order the factor by frequency so the bars are sorted
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "words",
    y = "counts",
    title = "Unique wordcounts"
  )
# Remove the token "crypto" before drawing the word cloud.
# filter() drops non-matching rows without creating NA rows.
words_clean <- words_clean %>%
  filter(word != "crypto")
# Word cloud sized by token frequency
words_clean %>%
  count(word, sort = TRUE) %>%
  wordcloud2()
# Get ngrams. You may try playing around with the value of n, n=3, n=4
words_ngram <- threads_1 %>%
  # strip URLs / markup characters before tokenizing
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  select(text) %>%
  unnest_tokens(
    output = paired_words,
    input = text,
    token = "ngrams",
    n = 3
  ) %>%
  # Texts that cannot form a single n-gram come back as NA; drop them so
  # they do not show up as a bogus "NA" entry in the frequency table
  filter(!is.na(paired_words))
# Show ngrams with sorted values
words_ngram %>%
  count(paired_words, sort = TRUE) %>%
  head(20) %>%
  knitr::kable()
| paired_words | n |
|---|---|
| 0 0 0 | 949 |
| NA | 132 |
| discussion threads u | 93 |
| posts the monthly | 93 |
| daily crypto discussion | 62 |
| do not share | 62 |
| the daily crypto | 62 |
| 00 0 0 | 54 |
| 00 00 0 | 54 |
| 0 0 1 | 43 |
| get rich quick | 33 |
| afford to lose | 32 |
| can afford to | 32 |
| is no longer | 32 |
| to lose and | 32 |
| you can afford | 32 |
| 22 restrict_sr onsort | 31 |
| 22daily crypto discussion | 31 |
| 2fa if possible | 31 |
| 3a 22daily crypto | 31 |
# Split each trigram into its three component words, one column per word
words_ngram_pair <- words_ngram %>%
  separate(paired_words, into = c("word1", "word2", "word3"), sep = " ")
# Keep only trigrams in which no word is a stop word and every word
# contains at least one letter
words_ngram_pair_filtered <- words_ngram_pair %>%
  filter(
    !(word1 %in% stop_words$word |
      word2 %in% stop_words$word |
      word3 %in% stop_words$word),
    str_detect(word1, "[a-z]"),
    str_detect(word2, "[a-z]"),
    str_detect(word3, "[a-z]")
  )
# Keep only trigrams whose three words are all pure ASCII
# (see any ASCII table for the character set)
library(stringi)
words_ngram_pair_filtered <- words_ngram_pair_filtered %>%
  filter(
    stri_enc_isascii(word1),
    stri_enc_isascii(word2),
    stri_enc_isascii(word3)
  )
# Count each distinct trigram (n = 3), most frequent first
words_counts <- words_ngram_pair_filtered %>%
  count(word1, word2, word3, sort = TRUE)
knitr::kable(head(words_counts, 20))
| word1 | word2 | word3 | n |
|---|---|---|---|
| daily | crypto | discussion | 62 |
| 22daily | crypto | discussion | 31 |
| 3a | 22daily | crypto | 31 |
| _rcryptocurrency_moons | moons | wiki | 31 |
| beginner | resources | _resources | 31 |
| cc | discussion | thread | 31 |
| common | scams | rules | 31 |
| crypto | discussion | thread | 31 |
| crypto | discussion | threads | 31 |
| cryptocurrency | moons | _rcryptocurrency_moons | 31 |
| cryptocurrencymemes | prior | daily | 31 |
| discussion | threads | follow | 31 |
| effect | discussion | topics | 31 |
| exercise | utmost | caution | 31 |
| homophobic | language | comments | 31 |
| links | beginner | resources | 31 |
| monthly | news | summary | 31 |
| monthly | optimists | discussion | 31 |
| monthly | skeptics | discussion | 31 |
| moons | _rcryptocurrency_moons | moons | 31 |
# plot word network
# graph_from_data_frame() reads the first two columns (word1, word2) as the
# edge endpoints; the remaining columns (word3, n) become edge attributes.
# NOTE(review): word3 is therefore never drawn as a node - confirm that a
# word1 -> word2 network is what is intended for trigram counts.
words_counts %>%
  filter(n >= 3) %>% # keep trigrams seen at least three times
  graph_from_data_frame() %>% # convert to graph
  ggraph(layout = "fr") +
  # Edge width encodes the count; the transparency is a fixed value, so it is
  # set outside aes() (inside aes() it is mapped as data and adds a legend)
  geom_edge_link(aes(edge_width = n), edge_alpha = 0.6) +
  geom_node_point(color = "darkslategray4", size = 3) +
  geom_node_text(aes(label = name), vjust = 1.8) +
  labs(
    title = "Word Networks",
    x = "", y = ""
  )
## Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
## ℹ Please use the `transform` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
I observed many interesting words here. Overall, the trigrams show predominantly negative patterns over the past month, such as “common scams rules” or “exercise utmost caution.” At the same time, many people are still hoping for cryptocurrency to “moon,” a term used enthusiastically by traders and investors who anticipate substantial gains. This suggests that investors remain eager for a price surge, even in the context of the recent cryptocurrency downturn.
I suspect that the sentiment analysis library is not fully capturing nuanced sentiment here. For example, thread #7 feels more negative to me: it uses many negative words and expresses anger toward the public. In contrast, thread #8 is simply asking how to access the crypto account of the author’s daughter, who passed away. However, thread #9 has a sentiment score that is almost twice as negative as thread #8, which I find surprising.
# Negation handling model
# Fix the RNG seed so the random sample below is reproducible
set.seed(123)
# Keep only posts whose text is non-empty once surrounding whitespace is removed
threads_clean <- filter(threads_1, nchar(trimws(text)) > 0)
# Draw n distinct row indices and pull out those posts
n <- 10
rows <- sample.int(nrow(threads_clean), size = n)
sentiment_sample <- threads_clean[rows, ]
print(sentiment_sample[["text"]])
## [1] "**Welcome to the Daily Crypto Discussion thread. Please read the disclaimer and rules before participating.** # Disclaimer: Consider all information posted here with several liberal heaps of salt, and always cross check any information you may read on this thread with known sources. Any trade information posted in this open thread may be highly misleading, and could be an attempt to manipulate new readers by known \"pump and dump (PnD) groups\" for their own profit. BEWARE of such practices and exercise utmost caution before acting on any trade tip mentioned here. **Please be careful about what information you share and the actions you take.** Do not share the amounts of your portfolios (why not just share percentage?). Do not share your private keys or wallet seed. Use strong, non-SMS 2FA if possible. Beware of scammers and be smart. Do not invest more than you can afford to lose, and do not fall for pyramid schemes, promises of unrealistic returns (get-rich-quick schemes), and other common scams. # Rules: * All [sub rules](https://www.reddit.com/r/CryptoCurrency/about/rules/) apply in this thread. The prior exemption for karma and age requirements is no longer in effect. * Discussion topics must be related to cryptocurrency. * Behave with civility and politeness. Do not use offensive, racist or homophobic language. * Comments will be sorted by newest first. 
# Useful Links: * [**Beginner Resources**](https://www.reddit.com/r/CryptoCurrency/wiki/beginner_resources) * [**Intro to** **r/Cryptocurrency** **MOONs <\024**](https://www.reddit.com/r/CryptoCurrency/comments/gj96lb/introducing_rcryptocurrency_moons/) * [**MOONs Wiki Page**](https://www.reddit.com/r/CryptoCurrency/wiki/moons_wiki/) * [**r/CryptoCurrency** **Discord**](https://discord.gg/ZuU9Gqeqmy) * [**r/CryptoCurrencyMemes**](https://www.reddit.com/r/cryptocurrencymemes) * [**Prior Daily Discussions**](https://www.reddit.com/r/CryptoCurrency/search?q=title%3A%22Daily+Crypto+Discussion+-+%22+&restrict_sr=on&sort=new&t=all) \\- (Link fixed.) * [**r/CryptoCurrencyMeta**](https://www.reddit.com/r/CryptoCurrencyMeta/) \\- Join in on all meta discussions regarding r/CryptoCurrency whether it be moon distributions or governance. # Finding Other Discussion Threads Follow a mod account below to be notified in your home feed when the latest r/CC discussion thread of your interest is posted. * u/CryptoDaily- \024 Posts the Daily Crypto Discussion threads. * u/CryptoSkeptics \024 Posts the Monthly Skeptics Discussion threads. * u/CryptoOptimists- \024 Posts the Monthly Optimists Discussion threads. * u/CryptoNewsUpdates \024 Posts the Monthly News Summary threads."
## [2] "AD arrives for his first game back in LA"
## [3] "I was in an airport restaurant waiting to board my flight. I was seated next to two women and a man and I noticed that the man was talking incessantly. Just on and on and on without a break. The two women were sitting there politely listening and the man finally gets up to go use the bathroom. After he left the two women looked at each other and started laughing. One woman says \034Looking forward to two weeks of this?\035. The other one says \034Hey, you invited him\035."
## [4] "Hey all I wanted to ask your thoughts I usually invest with conviction focus on projects I believe in and eat the dips calmly Lately though it feels like sentiment is more cautious. Are you feeling nervous too or staying chill waiting for the next rally I am curious to hear both perspectives and what you are seeing in the market right now?"
## [5] "Thailand is throwing down the gauntlet with a bold declaration: as of January 2025, there will be a 0% capital gains tax on cryptocurrency profits. This visionary policy positions Thailand as a serious contender in the global race for crypto dominance, inviting both local enthusiasts and international investors to explore the burgeoning digital asset landscape without the burden of steep taxes. For crypto investors looking for fertile ground, Thailand\031s initiative is a beacon, signaling a new dawn for digital investment in Southeast Asia. Other Asian based CEXs like Bitget are also making strides providing tokenized US stocks and RWA onchain for users to trade... they added other initiatives like Bitget lucky scratch for users to win 777 USDT and BGB which has also boosted trading activities Overall, Thailand's innovative tax exemption is more than just a local initiative; it has the potential to resonate on the global stage. Other countries observing Thailand may realize the importance of crafting flexible regulations that can adapt to rapid changes in the crypto landscape. As nations evaluate their own approaches to cryptocurrency, Thailand's proactive stance might act as a catalyst for a sweeping rethinking of tax structures and regulatory frameworks worldwide, as jurisdictions strive to attract investment in this volatile but promising market."
## [6] "**Welcome to the Daily Crypto Discussion thread. Please read the disclaimer and rules before participating.** # Disclaimer: Consider all information posted here with several liberal heaps of salt, and always cross check any information you may read on this thread with known sources. Any trade information posted in this open thread may be highly misleading, and could be an attempt to manipulate new readers by known \"pump and dump (PnD) groups\" for their own profit. BEWARE of such practices and exercise utmost caution before acting on any trade tip mentioned here. **Please be careful about what information you share and the actions you take.** Do not share the amounts of your portfolios (why not just share percentage?). Do not share your private keys or wallet seed. Use strong, non-SMS 2FA if possible. Beware of scammers and be smart. Do not invest more than you can afford to lose, and do not fall for pyramid schemes, promises of unrealistic returns (get-rich-quick schemes), and other common scams. # Rules: * All [sub rules](https://www.reddit.com/r/CryptoCurrency/about/rules/) apply in this thread. The prior exemption for karma and age requirements is no longer in effect. * Discussion topics must be related to cryptocurrency. * Behave with civility and politeness. Do not use offensive, racist or homophobic language. * Comments will be sorted by newest first. 
# Useful Links: * [**Beginner Resources**](https://www.reddit.com/r/CryptoCurrency/wiki/beginner_resources) * [**Intro to** **r/Cryptocurrency** **MOONs <\024**](https://www.reddit.com/r/CryptoCurrency/comments/gj96lb/introducing_rcryptocurrency_moons/) * [**MOONs Wiki Page**](https://www.reddit.com/r/CryptoCurrency/wiki/moons_wiki/) * [**r/CryptoCurrency** **Discord**](https://discord.gg/ZuU9Gqeqmy) * [**r/CryptoCurrencyMemes**](https://www.reddit.com/r/cryptocurrencymemes) * [**Prior Daily Discussions**](https://www.reddit.com/r/CryptoCurrency/search?q=title%3A%22Daily+Crypto+Discussion+-+%22+&restrict_sr=on&sort=new&t=all) \\- (Link fixed.) * [**r/CryptoCurrencyMeta**](https://www.reddit.com/r/CryptoCurrencyMeta/) \\- Join in on all meta discussions regarding r/CryptoCurrency whether it be moon distributions or governance. # Finding Other Discussion Threads Follow a mod account below to be notified in your home feed when the latest r/CC discussion thread of your interest is posted. * u/CryptoDaily- \024 Posts the Daily Crypto Discussion threads. * u/CryptoSkeptics \024 Posts the Monthly Skeptics Discussion threads. * u/CryptoOptimists- \024 Posts the Monthly Optimists Discussion threads. * u/CryptoNewsUpdates \024 Posts the Monthly News Summary threads."
## [7] "No major news and the traders still getting fucked. I dont know this is probably like the third massive liquidation this week. Lev traders with a combined IQ of a bean& Stay safe Bozos"
## [8] "Hello, My daughter unfortunately passed away pretty much exactly a month ago (she wrote down all the codes and everything on paper, so I have access, but no instructions) and left me money in BTC (bitcoin) and XMR (monero). It\030s all in \036Cakewallet\034 app on an iPad (I have access to the wallets in Cakewallet) and all in all around 10.000¬. I have zero idea how any of this works, as I have never in my life explored or dealt with cryptocurrency. How do I get the around 10.000¬ in BTC and XMR from Cakewallet into my bank account? Someone told me that asking on Reddit would be a good idea, so I\030m asking you. I\030m not exactly bad with computers as my whole career I worked (but as a lawyer, so I am no IT specialist either) in a big IT company, but have absolutely no idea about cryptocurrency at all, I\030m completely new to it and 60 years old, so please explain it slowly and in a way that someone like me understands. It\030s a lot dealing with my daughter\031s death and now I have to learn this too, I would be super grateful for and could really use some help! Thanks a lot to anyone who would be willing to help explain this to me, I really appreciate it! By the way, if it is way too complicated, I think I will just not retrieve the money. But I will see how much I can understand, if someone would be willing to explain step by step what I have to do! Kind regards"
## [9] "Source: https://open.substack.com/pub/marcstein/p/nico-harrison-is-exiting-the-mavericks?r=nuq3a&utm_medium=ios > The Mavericks have now officially announced \"that the organization has relieved Nico Harrison of his role as general manager and president of basketball operations effective immediately.\" The evil is dead."
## [10] "super talented dev from chennai, Thiru, posted selfie and his comments got flooded with racist garbage. 15 million views in 5 days, and half the replies were just racist af! yeah in trading there\031s this preconceived notion that indians flip early and are paper hands. fine, some of it is true but this guy had nothing to do with any of that. mfrs are in west are just bunch of salty losers man. racist fkrs. hate mongering is easy content for them. that\031s what pissed me off the most. it\031s disgusting and honestly disturbing to see how quickly people jump to racism for engagement he still got massive love. the real ones showed up. the web3 crowd backed him. and now he\031s famous in web3...."
# Score the sampled posts: one row of sentiment statistics per text element
sentiment <- sentiment_by(text.var = sentiment_sample[["text"]])
print(sentiment)
## Key: <element_id>
## element_id word_count sd ave_sentiment
## <int> <int> <num> <num>
## 1: 1 393 0.18966572 0.08055104
## 2: 2 9 NA 0.00000000
## 3: 3 89 0.19591255 0.08877991
## 4: 4 63 0.07025012 0.15921885
## 5: 5 207 0.17668900 0.37665106
## 6: 6 393 0.18966572 0.08055104
## 7: 7 34 0.30944160 -0.10738957
## 8: 8 269 0.21164905 0.12568094
## 9: 9 52 0.88407982 -0.24986116
## 10: 10 127 0.27289894 -0.12583782
# Sentiment for the full cleaned data set: split each post's text into
# sentences, then call sentiment_by() with the subreddit column as its second
# (grouping) argument.
sentiment_full <- threads_clean %>%
mutate(text_split = get_sentences(text)) %>%
# The braces stop the pipe from inserting the data as the first argument;
# inside them `.` is the piped data frame.
{ sentimentr::sentiment_by(.$text_split, .$subreddit) }
# Visualization 1: histogram of the average sentiment scores
sentiment_full %>%
  drop_na(ave_sentiment) %>% # rows without a score cannot be binned
  ggplot(aes(x = ave_sentiment)) +
  geom_histogram(bins = 10, colour = "black", fill = "skyblue") +
  theme_minimal() +
  labs(
    title = "Distribution of average sentiment",
    x = "Average sentiment",
    y = "Count"
  )
The first visualization shows the distribution of average sentiment across the subreddits in the crypto-related sample (each value in `sentiment_full` is a subreddit-level average). We can see that both the mode and the mean are slightly positive, with a long tail to the right. This suggests that, overall, sentiment toward crypto has been slightly positive over the past month, which is contrary to my hypothesis.
# Visualization 2: plot() applied to the sentiment_by() result computed above
plot(sentiment_full)
The second visualization shows sentiment by subreddit. We can see a clear association between a subreddit's name and the sentiment scores of its posts. For example, goodnews, as its name suggests, shows positive sentiment, and CryptoCurrency also shows very positive values. In contrast, we see more negative sentiment in subreddits such as scams and buttcoin.
# Visualization 3: text volume vs. average sentiment.
# sentiment_full comes from sentiment_by() grouped by subreddit, so each point
# is one subreddit and word_count is that subreddit's total - the axis label
# says so instead of claiming per-post counts.
ggplot(sentiment_full, aes(x = word_count, y = ave_sentiment)) +
  geom_point(alpha = 0.3) +
  # straight-line fit, drawn without a confidence band
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(
    x = "Total word count per subreddit",
    y = "Average sentiment",
    title = "Relationship between text length and average sentiment"
  )
## `geom_smooth()` using formula = 'y ~ x'
Here, we can see that there is no clear relationship between word count and average sentiment.