Objective

I aim to analyse the attitudes and sentiments of environmentalists on social media platforms such as Reddit towards renewable energy to see whether they are radical or moderate.

# Packages this analysis depends on
packages <- c(
  "RedditExtractoR", "anytime", "magrittr", "httr", "tidytext", "tidyverse",
  "igraph", "ggraph", "wordcloud2", "textdata", "sf", "tmap", "here",
  "sentimentr"
)

# Install whichever of them are missing from the local library
installed_packages <- packages %in% rownames(installed.packages())
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Attach every package; invisible() hides the list that lapply() returns
invisible(lapply(packages, library, character.only = TRUE))

Search Reddit threads using the keyword “renewable energy”

#keyword
#threads_1 <- find_thread_urls(keywords = "renewable energy",
                              #sort_by = 'relevance', 
                              #period = 'all') %>% 
  #drop_na()
#colnames(threads_1)
#head(threads_1)

#save(threads_1, file = 'energy.RData')
# Restore the cached search results; the code below expects this file to
# provide the `threads_1` data frame.
load('energy.RData')

#library(readr)
#save_path <- "E:/GaTech/CP 8883/container/major3/reddit_data.csv"
#write_csv(threads_1, 'reddit_data.csv')

# Create a date-time column from `date_utc` and drop threads without a date.
# The time zone is stated explicitly so the parsed values do not depend on
# the time zone of the machine that runs the script.
threads_1 %<>%
  mutate(date = as.POSIXct(date_utc, tz = "UTC")) %>%
  filter(!is.na(date))

# Histogram of threads over time: one bar per week, one axis label per year
seconds_per_week <- 7 * 24 * 60 * 60  # 604800
date_range <- range(threads_1$date, na.rm = TRUE)
year_breaks <- seq(date_range[1], date_range[2], by = "1 year")

ggplot(threads_1, aes(x = date)) +
  geom_histogram(color = "black", position = "stack",
                 binwidth = seconds_per_week) +
  scale_x_datetime(date_labels = "%Y", breaks = year_breaks) +
  theme_minimal()

Clean and tokenize

# Tokenization: split each thread's `text` into one word token per row
words <- threads_1 %>%
  unnest_tokens(output = word, input = text, token = "words")

# Bar chart of the 20 most frequent words (ties at the cut-off are kept).
# slice_max() names the ranking column explicitly, so no "Selecting by n"
# message is emitted, and the axis label is set once, through labs().
words %>%
  count(word, sort = TRUE) %>%
  slice_max(order_by = n, n = 20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(x = "words",
       y = "counts",
       title = "Unique wordcounts")
## Selecting by n

# Load the list of stop words that comes with the tidytext package
data("stop_words")
# View a random sample of 100 stop words
print(sample(stop_words$word, 100))
##   [1] "whereas"      "w"            "ordering"     "long"         "known"       
##   [6] "show"         "look"         "could"        "here"         "young"       
##  [11] "always"       "wells"        "un"           "man"          "cause"       
##  [16] "does"         "thanks"       "went"         "please"       "fifth"       
##  [21] "need"         "when"         "do"           "apart"        "some"        
##  [26] "all"          "except"       "some"         "ain't"        "him"         
##  [31] "itself"       "many"         "this"         "generally"    "let"         
##  [36] "ours"         "shouldn't"    "in"           "i'll"         "but"         
##  [41] "does"         "unlikely"     "to"           "hi"           "sees"        
##  [46] "obviously"    "it'd"         "n"            "among"        "keeps"       
##  [51] "if"           "gives"        "under"        "since"        "none"        
##  [56] "mr"           "during"       "perhaps"      "ours"         "meanwhile"   
##  [61] "able"         "should"       "point"        "whenever"     "along"       
##  [66] "possible"     "it'll"        "couldn't"     "greetings"    "after"       
##  [71] "here's"       "she's"        "cannot"       "too"          "j"           
##  [76] "rooms"        "hasn't"       "few"          "well"         "what's"      
##  [81] "have"         "of"           "had"          "any"          "under"       
##  [86] "make"         "using"        "know"         "may"          "haven't"     
##  [91] "we've"        "appropriate"  "more"         "respectively" "years"       
##  [96] "that's"       "contains"     "himself"      "we"           "downwards"
# Patterns stripped from the text before tokenizing:
#   - URLs: everything from http:// or https:// up to the next whitespace, so
#     addresses containing '-', '_', '?', '=' etc. are removed in full rather
#     than leaving their tails behind as tokens
#   - the HTML entities &amp; &lt; &gt; and the entity &#x200B;, which would
#     otherwise be tokenized as "x200b"
replace_reg <- "https?://\\S+|&amp;|&lt;|&gt;|&#x200[Bb];"

words_clean <- threads_1 %>%
  # strip URLs and HTML entities
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  # tokenization (word tokens)
  unnest_tokens(word, text, token = "words") %>%
  # drop stop words
  anti_join(stop_words, by = "word") %>%
  # keep only tokens that contain at least one letter a-z
  # (this drops tokens such as pure numbers)
  filter(str_detect(word, "[a-z]"))

# Compare the row counts before and after cleaning.
# There should be fewer words now.
print(
  glue::glue("Before: {nrow(words)}, After: {nrow(words_clean)}")
)
## Before: 1722, After: 698
# Top-20 word frequencies after cleaning, drawn as a horizontal bar chart
top_clean_words <- words_clean %>%
  count(word, sort = TRUE) %>%
  top_n(20, n) %>%
  mutate(word = reorder(word, n))

ggplot(top_clean_words, aes(x = word, y = n)) +
  geom_col() +
  xlab(NULL) +
  coord_flip() +
  labs(x = "words", y = "counts", title = "Unique wordcounts")

Word cloud

# Patterns stripped from the text before tokenizing (also used by the
# tri-gram step further down): whole URLs up to the next whitespace, the HTML
# entities &amp; &lt; &gt;, and the entity &#x200B; (otherwise tokenized as
# "x200b").
replace_reg <- "https?://\\S+|&amp;|&lt;|&gt;|&#x200[Bb];"

# Tokens for the word cloud: strip URLs/entities, tokenize, drop stop words,
# keep tokens containing at least one letter a-z, and finally drop the two
# search keywords themselves. The keyword "energy" must be written without a
# leading space, otherwise it never matches a token and stays in the cloud.
wordscloud_clean <- threads_1 %>%
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  unnest_tokens(word, text, token = "words") %>%
  anti_join(stop_words, by = "word") %>%
  filter(str_detect(word, "[a-z]")) %>%
  filter(!word %in% c("renewable", "energy"))
# Colour vector for the cloud: 20 random colours followed by 10000 greys
n <- 20
h <- runif(n, 0, 1)      # hue: any colour
s <- runif(n, 0.6, 1)    # saturation: vivid
v <- runif(n, 0.3, 0.7)  # value: neither too dark nor too bright

df_hsv <- data.frame(h = h, s = s, v = v)
# hsv() is vectorised, so all 20 colours are built in a single call
pal <- c(with(df_hsv, hsv(h, s, v)), rep("grey", 10000))

# Word frequencies, most frequent first, rendered without rotation
wordscloud_freq <- count(wordscloud_clean, word, sort = TRUE)
wordcloud2(wordscloud_freq,
           color = pal,
           minRotation = 0,
           maxRotation = 0,
           ellipticity = 0.8)

Tri-gram analysis

# Build tri-grams (n = 3) from the thread text after stripping the
# `replace_reg` patterns.
words_ngram <- threads_1 %>%
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  select(text) %>%
  unnest_tokens(output = paired_words,
                input = text,
                token = "ngrams",
                n = 3) %>%
  # Rows for which no tri-gram could be formed come back as NA; drop them so
  # that NA is not counted as the most frequent "tri-gram".
  filter(!is.na(paired_words))

# Show the 20 most frequent tri-grams
words_ngram %>%
  count(paired_words, sort = TRUE) %>%
  head(20) %>%
  knitr::kable()
paired_words n
NA 215
in renewable energy 4
100 renewable energy 3
a lot of 3
higher ranked firms 3
renewable energy and 3
second paternity leave 3
to make a 3
what is the 3
a boutique transactional 2
at higher ranked 2
at the national 2
boutique transactional firm 2
but much more 2
doing renewable energy 2
for similar roles 2
got the idea 2
has dropped over 2
how much renewable 2
however i m 2
# Split each tri-gram into its three component words
words_ngram_pair <- separate(
  words_ngram,
  paired_words,
  into = c("word1", "word2", "word3"),
  sep = " "
)

# Keep a tri-gram only when none of its three words is a stop word and every
# word contains at least one letter a-z
words_ngram_pair_filtered <- words_ngram_pair %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  ) %>%
  filter(
    str_detect(word1, "[a-z]"),
    str_detect(word2, "[a-z]"),
    str_detect(word3, "[a-z]")
  )

# Keep only tri-grams whose three words are all ASCII-encoded
# (search for 'ASCII table' to see which characters qualify)
library(stringi)
words_ngram_pair_filtered <- words_ngram_pair_filtered %>%
  filter(
    stri_enc_isascii(word1),
    stri_enc_isascii(word2),
    stri_enc_isascii(word3)
  )

# Tri-gram frequencies, most frequent first
words_counts <- words_ngram_pair_filtered %>%
  count(word1, word2, word3) %>%
  arrange(desc(n))

words_counts %>%
  head(20) %>%
  knitr::kable()
word1 word2 word3 n
boutique transactional firm 2
renewable energy finance 2
architecture poly amidoxime 1
bigger firm boutique 1
career opportunities x200b 1
clean energy dense 1
climate change didn 1
climate targets b2529461 1
cooling lateral market 1
cost effective configuration 1
dense power source 1
desalination plants x200b 1
design sustainable energy 1
earliest reddit post 1
economical uranium extraction 1
employed substation design 1
energy capacity x200b 1
energy costs scotland 1
energy dense power 1
energy finance ma 1
# Draw the tri-gram counts as a word network.
# NOTE(review): graph_from_data_frame() is documented to read the first two
# columns as edge endpoints and to attach the remaining columns as edge
# attributes, so `word3` presumably never becomes a node here; confirm that
# this is the intended graph.
words_counts %>%
  filter(n >= 1) %>%  # keeps every row; raise the threshold to thin the graph
  graph_from_data_frame() %>% # convert to graph
  ggraph(layout = "fr") +
  # A constant transparency belongs outside aes(); inside aes() it is treated
  # as a mapped variable rather than applied as the literal value 0.6.
  geom_edge_link(aes(edge_width = n), edge_alpha = 0.6) +
  # Node points are sized with `size`; `linewidth` is not a parameter of this
  # layer and was ignored with a warning.
  geom_node_point(color = "darkslategray4", size = 3) +
  geom_node_text(aes(label = name), vjust = 1.8) +
  labs(title = "Renewable Energy Word Networks",
       x = "", y = "")
## Warning in geom_node_point(color = "darkslategray4", linewidth = 3): Ignoring
## unknown parameters: `linewidth`
## Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
## ℹ Please use the `transform` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Discussion of noteworthy tri-grams

There is one main cluster of discussion centered around “renewable energy” and related terms such as “green energy,” “solar power,” and “wind generation.”

It’s not surprising to see “energy” as the cluster center, as it is the core concept connecting various discussions on sustainability. The surrounding words like “clean,” “generation,” and “capacity” emphasize the focus on renewable energy’s practical implementation and its environmental benefits.

A noteworthy branch in the network connects “solar,” “climate,” and “change.” This tri-gram highlights the frequent association between solar energy as a renewable source and its role in addressing climate change. Another significant link is “renewable,” “finance,” and “sustainable,” reflecting discussions about the economic aspects of renewable energy projects, including funding and long-term viability.

Smaller nodes like “sea water,” “desalination,” and “plants” suggest niche applications of renewable energy technologies, such as powering desalination plants. Similarly, connections like “employment,” “pursuing,” and “opportunities” indicate discussions about job creation in the renewable energy sector.

Sentiment analysis with dictionary method

# Read the labelled threads and keep only the rows that have a `bert_label`
reddit_sentiment <- drop_na(read_csv('reddit_data_bert.csv'), 'bert_label')
## New names:
## Rows: 228 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): title, text, subreddit, url, bert_label dbl (4): ...1, timestamp,
## comments, bert_score date (1): date_utc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Score every thread with the dictionary method so the result can be
# compared with the BERT label.
# First build one string per thread: title and text joined by ". ", with
# missing titles/texts replaced by empty strings.
reddit_sentiment <- reddit_sentiment %>%
  mutate(
    title = replace_na(title, ""),
    text = replace_na(text, ""),
    title_text = str_c(title, text, sep = ". ")
  )

# Dictionary method
reddit_sentiment_dictionary <- sentiment_by(reddit_sentiment$title_text)

# Copy the average sentiment and the word count back onto the threads
reddit_sentiment$sentiment_dict <- reddit_sentiment_dictionary$ave_sentiment
reddit_sentiment$word_count <- reddit_sentiment_dictionary$word_count

# Turn the first character of the BERT label into a number, then check how
# strongly the two sentiment measures correlate.
reddit_sentiment <- reddit_sentiment %>%
  mutate(bert_label_numeric = as.numeric(str_sub(bert_label, 1, 1)))

with(reddit_sentiment, cor(bert_label_numeric, sentiment_dict))
## [1] 0.2581777

A correlation of about 0.26 implies a weak positive association between the BERT labels and the dictionary-based sentiment scores.

# Dictionary sentiment against the numeric BERT label. Points are jittered
# horizontally only. The dashed reference line at a score of 0 is drawn with
# geom_hline(), which spans the whole panel instead of only the data range.
ggplot(data = reddit_sentiment,
       aes(x = bert_label_numeric, y = sentiment_dict)) +
  geom_jitter(width = 0.1, height = 0) +
  geom_hline(yintercept = 0, color = '#FFD700', linewidth = 1,
             linetype = 'dashed')

Sample texts displayed alongside their sentiment scores, followed by a credibility evaluation

# Pick, per polarity, the 10 threads with the largest absolute dictionary
# score, then order the selection from most negative to most positive.
# Scores of exactly 0 (and missing scores) get their own 'neutral' class so
# that they are not reported as negative examples.
sentimentr_example <- reddit_sentiment %>%
  mutate(sentimentr_abs = abs(sentiment_dict),
         sentimentr_binary = case_when(sentiment_dict > 0 ~ 'positive',
                                       sentiment_dict < 0 ~ 'negative',
                                       TRUE ~ 'neutral')) %>%
  group_by(sentimentr_binary) %>%
  # arrange() sorts across all rows; slice_head() then takes the first 10
  # rows of each group, i.e. the strongest scores per polarity
  arrange(desc(sentimentr_abs)) %>%
  slice_head(n = 10) %>%
  ungroup() %>%
  arrange(sentiment_dict)

# negative: print each text named by its sentiment score
sentimentr_example %>%
  filter(sentimentr_binary == 'negative') %>%
  pull(title_text, name = sentiment_dict) %>%
  print()
##                                                                                                                                                                                               -0.645234937832725 
## "Australian Treasurer Joe Hockey has attacked wind farms as \034utterly offensive and \034a blight on the landscape\035 in the latest sign that the Abbott government intends to cut back on renewable energy. " 
##                                                                                                                                                                                               -0.603022689155527 
##                                                                                                                                      "I wish the entire world would switch to 100% renewable energy overnight. " 
##                                                                                                                                                                                               -0.596302710942121 
##                                                                                                         "Ohio politicians found guilty in $60 million utilities bribery scandal to undermine renewable energy. " 
##                                                                                                                                                                                               -0.542720420239975 
##                                                                                                                                         "Negative Power Prices Hit Europe as Renewable Energy Floods the Grid. " 
##                                                                                                                                                                                                -0.44744645862196 
##                                                                                                                               "Electricity prices in France turn negative as renewable energy floods the grid. " 
##                                                                                                                                                                                                -0.44744645862196 
##                                                                                                                               "Electricity prices in France turn negative as renewable energy floods the grid. " 
##                                                                                                                                                                                                -0.44744645862196 
##                                                                                                                               "Electricity prices in France turn negative as renewable energy floods the grid. " 
##                                                                                                                                                                                                 -0.3699864862397 
##                                                                                                                                         "Nuclear energy is more expensive than renewables, CSIRO report finds. " 
##                                                                                                                                                                                                           -0.325 
##                                                                                                          "Australia must stop wasting time and shift to renewable energy to spark job creation, Albanese says. " 
##                                                                                                                                                                                               -0.319504825211347 
##                        "The cost of transforming the US power grid into 100% renewable energy is estimated at $4.5 trillion (nearly as much as what the country has spent, since 2001, on the war on terror.). "
# positive: print each text named by its sentiment score
positive_examples <- filter(sentimentr_example, sentimentr_binary == 'positive')
print(pull(positive_examples, title_text, name = sentiment_dict))
##                                                                                                                                                                                                                                                                                                   0.442718872423573 
##                                                                                                                                                                                                                                            "Renewable Energy Is Now The Cheapest Option - Even Without Subsidies. " 
##                                                                                                                                                                                                                                                                                                   0.466435954874836 
##                                     "Stanford engineers develop a new method of keeping the lights on if the world turns to 100% clean, renewable energy - several solutions to making clean, renewable energy reliable enough to power at least 139 countries, published this week in journal Renewable Energy.. " 
##                                                                                                                                                                                                                                                                                                   0.474692883171144 
##                                                                                "\034Eat your heart out Fox News,\035 says Newsom, signing climate agreement with West Coast governors | California, Oregon, Washington and British Columbia agree to more electric vehicles, renewable energy, forest treatments. " 
##                                                                                                                                                                                                                                                                                                   0.485897189157719 
## "Replacing coal with gas or renewables saves billions of gallons of water, suggests a new study, which found that the water intensity of renewable energy sources like solar or wind energy, as measured by water use per kilowatt of electricity, is only 1% to 2% of coal or natural gas\031s water intensity.. " 
##                                                                                                                                                                                                                                                                                                   0.501996778367678 
##                                                                                          "Obama's clean power plan hailed as strongest ever climate action by US. Hundreds of businesses including eBay and Nestle back federal rules to cut emissions and encourage a switch away from coal to renewable energy. " 
##                                                                                                                                                                                                                                                                                                   0.527644853011086 
##                                                                                                                                                                                                                                                        "A cool guide to the Five Major Types of Renewable Energy. " 
##                                                                                                                                                                                                                                                                                                    0.54788554592047 
##                                                                                                                                                                                                                           "Renewable energy set to be cheaper than fossil fuels by 2020, according to new report. " 
##                                                                                                                                                                                                                                                                                                   0.583333333333333 
##                                                                                                                                                                                                                                                               "A cool guide on the five major Renewable Energies. " 
##                                                                                                                                                                                                                                                                                                   0.673609679265374 
##                                                                                                                                             "The Australian Senate\031s decisions to stop Tony Abbott abolishing clean energy agencies helped create renewable energy projects worth $23.4bn, a new report says.. " 
##                                                                                                                                                                                                                                                                                                   0.683333333333333 
##                                                                                                                                                                                                                                                               "Playground fun or a new form of renewable energy?. "

Credibility evaluation

It is important to note that the context of renewable energy discussions can influence sentiment analysis results, especially when dealing with complex or nuanced topics. For example, statements that critique policies or express concerns, such as “Australian Treasurer Joe Hockey has attacked wind farms as ‘utterly offensive,’” receive strongly negative scores (e.g., -0.645). While the sentiment aligns with the negative tone of the statement, it is worth noting that the score does not capture the broader policy implications or public reactions.

On the positive side, statements like “Renewable energy set to be cheaper than fossil fuels by 2020” receive high positive scores (e.g., 0.548), which accurately reflect the optimistic tone of the text. However, some positive examples, such as “The Australian Senate’s decisions to stop Tony Abbott abolishing clean energy agencies helped create renewable energy projects worth $23.4bn,” receive scores that might be overly influenced by specific keywords like “helped” or “create,” rather than considering the full context.

Insights and visualization

Based on sentiment analysis, discussions about renewable energy on social media are generally optimistic and solution-oriented. Positive threads focus on advancements, cost reductions, and environmental benefits, while negative ones highlight challenges like political opposition or grid issues. Overall, the conversation leans toward promoting renewable energy and addressing its obstacles constructively.

Sentiment distribution

# Thread counts per BERT label
ggplot(reddit_sentiment, aes(x = bert_label)) +
  geom_bar(fill = "black")

# Word counts per BERT label; the red crossbar marks each label's mean
ggplot(reddit_sentiment, aes(x = bert_label, y = word_count)) +
  geom_jitter(width = 0.05, height = 0) +
  stat_summary(geom = "crossbar", fun = mean, width = 0.4, color = "red")

# Association between a thread's sentiment and the number of comments on the
# thread.
# Remove outliers within each BERT label: keep a thread only when its comment
# count lies within 1.5 * IQR of that label's first and third quartiles.
# na.rm = TRUE keeps a missing comment count from aborting the computation;
# such rows are dropped by filter(). ungroup() returns a plain data frame so
# later steps are not silently computed per label.
reddit_sentiment_rm_outlier <- reddit_sentiment %>%
  group_by(bert_label) %>%
  filter(
    between(
      comments,
      quantile(comments, 0.25, na.rm = TRUE) - 1.5 * IQR(comments, na.rm = TRUE),
      quantile(comments, 0.75, na.rm = TRUE) + 1.5 * IQR(comments, na.rm = TRUE))) %>%
  ungroup()

# Correlation analysis
cor.test(reddit_sentiment_rm_outlier$comments, reddit_sentiment_rm_outlier$bert_label_numeric)
## 
##  Pearson's product-moment correlation
## 
## data:  reddit_sentiment_rm_outlier$comments and reddit_sentiment_rm_outlier$bert_label_numeric
## t = -2.62, df = 213, p-value = 0.009425
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.30332984 -0.04393276
## sample estimates:
##        cor 
## -0.1766977
# Comment counts against the numeric BERT label, with a loess trend line;
# points are jittered horizontally only
ggplot(reddit_sentiment_rm_outlier,
       aes(x = bert_label_numeric, y = comments)) +
  geom_jitter(width = 0.05, height = 0) +
  geom_smooth(method = 'loess', span = 0.75)
## `geom_smooth()` using formula = 'y ~ x'

Word clouds

# Stop word removal and tokenization
data("stop_words")
# Patterns stripped before tokenizing: whole URLs up to the next whitespace,
# the HTML entities &amp; &lt; &gt;, and the entity &#x200B; (otherwise
# tokenized as "x200b").
replace_reg <- "https?://\\S+|&amp;|&lt;|&gt;|&#x200[Bb];"

# One row per remaining word of each thread's combined title and text:
# strip URLs/entities, tokenize, drop stop words, keep tokens containing at
# least one letter a-z, and drop the two search keywords. The keyword
# "energy" must be written without a leading space, otherwise it never
# matches a token and stays in the data.
reddit_sentiment_clean <- reddit_sentiment %>%
  mutate(title_text = str_replace_all(title_text, replace_reg, "")) %>%
  unnest_tokens(word, title_text, token = "words") %>%
  anti_join(stop_words, by = "word") %>%
  filter(str_detect(word, "[a-z]")) %>%
  filter(!word %in% c("renewable", "energy"))
# Negative text: words from threads whose numeric label is 1 or 2
reddit_sentiment_clean_negative <- filter(
  reddit_sentiment_clean,
  bert_label_numeric %in% 1:2
)
# Positive text: words from threads whose numeric label is 4 or 5
reddit_sentiment_clean_positive <- filter(
  reddit_sentiment_clean,
  bert_label_numeric %in% 4:5
)

# Drop the words that occur on both sides, keeping only the words that are
# specific to negative threads or to positive threads
reddit_sentiment_clean_negative_unique <- anti_join(
  reddit_sentiment_clean_negative,
  reddit_sentiment_clean_positive,
  by = 'word'
)
reddit_sentiment_clean_positive_unique <- anti_join(
  reddit_sentiment_clean_positive,
  reddit_sentiment_clean_negative,
  by = 'word'
)
# Words appearing in negative threads
# Colour vector for the clouds: 20 random colours followed by 10000 greys
n <- 20
h <- runif(n, 0, 1)      # hue: any colour
s <- runif(n, 0.6, 1)    # saturation: vivid
v <- runif(n, 0.3, 0.7)  # value: neither too dark nor too bright

df_hsv <- data.frame(h = h, s = s, v = v)
# hsv() is vectorised, so all 20 colours are built in a single call
pal <- c(with(df_hsv, hsv(h, s, v)), rep("grey", 10000))

# Word frequencies (most frequent first), every word rotated by -pi/6
negative_word_cloud <- wordcloud2(
  count(reddit_sentiment_clean_negative_unique, word, sort = TRUE),
  color = pal,
  minRotation = -pi/6,
  maxRotation = -pi/6,
  rotateRatio = 1
)

negative_word_cloud
# Words appearing in positive threads
# Word frequencies (most frequent first), every word rotated by pi/6,
# coloured with the existing `pal` vector
positive_word_cloud <- wordcloud2(
  count(reddit_sentiment_clean_positive_unique, word, sort = TRUE),
  color = pal,
  minRotation = pi/6,
  maxRotation = pi/6,
  rotateRatio = 1
)

positive_word_cloud

If the two word cloud images cannot be displayed in RPubs, please see the two static images below. The first one is negative and the second one is positive. negative_word_cloud positive_word_cloud

#save.image('241128.RData')