#loading libraries
library(dplyr)
library(readr)
library(tidyr)
library(rtweet)
library(writexl)
library(readxl)
library(tidytext)
library(textdata)
library(ggplot2)
library(textdata)
library(scales)
#reading ngss and ccss files
ngss_tweets <- read_xlsx("data/ngss_tweets.xlsx")
csss_tweets <- read_xlsx("data/csss_tweets.xlsx")
#filtering english entries for ngss and ccss
ngss_text <- filter(ngss_tweets, lang == "en")
#selecting columns for ngss and ccss dataframes
ngss_text <- select(ngss_text,screen_name, created_at, text)
ngss_text <- mutate(ngss_text, standards = "ngss")
# rearranging the columns
ngss_text <- relocate(ngss_text, standards)
ngss_text <- select(ngss_text, standards, screen_name, created_at, text)
csss_text <-
csss_tweets %>%
filter(lang == "en") %>%
select(screen_name, created_at, text) %>%
mutate(standards = "csss") %>%
relocate(standards)
#binding rows
tweets <- bind_rows(ngss_text, csss_text)
head(tweets)
## # A tibble: 6 × 4
## standards screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ngss loyr2662 2021-02-27 17:33:27 "Switching gears for a bit for the…
## 2 ngss loyr2662 2021-02-20 20:02:37 "Was just introduced to the Engine…
## 3 ngss Furlow_teach 2021-02-27 17:03:23 "@IBchemmilam @chemmastercorey I’m…
## 4 ngss Furlow_teach 2021-02-27 14:41:01 "@IBchemmilam @chemmastercorey How…
## 5 ngss TdiShelton 2021-02-27 14:17:34 "I am so honored and appreciative …
## 6 ngss TdiShelton 2021-02-27 15:49:17 "Thank you @brian_womack I loved c…
tail(tweets)
## # A tibble: 6 × 4
## standards screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 csss JosiePaul8807 2021-02-20 00:34:53 "@SenatorHick You realize science…
## 2 csss ctwittnc 2021-02-19 23:44:18 "@winningatmylife I’ll bet none o…
## 3 csss the_rbeagle 2021-02-19 23:27:06 "@dmarush @electronlove @Montgome…
## 4 csss silea 2021-02-19 23:11:21 "@LizerReal I don’t think that’s …
## 5 csss JodyCoyote12 2021-02-19 22:58:25 "@CarlaRK3 @NedLamont Fully fund …
## 6 csss Ryan_Hawes 2021-02-19 22:41:01 "I just got an \"explainer\" on h…
#tokenizing text
tweet_tokens <-
tweets %>%
unnest_tokens(output = word,
input = text,
token = "tweets")
#removing stopwords
tidy_tweets <-
tweet_tokens %>%
anti_join(stop_words, by = "word")
count(tidy_tweets, word, sort = T)
## # A tibble: 7,524 × 2
## word n
## <chr> <int>
## 1 common 1089
## 2 core 1083
## 3 math 434
## 4 students 140
## 5 #ngss 131
## 6 school 127
## 7 teachers 122
## 8 amp 120
## 9 kids 111
## 10 standards 111
## # … with 7,514 more rows
filter(tweets, grepl('amp', text))
## # A tibble: 124 × 4
## standards screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ngss TdiShelton 2021-02-27 14:17:34 "I am so honored and appreciati…
## 2 ngss STEMTeachTools 2021-02-27 16:25:04 "Open, non-hierarchical communi…
## 3 ngss NGSSphenomena 2021-02-25 13:24:22 "Bacteria have music preference…
## 4 ngss CTSKeeley 2021-02-21 21:50:04 "Today I was thinking about the…
## 5 ngss richbacolor 2021-02-24 14:14:49 "Last chance to register for @M…
## 6 ngss MrsEatonELL 2021-02-27 06:24:09 "Were we doing the hand jive? N…
## 7 ngss STEMuClaytion 2021-02-24 14:56:19 "#WonderWednesday w/ questions …
## 8 ngss LearningUNDFTD 2021-02-24 18:13:01 "Are candies like M&Ms and …
## 9 ngss abeslo 2021-02-26 18:54:31 "#M'Kenna, whose story we share…
## 10 ngss E3Chemistry 2021-02-25 14:15:20 "Molarity & Parts Per Milli…
## # … with 114 more rows
tidy_tweets <-
tweet_tokens %>%
anti_join(stop_words, by = "word") %>%
filter(!word == "amp")
#Afinn Lexicons
afinn <- get_sentiments("afinn")
afinn
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # … with 2,467 more rows
#Bing Lexicons
bing <- get_sentiments("bing")
bing
## # A tibble: 6,786 × 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # … with 6,776 more rows
nrc <- get_sentiments("nrc")
nrc
## # A tibble: 13,875 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # … with 13,865 more rows
loughran <- get_sentiments("loughran")
loughran
## # A tibble: 4,150 × 2
## word sentiment
## <chr> <chr>
## 1 abandon negative
## 2 abandoned negative
## 3 abandoning negative
## 4 abandonment negative
## 5 abandonments negative
## 6 abandons negative
## 7 abdicated negative
## 8 abdicates negative
## 9 abdicating negative
## 10 abdication negative
## # … with 4,140 more rows
1.How were these sentiment lexicons put together and validated? -They were constructed and validated through approaches such as crowdsourcing, as work and contributions of various researchers and data from public reviews(such as restaurant, movie reviews).
2.Why should we be cautious when using and interpreting them? -It is recommended to be cautious in using sentiment lexicons because of context, especially in cases and scenarios that they were not validated on.Additionally, fromthe reading, the lexicons are stand alone unigrams and their interpretation does not put into consideration aspects such as qualifiers which in real sense they are critical in sense making of analysed text.
#joining Sentiments afinn
sentiment_afinn <- inner_join(tidy_tweets, afinn, by = "word")
sentiment_afinn
## # A tibble: 1,520 × 5
## standards screen_name created_at word value
## <chr> <chr> <dttm> <chr> <dbl>
## 1 ngss loyr2662 2021-02-27 17:33:27 win 4
## 2 ngss Furlow_teach 2021-02-27 17:03:23 love 3
## 3 ngss Furlow_teach 2021-02-27 17:03:23 sweet 2
## 4 ngss Furlow_teach 2021-02-27 17:03:23 significance 1
## 5 ngss TdiShelton 2021-02-27 14:17:34 honored 2
## 6 ngss TdiShelton 2021-02-27 14:17:34 opportunity 2
## 7 ngss TdiShelton 2021-02-27 14:17:34 wonderful 4
## 8 ngss TdiShelton 2021-02-27 14:17:34 powerful 2
## 9 ngss TdiShelton 2021-02-27 15:49:17 loved 3
## 10 ngss TdiShelton 2021-02-27 16:51:32 share 1
## # … with 1,510 more rows
sentiment_loughran <- inner_join(tidy_tweets, loughran, by = "word")
sentiment_loughran
## # A tibble: 829 × 5
## standards screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss loyr2662 2021-02-27 17:33:27 win positive
## 2 ngss TdiShelton 2021-02-27 14:17:34 honored positive
## 3 ngss TdiShelton 2021-02-27 14:17:34 opportunity positive
## 4 ngss TdiShelton 2021-02-22 01:57:58 excited positive
## 5 ngss STEMTeachTools 2021-02-23 23:35:05 strong positive
## 6 ngss STEMTeachTools 2021-02-27 16:25:04 improvement positive
## 7 ngss STEMTeachTools 2021-02-24 17:35:10 opportunity positive
## 8 ngss STEMTeachTools 2021-02-21 16:35:06 uncertainties uncertainty
## 9 ngss STEMTeachTools 2021-02-21 16:35:06 investigations negative
## 10 ngss LabAids 2021-02-27 15:35:18 challenges negative
## # … with 819 more rows
sentiment_bing <- inner_join(tidy_tweets, bing, by = "word")
sentiment_bing
## # A tibble: 1,637 × 5
## standards screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss loyr2662 2021-02-27 17:33:27 win positive
## 2 ngss Furlow_teach 2021-02-27 17:03:23 love positive
## 3 ngss Furlow_teach 2021-02-27 17:03:23 helped positive
## 4 ngss Furlow_teach 2021-02-27 17:03:23 sweet positive
## 5 ngss Furlow_teach 2021-02-27 17:03:23 tough positive
## 6 ngss TdiShelton 2021-02-27 14:17:34 honored positive
## 7 ngss TdiShelton 2021-02-27 14:17:34 appreciative positive
## 8 ngss TdiShelton 2021-02-27 14:17:34 wonderful positive
## 9 ngss TdiShelton 2021-02-27 14:17:34 powerful positive
## 10 ngss TdiShelton 2021-02-27 15:49:17 loved positive
## # … with 1,627 more rows
sentiment_nrc <- inner_join(tidy_tweets, nrc, by = "word")
sentiment_nrc
## # A tibble: 7,651 × 5
## standards screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss loyr2662 2021-02-20 20:02:37 mathematical trust
## 2 ngss Furlow_teach 2021-02-27 17:03:23 familiar positive
## 3 ngss Furlow_teach 2021-02-27 17:03:23 familiar trust
## 4 ngss Furlow_teach 2021-02-27 17:03:23 love joy
## 5 ngss Furlow_teach 2021-02-27 17:03:23 love positive
## 6 ngss Furlow_teach 2021-02-27 17:03:23 sweet anticipation
## 7 ngss Furlow_teach 2021-02-27 17:03:23 sweet joy
## 8 ngss Furlow_teach 2021-02-27 17:03:23 sweet positive
## 9 ngss Furlow_teach 2021-02-27 17:03:23 sweet surprise
## 10 ngss Furlow_teach 2021-02-27 17:03:23 sweet trust
## # … with 7,641 more rows
ts_plot(tweets, by = "days")
tweets %>%
dplyr::group_by(standards) %>%
ts_plot("days")
From the graph, it can be observed that Twitter users talk more about CCSS standards compared to NGSS standards.
summary_bing <- count(sentiment_bing, sentiment, sort = TRUE)
summary_bing
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 974
## 2 positive 663
summary_bing <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment)
summary_bing
## # A tibble: 4 × 3
## # Groups: standards [2]
## standards sentiment n
## <chr> <chr> <int>
## 1 csss negative 914
## 2 csss positive 437
## 3 ngss negative 60
## 4 ngss positive 226
#untidy the data
summary_bing <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
spread(sentiment, n)
summary_bing
## # A tibble: 2 × 3
## # Groups: standards [2]
## standards negative positive
## <chr> <int> <int>
## 1 csss 914 437
## 2 ngss 60 226
summary_bing <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
spread(sentiment, n) %>%
mutate(sentiment = positive - negative) %>%
mutate(lexicon = "bing") %>%
relocate(lexicon)
summary_bing
## # A tibble: 2 × 5
## # Groups: standards [2]
## lexicon standards negative positive sentiment
## <chr> <chr> <int> <int> <int>
## 1 bing csss 914 437 -477
## 2 bing ngss 60 226 166
head(sentiment_afinn)
## # A tibble: 6 × 5
## standards screen_name created_at word value
## <chr> <chr> <dttm> <chr> <dbl>
## 1 ngss loyr2662 2021-02-27 17:33:27 win 4
## 2 ngss Furlow_teach 2021-02-27 17:03:23 love 3
## 3 ngss Furlow_teach 2021-02-27 17:03:23 sweet 2
## 4 ngss Furlow_teach 2021-02-27 17:03:23 significance 1
## 5 ngss TdiShelton 2021-02-27 14:17:34 honored 2
## 6 ngss TdiShelton 2021-02-27 14:17:34 opportunity 2
summary_afinn <- sentiment_afinn %>%
group_by(standards) %>%
summarise(sentiment = sum(value)) %>%
mutate(lexicon = "AFINN") %>%
relocate(lexicon)
summary_afinn
## # A tibble: 2 × 3
## lexicon standards sentiment
## <chr> <chr> <dbl>
## 1 AFINN csss -833
## 2 AFINN ngss 502
summary_loughran <- sentiment_loughran %>%
group_by(standards) %>%
mutate(lexicon = "loughran") %>%
relocate(lexicon)
summary_loughran
## # A tibble: 829 × 6
## # Groups: standards [2]
## lexicon standards screen_name created_at word sentiment
## <chr> <chr> <chr> <dttm> <chr> <chr>
## 1 loughran ngss loyr2662 2021-02-27 17:33:27 win positive
## 2 loughran ngss TdiShelton 2021-02-27 14:17:34 honored positive
## 3 loughran ngss TdiShelton 2021-02-27 14:17:34 opportunity positive
## 4 loughran ngss TdiShelton 2021-02-22 01:57:58 excited positive
## 5 loughran ngss STEMTeachTools 2021-02-23 23:35:05 strong positive
## 6 loughran ngss STEMTeachTools 2021-02-27 16:25:04 improvement positive
## 7 loughran ngss STEMTeachTools 2021-02-24 17:35:10 opportunity positive
## 8 loughran ngss STEMTeachTools 2021-02-21 16:35:06 uncertainties uncertai…
## 9 loughran ngss STEMTeachTools 2021-02-21 16:35:06 investigatio… negative
## 10 loughran ngss LabAids 2021-02-27 15:35:18 challenges negative
## # … with 819 more rows
summary_nrc <- sentiment_nrc %>%
group_by(standards) %>%
filter (sentiment == "positive"|sentiment == "negative") %>%
summarise(sentiment) %>%
mutate(lexicon = "NRC") %>%
relocate(lexicon)
summary_nrc
## # A tibble: 3,577 × 3
## # Groups: standards [2]
## lexicon standards sentiment
## <chr> <chr> <chr>
## 1 NRC csss positive
## 2 NRC csss positive
## 3 NRC csss positive
## 4 NRC csss positive
## 5 NRC csss positive
## 6 NRC csss positive
## 7 NRC csss positive
## 8 NRC csss positive
## 9 NRC csss positive
## 10 NRC csss positive
## # … with 3,567 more rows
ngss_text <-
ngss_tweets %>%
filter(lang == "en") %>%
select(status_id, text) %>%
mutate(standards = "ngss") %>%
relocate(standards)
ccss_text <-
csss_tweets %>%
filter(lang == "en") %>%
select(status_id, text) %>%
mutate(standards = "ccss") %>%
relocate(standards)
tweets <- bind_rows(ngss_text, ccss_text)
tweets
## # A tibble: 1,441 × 3
## standards status_id text
## <chr> <chr> <chr>
## 1 ngss 1365716690336645124 "Switching gears for a bit for the \"Crosscutt…
## 2 ngss 1363217513761415171 "Was just introduced to the Engineering Habits…
## 3 ngss 1365709122763653133 "@IBchemmilam @chemmastercorey I’m familiar w/…
## 4 ngss 1365673294360420353 "@IBchemmilam @chemmastercorey How well does t…
## 5 ngss 1365667393188601857 "I am so honored and appreciative to have an o…
## 6 ngss 1365690477266284545 "Thank you @brian_womack I loved connecting wi…
## 7 ngss 1365706140496130050 "Please share #NGSSchat PLN! https://t.co/Qc2c…
## 8 ngss 1363669328147677189 "So excited about this weekend’s learning... p…
## 9 ngss 1365442786544214019 "The Educators Evaluating the Quality of Instr…
## 10 ngss 1364358149164175362 "Foster existing teacher social networks that …
## # … with 1,431 more rows
sentiment_afinn <- tweets %>%
unnest_tokens(output = word,
input = text,
token = "tweets") %>%
anti_join(stop_words, by = "word") %>%
filter(!word == "amp") %>%
inner_join(afinn, by = "word")
sentiment_afinn
## # A tibble: 1,520 × 4
## standards status_id word value
## <chr> <chr> <chr> <dbl>
## 1 ngss 1365716690336645124 win 4
## 2 ngss 1365709122763653133 love 3
## 3 ngss 1365709122763653133 sweet 2
## 4 ngss 1365709122763653133 significance 1
## 5 ngss 1365667393188601857 honored 2
## 6 ngss 1365667393188601857 opportunity 2
## 7 ngss 1365667393188601857 wonderful 4
## 8 ngss 1365667393188601857 powerful 2
## 9 ngss 1365690477266284545 loved 3
## 10 ngss 1365706140496130050 share 1
## # … with 1,510 more rows
afinn_score <- sentiment_afinn %>%
group_by(standards, status_id) %>%
summarise(value = sum(value))
afinn_score
## # A tibble: 842 × 3
## # Groups: standards [2]
## standards status_id value
## <chr> <chr> <dbl>
## 1 ccss 1362894990813188096 -2
## 2 ccss 1362899370199445508 4
## 3 ccss 1362906588021989376 -2
## 4 ccss 1362910494487535618 -9
## 5 ccss 1362910913855160320 -3
## 6 ccss 1362928225379250179 2
## 7 ccss 1362933982074073090 -1
## 8 ccss 1362947497258151945 -3
## 9 ccss 1362949805694013446 3
## 10 ccss 1362970614282264583 3
## # … with 832 more rows
#Adding a flag for whether a tweet is positive or negative
afinn_sentiment <- afinn_score %>%
filter(value != 0) %>%
mutate(sentiment = if_else(value < 0, "negative", "positive"))
afinn_sentiment
## # A tibble: 801 × 4
## # Groups: standards [2]
## standards status_id value sentiment
## <chr> <chr> <dbl> <chr>
## 1 ccss 1362894990813188096 -2 negative
## 2 ccss 1362899370199445508 4 positive
## 3 ccss 1362906588021989376 -2 negative
## 4 ccss 1362910494487535618 -9 negative
## 5 ccss 1362910913855160320 -3 negative
## 6 ccss 1362928225379250179 2 positive
## 7 ccss 1362933982074073090 -1 negative
## 8 ccss 1362947497258151945 -3 negative
## 9 ccss 1362949805694013446 3 positive
## 10 ccss 1362970614282264583 3 positive
## # … with 791 more rows
afinn_ratio <- afinn_sentiment %>%
group_by(standards) %>%
count(sentiment) %>%
spread(sentiment, n) %>%
mutate(ratio = negative/positive)
afinn_ratio
## # A tibble: 2 × 4
## # Groups: standards [2]
## standards negative positive ratio
## <chr> <int> <int> <dbl>
## 1 ccss 417 202 2.06
## 2 ngss 18 164 0.110
afinn_counts <- afinn_sentiment %>%
group_by(standards) %>%
count(sentiment) %>%
filter(standards == "ngss")
afinn_counts %>%
ggplot(aes(x="", y=n, fill=sentiment)) +
geom_bar(width = .6, stat = "identity") +
labs(title = "Next Gen Science Standards",
subtitle = "Proportion of Positive & Negative Tweets") +
coord_polar(theta = "y") +
theme_void()
summary_afinn2 <- sentiment_afinn %>%
group_by(standards) %>%
filter(value != 0) %>%
mutate(sentiment = if_else(value < 0, "negative", "positive")) %>%
count(sentiment, sort = TRUE) %>%
mutate(method = "AFINN")
summary_bing2 <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
mutate(method = "bing")
summary_nrc2 <- sentiment_nrc %>%
filter(sentiment %in% c("positive", "negative")) %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
mutate(method = "nrc")
summary_loughran2 <- sentiment_loughran %>%
filter(sentiment %in% c("positive", "negative")) %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
mutate(method = "loughran")
#combining the dataframes
summary_sentiment <- bind_rows(summary_afinn2,
summary_bing2,
summary_nrc2,
summary_loughran2) %>%
arrange(method, standards) %>%
relocate(method)
summary_sentiment
## # A tibble: 16 × 4
## # Groups: standards [3]
## method standards sentiment n
## <chr> <chr> <chr> <int>
## 1 AFINN ccss negative 740
## 2 AFINN ccss positive 468
## 3 AFINN ngss positive 273
## 4 AFINN ngss negative 39
## 5 bing csss negative 914
## 6 bing csss positive 437
## 7 bing ngss positive 226
## 8 bing ngss negative 60
## 9 loughran csss negative 440
## 10 loughran csss positive 112
## 11 loughran ngss negative 68
## 12 loughran ngss positive 54
## 13 nrc csss positive 2198
## 14 nrc csss negative 764
## 15 nrc ngss positive 542
## 16 nrc ngss negative 73
total_counts <- summary_sentiment %>%
group_by(method, standards) %>%
summarise(total = sum(n))
sentiment_counts <- left_join(summary_sentiment, total_counts)
sentiment_counts
## # A tibble: 16 × 5
## # Groups: standards [3]
## method standards sentiment n total
## <chr> <chr> <chr> <int> <int>
## 1 AFINN ccss negative 740 1208
## 2 AFINN ccss positive 468 1208
## 3 AFINN ngss positive 273 312
## 4 AFINN ngss negative 39 312
## 5 bing csss negative 914 1351
## 6 bing csss positive 437 1351
## 7 bing ngss positive 226 286
## 8 bing ngss negative 60 286
## 9 loughran csss negative 440 552
## 10 loughran csss positive 112 552
## 11 loughran ngss negative 68 122
## 12 loughran ngss positive 54 122
## 13 nrc csss positive 2198 2962
## 14 nrc csss negative 764 2962
## 15 nrc ngss positive 542 615
## 16 nrc ngss negative 73 615
#new row that calculates the percentage
sentiment_percents <- sentiment_counts %>%
mutate(percent = n/total * 100)
sentiment_percents
## # A tibble: 16 × 6
## # Groups: standards [3]
## method standards sentiment n total percent
## <chr> <chr> <chr> <int> <int> <dbl>
## 1 AFINN ccss negative 740 1208 61.3
## 2 AFINN ccss positive 468 1208 38.7
## 3 AFINN ngss positive 273 312 87.5
## 4 AFINN ngss negative 39 312 12.5
## 5 bing csss negative 914 1351 67.7
## 6 bing csss positive 437 1351 32.3
## 7 bing ngss positive 226 286 79.0
## 8 bing ngss negative 60 286 21.0
## 9 loughran csss negative 440 552 79.7
## 10 loughran csss positive 112 552 20.3
## 11 loughran ngss negative 68 122 55.7
## 12 loughran ngss positive 54 122 44.3
## 13 nrc csss positive 2198 2962 74.2
## 14 nrc csss negative 764 2962 25.8
## 15 nrc ngss positive 542 615 88.1
## 16 nrc ngss negative 73 615 11.9
sentiment_percents %>%
ggplot(aes(x = standards, y = percent, fill=sentiment)) +
geom_bar(width = .8, stat = "identity") +
facet_wrap(~method, ncol = 1) +
coord_flip() +
labs(title = "Public Sentiment on Twitter",
subtitle = "The Common Core & Next Gen Science Standards",
x = "State Standards",
y = "Percentage of Words")