install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("readr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("tidyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("rtweet")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("writexl")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("readxl")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("tidytext")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("textdata")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("textdata")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("scales")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(tidyr)
library(rtweet)
library(writexl)
library(readxl)
library(tidytext)
library(textdata)
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
app_name <- "eci588research"
api_key <- "mzNQRmiYPV8x8ZwobqvO3oRuN"
api_secret_key <- "oVZHB7J40GXhnGcLgDSCkiTP0j7fn9boOfVpeKmZzWvET2dKJR"
access_token <- "1562797323440103428-zXikORK587QKMWuJQJ49zICSOg4h3C"
access_token_secret <- "ONUgitOJ1eZgsf1Y8WcgZu0aLB8NGSiRwxSSNXlOACqZS"
## authenticate via web browser
token <- create_token(
app = app_name,
consumer_key = api_key,
consumer_secret = api_secret_key,
access_token = access_token,
access_secret = access_token_secret)
## Warning: `create_token()` was deprecated in rtweet 1.0.0.
## ℹ See vignette('auth') for details
## Saving auth to '/cloud/home/r1588112/.config/R/rtweet/create_token.rds'
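As the warning notes, create_token() is deprecated in rtweet 1.0.0. A minimal sketch of the newer workflow described in vignette('auth'), reusing the credential objects defined above (argument names follow the rtweet_bot() documentation):
auth <- rtweet_bot(
  api_key = api_key,
  api_secret = api_secret_key,
  access_token = access_token,
  access_secret = access_token_secret)
auth_as(auth)  # use this authentication for subsequent rtweet calls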
ngss_all_tweets <- search_tweets(q = "#NGSSchat", n=5000)
ngss_all_tweets
## # A tibble: 186 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 16:15:41 1.62e18 161936… "RT … "RT @N… FALSE <named list> "<a h…
## 2 2023-01-28 14:02:53 1.62e18 161933… "RT … "RT @S… FALSE <named list> "<a h…
## 3 2023-01-28 12:30:48 1.62e18 161931… "...… ".... … FALSE <named list> "<a h…
## 4 2023-01-28 04:15:01 1.62e18 161918… "RT … "RT @S… FALSE <named list> "<a h…
## 5 2023-01-28 04:01:14 1.62e18 161918… "RT … "RT @S… FALSE <named list> "<a h…
## 6 2023-01-28 03:59:35 1.62e18 161918… "Sci… "Scien… FALSE <named list> "<a h…
## 7 2023-01-27 22:01:36 1.62e18 161909… "RT … "RT @V… FALSE <named list> "<a h…
## 8 2023-01-27 20:48:49 1.62e18 161907… "RT … "RT @N… FALSE <named list> "<a h…
## 9 2023-01-27 16:41:44 1.62e18 161901… "Che… "Check… FALSE <named list> "<a h…
## 10 2023-01-27 16:23:53 1.62e18 161900… "RT … "RT @V… FALSE <named list> "<a h…
## # … with 176 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
View your new ngss_all_tweets data frame using one of the view methods from Unit 1 Section 2a to help answer the following questions:
How many tweets did our query using the Twitter API actually return? How many variables?
Why do you think our query pulled in far less than 5,000 tweets requested?
Does our query also include retweets? How do you know?
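One quick check (a sketch, not part of the original output): retweets returned by the API begin with "RT @" in the text column, so counting them suggests an answer.
ngss_all_tweets %>%
  filter(grepl("^RT @", text)) %>%  # keep only rows whose text starts with "RT @"
  nrow()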
ngss_non_retweets <- search_tweets("#NGSSchat",
n=5000,
include_rts = FALSE)
ngss_non_retweets
## # A tibble: 79 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 12:30:48 1.62e18 161931… "...… ".... … FALSE <named list> "<a h…
## 2 2023-01-28 03:59:35 1.62e18 161918… "Sci… "Scien… FALSE <named list> "<a h…
## 3 2023-01-27 16:41:44 1.62e18 161901… "Che… "Check… FALSE <named list> "<a h…
## 4 2023-01-27 14:00:01 1.62e18 161897… "Cal… "Calli… FALSE <named list> "<a h…
## 5 2023-01-26 20:45:18 1.62e18 161871… "My … "My 2n… FALSE <named list> "<a h…
## 6 2023-01-26 19:38:30 1.62e18 161869… "Whi… "Which… FALSE <named list> "<a h…
## 7 2023-01-26 18:15:24 1.62e18 161867… "An … "An an… FALSE <named list> "<a h…
## 8 2023-01-26 17:00:46 1.62e18 161865… "Cov… "Cover… FALSE <named list> "<a h…
## 9 2023-01-26 04:36:28 1.62e18 161846… "#ed… "#edch… FALSE <named list> "<a h…
## 10 2023-01-25 21:05:47 1.62e18 161835… "Mar… "Mark … FALSE <named list> "<a h…
## # … with 69 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
ngss_or_tweets <- search_tweets(q = "#NGSSchat OR ngss",
n=5000,
include_rts = FALSE)
ngss_or_tweets
## # A tibble: 335 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 20:01:44 1.62e18 161942… "Eve… "Even … FALSE <named list> "<a h…
## 2 2023-01-28 18:46:24 1.62e18 161940… "Mak… "Makin… FALSE <named list> "<a h…
## 3 2023-01-28 18:44:12 1.62e18 161940… "Tyt… "Tyto … FALSE <named list> "<a h…
## 4 2023-01-28 16:22:24 1.62e18 161937… "#NG… "#NGSS… FALSE <named list> "<a h…
## 5 2023-01-28 15:54:35 1.62e18 161936… "ギ… "ギル… FALSE <named list> "<a h…
## 6 2023-01-28 15:13:08 1.62e18 161935… "@tr… "@trhk… FALSE <named list> "<a h…
## 7 2023-01-28 14:56:36 1.62e18 161934… "#ال… "#الفي… FALSE <named list> "<a h…
## 8 2023-01-28 14:14:35 1.62e18 161933… "【… "【譲… FALSE <named list> "<a h…
## 9 2023-01-28 12:58:28 1.62e18 161931… "ピ… "ピン… FALSE <named list> "<a h…
## 10 2023-01-28 12:30:48 1.62e18 161931… "...… ".... … FALSE <named list> "<a h…
## # … with 325 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
ngss_noor_tweets <- search_tweets(q = "#NGSSchat ngss",
n=5000,
include_rts = FALSE)
ngss_noor_tweets
## # A tibble: 14 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 12:30:48 1.62e18 161931… "...… ".... … FALSE <named list> "<a h…
## 2 2023-01-27 16:41:44 1.62e18 161901… "Che… "Check… FALSE <named list> "<a h…
## 3 2023-01-26 18:15:24 1.62e18 161867… "An … "An an… FALSE <named list> "<a h…
## 4 2023-01-24 20:23:00 1.62e18 161798… "How… "How c… FALSE <named list> "<a h…
## 5 2023-01-24 01:36:26 1.62e18 161769… "Tha… "Thank… FALSE <named list> "<a h…
## 6 2023-01-22 14:07:26 1.62e18 161716… "Wha… "What … FALSE <named list> "<a h…
## 7 2023-01-21 01:15:49 1.62e18 161660… "THI… "THIS … FALSE <named list> "<a h…
## 8 2023-01-20 18:56:21 1.62e18 161650… "One… "One g… FALSE <named list> "<a h…
## 9 2023-01-20 02:41:51 1.62e18 161626… "@Td… "@TdiS… FALSE <named list> "<a h…
## 10 2023-01-20 02:39:30 1.62e18 161626… "In … "In th… FALSE <named list> "<a h…
## 11 2023-01-20 02:33:41 1.62e18 161626… "Q4 … "Q4 Hi… FALSE <named list> "<a h…
## 12 2023-01-20 02:31:19 1.62e18 161626… "Q4 … "Q4 Hi… FALSE <named list> "<a h…
## 13 2023-01-20 02:18:01 1.62e18 161625… "@Td… "@TdiS… FALSE <named list> "<a h…
## 14 2023-01-20 02:16:38 1.62e18 161625… "A2 … "A2 We… FALSE <named list> "<a h…
## # … with 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
Try including both search terms but excluding the OR
operator to answer the following question:
Does excluding the OR operator return more tweets,
the same number of tweets, or fewer tweets? Why?
What other useful arguments does the search_tweets()
function contain? Try adding one and see what happens.
ngss_noor_tweets <- search_tweets(q = "#NGSSchat ngss",
n=5000,
include_rts = TRUE)
ngss_noor_tweets
## # A tibble: 66 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 16:15:41 1.62e18 161936… "RT … "RT @N… FALSE <named list> "<a h…
## 2 2023-01-28 12:30:48 1.62e18 161931… "...… ".... … FALSE <named list> "<a h…
## 3 2023-01-27 20:48:49 1.62e18 161907… "RT … "RT @N… FALSE <named list> "<a h…
## 4 2023-01-27 16:41:44 1.62e18 161901… "Che… "Check… FALSE <named list> "<a h…
## 5 2023-01-26 18:15:24 1.62e18 161867… "An … "An an… FALSE <named list> "<a h…
## 6 2023-01-26 14:54:58 1.62e18 161862… "RT … "RT @N… FALSE <named list> "<a h…
## 7 2023-01-26 14:46:52 1.62e18 161862… "RT … "RT @N… FALSE <named list> "<a h…
## 8 2023-01-25 00:55:19 1.62e18 161804… "RT … "RT @S… FALSE <named list> "<a h…
## 9 2023-01-24 20:37:41 1.62e18 161798… "RT … "RT @S… FALSE <named list> "<a h…
## 10 2023-01-24 20:23:00 1.62e18 161798… "How… "How c… FALSE <named list> "<a h…
## # … with 56 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
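As one example for the question above about additional arguments, a hedged sketch using the type argument (one of "mixed", "recent", or "popular" per the rtweet documentation), which changes which results the search API returns:
ngss_recent_tweets <- search_tweets(q = "#NGSSchat ngss",
                                    n = 5000,
                                    include_rts = TRUE,
                                    type = "recent")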
ngss_tweets <- search_tweets2(c("NGSSchat OR ngss",
'"next generation science standard"',
'"next generation science standards"',
'"next gen science standard"',
'"next gen science standards"'
),
n=5000,
include_rts = FALSE)
ngss_dictionary <- c("NGSSchat OR ngss",
'"next generation science standard"',
'"next generation science standards"',
'"next gen science standard"',
'"next gen science standards"')
ngss_tweets <- search_tweets2(ngss_dictionary,
n=5000,
include_rts = FALSE)
ngss_tweets
## # A tibble: 355 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 20:01:44 1.62e18 161942… "Eve… "Even … FALSE <named list> "<a h…
## 2 2023-01-28 18:46:24 1.62e18 161940… "Mak… "Makin… FALSE <named list> "<a h…
## 3 2023-01-28 18:44:12 1.62e18 161940… "Tyt… "Tyto … FALSE <named list> "<a h…
## 4 2023-01-28 16:22:24 1.62e18 161937… "#NG… "#NGSS… FALSE <named list> "<a h…
## 5 2023-01-28 15:54:35 1.62e18 161936… "ギ… "ギル… FALSE <named list> "<a h…
## 6 2023-01-28 15:13:08 1.62e18 161935… "@tr… "@trhk… FALSE <named list> "<a h…
## 7 2023-01-28 14:56:36 1.62e18 161934… "#ال… "#الفي… FALSE <named list> "<a h…
## 8 2023-01-28 14:14:35 1.62e18 161933… "【… "【譲… FALSE <named list> "<a h…
## 9 2023-01-28 12:58:28 1.62e18 161931… "ピ… "ピン… FALSE <named list> "<a h…
## 10 2023-01-28 12:30:48 1.62e18 161931… "...… ".... … FALSE <named list> "<a h…
## # … with 345 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
ccss_dictionary <- c("commoncore", '"common core"')
ccss_tweets <- ccss_dictionary %>%
search_tweets2(n=5000, include_rts = FALSE)
ccss_tweets
## # A tibble: 827 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 21:16:21 1.62e18 161944… "@ti… "@tisk… FALSE <named list> "<a h…
## 2 2023-01-28 18:20:05 1.62e18 161939… "💯 … "💯 Sc… FALSE <named list> "<a h…
## 3 2023-01-28 16:14:35 1.62e18 161936… "@Ry… "@Ryan… FALSE <named list> "<a h…
## 4 2023-01-28 14:16:58 1.62e18 161933… "@ma… "@mast… FALSE <named list> "<a h…
## 5 2023-01-26 17:37:18 1.62e18 161866… "@Mr… "@MrsA… FALSE <named list> "<a h…
## 6 2023-01-26 13:02:48 1.62e18 161859… "@ca… "@catt… FALSE <named list> "<a h…
## 7 2023-01-26 12:13:15 1.62e18 161858… "@ze… "@zebb… FALSE <named list> "<a h…
## 8 2023-01-25 18:02:15 1.62e18 161830… "🔢… "🔢🤔 … FALSE <named list> "<a h…
## 9 2023-01-25 17:42:24 1.62e18 161830… "The… "The T… FALSE <named list> "<a h…
## 10 2023-01-25 17:29:39 1.62e18 161830… "@Br… "@Brad… FALSE <named list> "<a h…
## # … with 817 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
Use the search_tweets() function to create your own custom
query for a Twitter hashtag or topic(s) of interest.
search_tweets("crt")
## # A tibble: 100 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2023-01-28 02:10:00 1.62e18 161915… "The… "The s… FALSE <named list> "<a h…
## 2 2023-01-27 22:07:30 1.62e18 161909… "A M… "A Mis… FALSE <named list> "<a h…
## 3 2023-01-28 00:08:09 1.62e18 161912… "BUS… "BUSTE… FALSE <named list> "<a h…
## 4 2023-01-28 21:50:37 1.62e18 161945… "RT … "RT @l… FALSE <named list> "<a h…
## 5 2023-01-28 21:50:34 1.62e18 161945… "@Ki… "@King… FALSE <named list> "<a h…
## 6 2023-01-28 21:50:18 1.62e18 161945… "@jo… "@jose… FALSE <named list> "<a h…
## 7 2023-01-28 21:50:16 1.62e18 161945… "RT … "RT @S… FALSE <named list> "<a h…
## 8 2023-01-28 21:50:13 1.62e18 161945… "🔄… "🔄💗@… FALSE <named list> "<a h…
## 9 2023-01-28 21:50:02 1.62e18 161945… "Mot… "Motio… FALSE <named list> "<a h…
## 10 2023-01-28 21:49:49 1.62e18 161945… "RT … "RT @k… FALSE <named list> "<a h…
## # … with 90 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
write_xlsx(ngss_tweets, "ngss_tweets.xlsx")
write_xlsx(ccss_tweets, "dcsss_tweets.xlsx")
fi <- c("sbkellogg", "mjsamberg", "haspires", "tarheel93", "drcallie_tweets", "AlexDreier")
fi_tweets <- fi %>%
get_timeline(include_rts=FALSE)
fi_tweets
## # A tibble: 295 × 43
## created_at id id_str text full_…¹ trunc…² entities source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <list> <chr>
## 1 2022-11-19 14:08:14 1.59e18 159396… @wat… @watli… FALSE <named list> "<a h…
## 2 2022-11-17 12:50:30 1.59e18 159322… Two … Two Fr… FALSE <named list> "<a h…
## 3 2022-11-12 17:08:34 1.59e18 159147… Home… Home! … FALSE <named list> "<a h…
## 4 2022-11-12 15:20:37 1.59e18 159145… @ced… @cedmn… FALSE <named list> "<a h…
## 5 2022-11-11 23:43:18 1.59e18 159121… @ewm… @ewman… FALSE <named list> "<a h…
## 6 2022-11-11 21:07:45 1.59e18 159117… @ewm… @ewman… FALSE <named list> "<a h…
## 7 2022-11-09 13:37:17 1.59e18 159033… Grea… Greatl… FALSE <named list> "<a h…
## 8 2022-11-03 17:48:10 1.59e18 158822… @ewm… @ewman… FALSE <named list> "<a h…
## 9 2022-10-25 19:51:10 1.58e18 158499… @Mic… @Micha… FALSE <named list> "<a h…
## 10 2022-10-25 19:04:53 1.58e18 158498… @rob… @robmo… FALSE <named list> "<a h…
## # … with 285 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <list>,
## # coordinates <list>, place <list>, contributors <lgl>,
## # is_quote_status <lgl>, retweet_count <int>, favorite_count <int>,
## # favorited <lgl>, favorited_by <lgl>, retweeted <lgl>, scopes <lgl>,
## # lang <chr>, possibly_sensitive <lgl>, display_text_width <lgl>, …
## ℹ Users data at users_data()
sample_n(fi_tweets, 10) %>%
select(in_reply_to_screen_name, text)
## # A tibble: 10 × 2
## in_reply_to_screen_name text
## <chr> <chr>
## 1 <NA> "In retrospect this is the most obvious sign Lorca w…
## 2 AlexDreier "@AlexDreier Now you can't log in at all. https://t.…
## 3 <NA> "Thank you so much for inviting me! 😃 It was a plea…
## 4 <NA> "Attending my first NC robotics event at Campbell Un…
## 5 <NA> "Greatly enjoyed participating in the “Transforming …
## 6 <NA> "I’m looking forward to sharing my experiences as a …
## 7 <NA> "Join @SheaKerkhoff for this exciting discussion. @F…
## 8 <NA> "Ms. Inabinet (12th grade English teacher) inspired …
## 9 <NA> "Wordle 217 3/6\n\n⬛🟩🟩🟩🟩\n⬛🟩🟩🟩🟩\n🟩🟩🟩🟩…
## 10 DocAngMullennix "@DocAngMullennix @IRONMANtri @ncsupers @SuperTCS @C…
## ℹ Users data at users_data()
get_friends("jtbrantley")
## # A tibble: 601 × 2
## from_id to_id
## <chr> <chr>
## 1 jtbrantley 95975081
## 2 jtbrantley 245958903
## 3 jtbrantley 2425515951
## 4 jtbrantley 1524823081558106112
## 5 jtbrantley 1389792319180783616
## 6 jtbrantley 760676900381396992
## 7 jtbrantley 1050109395697377281
## 8 jtbrantley 506455937
## 9 jtbrantley 1436425107703013395
## 10 jtbrantley 490072059
## # … with 591 more rows
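The to_id values returned by get_friends() are numeric user IDs. A brief sketch for translating them into user profiles with lookup_users() (the object names here are hypothetical):
friend_ids <- get_friends("jtbrantley")
friend_profiles <- lookup_users(friend_ids$to_id)  # look up profiles for each friend ID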
#vignette("intro", package="rtweet")
To conclude Section 2a, try one of the following search functions from the rtweet vignette:
get_timelines(): Get the most recent 3,200 tweets from users.
stream_tweets(): Randomly sample (approximately 1%) from the live stream of all tweets.
get_friends(): Retrieve a list of all the accounts a user follows.
get_followers(): Retrieve a list of the accounts following a user.
get_favorites(): Get the most recently favorited statuses by a user.
get_trends(): Discover what’s currently trending in a city.
search_users(): Search for 1,000 users with the specific hashtag in their profile bios.
get_trends("charlotte")
## # A tibble: 49 × 9
## trend url promo…¹ query tweet…² place woeid as_of
## <chr> <chr> <lgl> <chr> <int> <chr> <int> <dttm>
## 1 Terquavion Smith http… NA %22T… NA Char… 2.38e6 2023-01-28 21:50:48
## 2 Wake Forest http… NA %22W… NA Char… 2.38e6 2023-01-28 21:50:48
## 3 Wolfpack http… NA Wolf… NA Char… 2.38e6 2023-01-28 21:50:48
## 4 Casemiro http… NA Case… 63233 Char… 2.38e6 2023-01-28 21:50:48
## 5 Jokic http… NA Jokic 13543 Char… 2.38e6 2023-01-28 21:50:48
## 6 PJ Tucker http… NA %22P… NA Char… 2.38e6 2023-01-28 21:50:48
## 7 #RoyalRumble http… NA %23R… 53291 Char… 2.38e6 2023-01-28 21:50:48
## 8 Oklahoma http… NA Okla… 17140 Char… 2.38e6 2023-01-28 21:50:48
## 9 Niang http… NA Niang NA Char… 2.38e6 2023-01-28 21:50:48
## 10 Binnington http… NA Binn… NA Char… 2.38e6 2023-01-28 21:50:48
## # … with 39 more rows, 1 more variable: created_at <dttm>, and abbreviated
## # variable names ¹promoted_content, ²tweet_volume
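For example, search_users() from the list above looks for users with a given term in their profile; a brief sketch (the object name is hypothetical):
ngss_users <- search_users("#NGSSchat", n = 100)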
ngss_tweets <- read_xlsx("Data/ngss_tweets.xlsx")
ccss_tweets <- read_xlsx("Data/ccss_tweets.xlsx")
ngss_tweets
## # A tibble: 338 × 43
## created_at id id_str text full_…¹ trunc…² entit…³ source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <lgl> <chr>
## 1 2023-01-25 18:06:04 1.62e18 16183092401… "Con… "Const… FALSE NA "<a h…
## 2 2023-01-25 17:47:48 1.62e18 16183046441… "TOD… "TODAY… FALSE NA "<a h…
## 3 2023-01-25 17:43:13 1.62e18 16183034918… "Pro… "Prove… FALSE NA "<a h…
## 4 2023-01-25 17:40:50 1.62e18 16183028910… "#NG… "#NGSS… FALSE NA "<a h…
## 5 2023-01-25 17:39:55 1.62e18 16183026610… "@ng… "@ngss… FALSE NA "<a h…
## 6 2023-01-25 17:01:18 1.62e18 16182929419… "We … "We fe… FALSE NA "<a h…
## 7 2023-01-25 16:58:59 1.62e18 16182923610… "@ng… "@ngss… FALSE NA "<a h…
## 8 2023-01-25 16:57:51 1.62e18 16182920741… "😖… "😖😖 … FALSE NA "<a h…
## 9 2023-01-25 14:28:33 1.62e18 16182545032… "@hy… "@hyuc… FALSE NA "<a h…
## 10 2023-01-25 13:08:14 1.62e18 16182342871… "Ss … "Ss co… FALSE NA "<a h…
## # … with 328 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <lgl>,
## # coordinates <lgl>, place <lgl>, contributors <lgl>, is_quote_status <lgl>,
## # retweet_count <dbl>, favorite_count <dbl>, favorited <lgl>,
## # favorited_by <lgl>, retweeted <lgl>, scopes <lgl>, lang <chr>,
## # possibly_sensitive <lgl>, display_text_width <lgl>, …
ccss_tweets
## # A tibble: 1,428 × 43
## created_at id id_str text full_…¹ trunc…² entit…³ source
## <dttm> <dbl> <chr> <chr> <chr> <lgl> <lgl> <chr>
## 1 2023-01-25 18:02:15 1.62e18 16183082794… "🔢… "🔢🤔 … FALSE NA "<a h…
## 2 2023-01-25 17:42:24 1.62e18 16183032844… "The… "The T… FALSE NA "<a h…
## 3 2023-01-25 17:29:39 1.62e18 16183000757… "@Br… "@Brad… FALSE NA "<a h…
## 4 2023-01-25 16:02:15 1.62e18 16182780826… "Gre… "Great… FALSE NA "<a h…
## 5 2023-01-25 03:21:31 1.62e18 16180866390… "#wo… "#woke… FALSE NA "<a h…
## 6 2023-01-25 01:54:13 1.62e18 16180646671… "#co… "#comm… FALSE NA "<a h…
## 7 2023-01-25 01:21:27 1.62e18 16180564217… "We … "We al… FALSE NA "<a h…
## 8 2023-01-24 17:24:27 1.62e18 16179363804… "@je… "@jere… FALSE NA "<a h…
## 9 2023-01-24 17:11:40 1.62e18 16179331641… "#ch… "#chat… FALSE NA "<a h…
## 10 2023-01-24 17:01:24 1.62e18 16179305800… "@li… "@libs… FALSE NA "<a h…
## # … with 1,418 more rows, 35 more variables: in_reply_to_status_id <dbl>,
## # in_reply_to_status_id_str <chr>, in_reply_to_user_id <dbl>,
## # in_reply_to_user_id_str <chr>, in_reply_to_screen_name <chr>, geo <lgl>,
## # coordinates <lgl>, place <lgl>, contributors <lgl>, is_quote_status <lgl>,
## # retweet_count <dbl>, favorite_count <dbl>, favorited <lgl>,
## # favorited_by <lgl>, retweeted <lgl>, scopes <lgl>, lang <chr>,
## # possibly_sensitive <lgl>, display_text_width <lgl>, …
ngss_text <-
ngss_tweets %>%
filter(lang == "en") %>%
select(in_reply_to_screen_name, created_at, text) %>%
mutate(standards = "ngss") %>%
relocate(standards)
ngss_text
## # A tibble: 236 × 4
## standards in_reply_to_screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 "Construction has comm…
## 2 ngss <NA> 2023-01-25 17:47:48 "TODAY'S @IowaSTEM Sca…
## 3 ngss <NA> 2023-01-25 17:43:13 "Prove to me that you …
## 4 ngss ngss_official 2023-01-25 17:39:55 "@ngss_official Old ha…
## 5 ngss <NA> 2023-01-25 17:01:18 "We feature GWC in our…
## 6 ngss <NA> 2023-01-25 13:08:14 "Ss completed a #Sketc…
## 7 ngss <NA> 2023-01-25 12:55:22 "Small group experimen…
## 8 ngss <NA> 2023-01-25 06:49:01 "It’s been a great day…
## 9 ngss Xeno_lith 2023-01-25 04:09:51 "I am modifying this c…
## 10 ngss <NA> 2023-01-25 03:25:26 "4 weeks, 30+ hours of…
## # … with 226 more rows
WARNING: You will not be able to progress to the next section until you have completed the following task:
Create a ccss_text data frame for our ccss_tweets Common Core tweets by modifying the code above.
ccss_text <-
ccss_tweets %>%
filter(lang == "en") %>%
select(in_reply_to_screen_name, created_at, text) %>%
mutate(standards = "ccss") %>%
relocate(standards)
ccss_text
## # A tibble: 1,344 × 4
## standards in_reply_to_screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ccss <NA> 2023-01-25 18:02:15 "🔢🤔 If helping your …
## 2 ccss <NA> 2023-01-25 17:42:24 "The Truth About Educa…
## 3 ccss BradWilcoxIFS 2023-01-25 17:29:39 "@BradWilcoxIFS @EWEri…
## 4 ccss <NA> 2023-01-25 16:02:15 "Great Interview: The …
## 5 ccss <NA> 2023-01-25 03:21:31 "#woke #american #白左…
## 6 ccss <NA> 2023-01-25 01:21:27 "We also need to solve…
## 7 ccss jeremyherb 2023-01-24 17:24:27 "@jeremyherb @jamiegan…
## 8 ccss libsoftiktok 2023-01-24 17:01:24 "@libsoftiktok This is…
## 9 ccss <NA> 2023-01-24 12:06:17 "#commoncore #math is …
## 10 ccss <NA> 2023-01-24 03:10:53 "Well @GovRonDeSantis …
## # … with 1,334 more rows
tweets <- bind_rows(ngss_text, ccss_text)
tweets
## # A tibble: 1,580 × 4
## standards in_reply_to_screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 "Construction has comm…
## 2 ngss <NA> 2023-01-25 17:47:48 "TODAY'S @IowaSTEM Sca…
## 3 ngss <NA> 2023-01-25 17:43:13 "Prove to me that you …
## 4 ngss ngss_official 2023-01-25 17:39:55 "@ngss_official Old ha…
## 5 ngss <NA> 2023-01-25 17:01:18 "We feature GWC in our…
## 6 ngss <NA> 2023-01-25 13:08:14 "Ss completed a #Sketc…
## 7 ngss <NA> 2023-01-25 12:55:22 "Small group experimen…
## 8 ngss <NA> 2023-01-25 06:49:01 "It’s been a great day…
## 9 ngss Xeno_lith 2023-01-25 04:09:51 "I am modifying this c…
## 10 ngss <NA> 2023-01-25 03:25:26 "4 weeks, 30+ hours of…
## # … with 1,570 more rows
head(tweets)
## # A tibble: 6 × 4
## standards in_reply_to_screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 "Construction has comme…
## 2 ngss <NA> 2023-01-25 17:47:48 "TODAY'S @IowaSTEM Scal…
## 3 ngss <NA> 2023-01-25 17:43:13 "Prove to me that you t…
## 4 ngss ngss_official 2023-01-25 17:39:55 "@ngss_official Old hab…
## 5 ngss <NA> 2023-01-25 17:01:18 "We feature GWC in our …
## 6 ngss <NA> 2023-01-25 13:08:14 "Ss completed a #Sketch…
tail(tweets)
## # A tibble: 6 × 4
## standards in_reply_to_screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ccss <NA> 2023-01-15 13:34:09 "“If it cost you your p…
## 2 ccss <NA> 2023-01-15 13:12:29 "AGREED! WE MUST ELIMIN…
## 3 ccss <NA> 2023-01-15 13:09:43 "Robin Hood (Graphic Re…
## 4 ccss <NA> 2023-01-15 13:04:26 "Nobody who went to sch…
## 5 ccss CecCoalition 2023-01-15 12:34:59 "@CecCoalition @DOEChan…
## 6 ccss <NA> 2023-01-15 12:33:30 "L&D: Flip those so…
tweet_tokens <-
tweets %>%
unnest_tokens(output = word,
input = text)
tweet_tokens
## # A tibble: 43,981 × 4
## standards in_reply_to_screen_name created_at word
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 construction
## 2 ngss <NA> 2023-01-25 18:06:04 has
## 3 ngss <NA> 2023-01-25 18:06:04 commenced
## 4 ngss <NA> 2023-01-25 18:06:04 4th
## 5 ngss <NA> 2023-01-25 18:06:04 grade
## 6 ngss <NA> 2023-01-25 18:06:04 has
## 7 ngss <NA> 2023-01-25 18:06:04 started
## 8 ngss <NA> 2023-01-25 18:06:04 building
## 9 ngss <NA> 2023-01-25 18:06:04 their
## 10 ngss <NA> 2023-01-25 18:06:04 solutions
## # … with 43,971 more rows
tidy_tweets <-
tweet_tokens %>%
anti_join(stop_words, by = "word")
tidy_tweets
## # A tibble: 23,416 × 4
## standards in_reply_to_screen_name created_at word
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 construction
## 2 ngss <NA> 2023-01-25 18:06:04 commenced
## 3 ngss <NA> 2023-01-25 18:06:04 4th
## 4 ngss <NA> 2023-01-25 18:06:04 grade
## 5 ngss <NA> 2023-01-25 18:06:04 started
## 6 ngss <NA> 2023-01-25 18:06:04 building
## 7 ngss <NA> 2023-01-25 18:06:04 solutions
## 8 ngss <NA> 2023-01-25 18:06:04 beach
## 9 ngss <NA> 2023-01-25 18:06:04 erosion
## 10 ngss <NA> 2023-01-25 18:06:04 kssci
## # … with 23,406 more rows
count(tidy_tweets, word, sort = T)
## # A tibble: 7,350 × 2
## word n
## <chr> <int>
## 1 common 1388
## 2 core 1379
## 3 https 550
## 4 t.co 550
## 5 math 535
## 6 education 156
## 7 ngss 151
## 8 school 138
## 9 science 128
## 10 standards 125
## # … with 7,340 more rows
filter(tweets, grepl('https', text))
## # A tibble: 463 × 4
## standards in_reply_to_screen_name created_at text
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 "Construction has comm…
## 2 ngss <NA> 2023-01-25 17:47:48 "TODAY'S @IowaSTEM Sca…
## 3 ngss <NA> 2023-01-25 17:43:13 "Prove to me that you …
## 4 ngss <NA> 2023-01-25 17:01:18 "We feature GWC in our…
## 5 ngss <NA> 2023-01-25 13:08:14 "Ss completed a #Sketc…
## 6 ngss <NA> 2023-01-25 12:55:22 "Small group experimen…
## 7 ngss <NA> 2023-01-25 06:49:01 "It’s been a great day…
## 8 ngss <NA> 2023-01-25 03:25:26 "4 weeks, 30+ hours of…
## 9 ngss <NA> 2023-01-25 00:20:51 "Loved tonight’s NGSS …
## 10 ngss doc_nuge 2023-01-24 23:20:12 "@doc_nuge @AtchisonPu…
## # … with 453 more rows
tidy_tweets <-
tweet_tokens %>%
anti_join(stop_words, by = "word") %>%
filter(!word == "https")
We’ve created some unnecessarily lengthy code to demonstrate some of
the steps in the tidying process. Rewrite the tokenization and
stop-word removal steps as a more compact series of commands and save
your data frame as tidy_tweets.
tweet_tokens <- tweets %>%
unnest_tokens(output = word,
input = text)
tidy_tweets <- tweet_tokens %>%
anti_join(stop_words, by = "word") %>%
filter(!word == "https")
tidy_tweets
## # A tibble: 22,866 × 4
## standards in_reply_to_screen_name created_at word
## <chr> <chr> <dttm> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 construction
## 2 ngss <NA> 2023-01-25 18:06:04 commenced
## 3 ngss <NA> 2023-01-25 18:06:04 4th
## 4 ngss <NA> 2023-01-25 18:06:04 grade
## 5 ngss <NA> 2023-01-25 18:06:04 started
## 6 ngss <NA> 2023-01-25 18:06:04 building
## 7 ngss <NA> 2023-01-25 18:06:04 solutions
## 8 ngss <NA> 2023-01-25 18:06:04 beach
## 9 ngss <NA> 2023-01-25 18:06:04 erosion
## 10 ngss <NA> 2023-01-25 18:06:04 kssci
## # … with 22,856 more rows
write_xlsx(tidy_tweets, "tidy_tweets.xlsx")
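An even more compact version of the exercise above can be written as a single pipeline. This sketch uses the hypothetical name tidy_tweets_alt so it does not overwrite the data frame saved above, and it also drops the t.co link stubs that showed up in the earlier word counts:
tidy_tweets_alt <- tweets %>%
  unnest_tokens(output = word, input = text) %>%   # tokenize tweet text into words
  anti_join(stop_words, by = "word") %>%           # remove standard stop words
  filter(!word %in% c("https", "t.co"))            # remove URL artifacts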
afinn <- get_sentiments("afinn")
afinn
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # … with 2,467 more rows
bing <- get_sentiments("bing")
bing
## # A tibble: 6,786 × 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # … with 6,776 more rows
nrc <- get_sentiments("nrc")
nrc
## # A tibble: 13,872 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # … with 13,862 more rows
loughran <- get_sentiments("loughran")
loughran
## # A tibble: 4,150 × 2
## word sentiment
## <chr> <chr>
## 1 abandon negative
## 2 abandoned negative
## 3 abandoning negative
## 4 abandonment negative
## 5 abandonments negative
## 6 abandons negative
## 7 abdicated negative
## 8 abdicates negative
## 9 abdicating negative
## 10 abdication negative
## # … with 4,140 more rows
How were these sentiment lexicons put together and validated? Hint: take a look at Chapter 2 from Text Mining with R.
Why should we be cautious when using and interpreting them?
sentiment_afinn <- inner_join(tidy_tweets, afinn, by = "word")
sentiment_afinn
## # A tibble: 1,766 × 5
## standards in_reply_to_screen_name created_at word value
## <chr> <chr> <dttm> <chr> <dbl>
## 1 ngss <NA> 2023-01-25 18:06:04 solutions 1
## 2 ngss <NA> 2023-01-25 17:47:48 growing 1
## 3 ngss ngss_official 2023-01-25 17:39:55 die -3
## 4 ngss <NA> 2023-01-25 13:08:14 stop -1
## 5 ngss <NA> 2023-01-25 12:55:22 solid 2
## 6 ngss Xeno_lith 2023-01-25 04:09:51 fit 1
## 7 ngss Xeno_lith 2023-01-25 04:09:51 love 3
## 8 ngss <NA> 2023-01-25 03:25:26 trouble -2
## 9 ngss <NA> 2023-01-25 03:25:26 errors -2
## 10 ngss <NA> 2023-01-25 00:20:51 loved 3
## # … with 1,756 more rows
sentiment_bing <- inner_join(tidy_tweets, bing, by = "word")
sentiment_bing
## # A tibble: 1,791 × 5
## standards in_reply_to_screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 erosion negative
## 2 ngss ngss_official 2023-01-25 17:39:55 die negative
## 3 ngss <NA> 2023-01-25 12:55:22 solid positive
## 4 ngss <NA> 2023-01-25 12:55:22 vibrate negative
## 5 ngss Xeno_lith 2023-01-25 04:09:51 love positive
## 6 ngss <NA> 2023-01-25 03:25:26 trouble negative
## 7 ngss <NA> 2023-01-25 03:25:26 errors negative
## 8 ngss <NA> 2023-01-25 03:25:26 ready positive
## 9 ngss <NA> 2023-01-25 00:20:51 loved positive
## 10 ngss 4CarlosCardoso 2023-01-25 00:12:42 respect positive
## # … with 1,781 more rows
sentiment_loughran <- inner_join(tidy_tweets, loughran, by = "word")
sentiment_loughran
## # A tibble: 942 × 5
## standards in_reply_to_screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 erosion negative
## 2 ngss <NA> 2023-01-25 12:55:22 investigated negative
## 3 ngss <NA> 2023-01-25 06:49:01 inquiry negative
## 4 ngss <NA> 2023-01-25 03:25:26 trouble negative
## 5 ngss <NA> 2023-01-25 03:25:26 errors negative
## 6 ngss 4CarlosCardoso 2023-01-25 00:12:42 accomplished positive
## 7 ngss <NA> 2023-01-24 20:51:54 exposed negative
## 8 ngss <NA> 2023-01-24 20:51:54 disclaimers negative
## 9 ngss <NA> 2023-01-24 20:23:00 justice litigious
## 10 ngss <NA> 2023-01-24 19:40:07 challenge negative
## # … with 932 more rows
Create a sentiment_nrc data frame using the code
above.
sentiment_nrc <- inner_join(tidy_tweets, nrc, by = "word")
sentiment_nrc
## # A tibble: 8,954 × 5
## standards in_reply_to_screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 building positive
## 2 ngss <NA> 2023-01-25 18:06:04 beach joy
## 3 ngss <NA> 2023-01-25 18:06:04 erosion negative
## 4 ngss <NA> 2023-01-25 17:47:48 school trust
## 5 ngss <NA> 2023-01-25 17:47:48 system trust
## 6 ngss <NA> 2023-01-25 17:47:48 learning positive
## 7 ngss <NA> 2023-01-25 17:43:13 prove positive
## 8 ngss <NA> 2023-01-25 17:43:13 teach joy
## 9 ngss <NA> 2023-01-25 17:43:13 teach positive
## 10 ngss <NA> 2023-01-25 17:43:13 teach surprise
## # … with 8,944 more rows
What do you notice about the change in the number of observations
(i.e., words) between tidy_tweets and the data frames with
sentiment values attached? Why did this happen?
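A quick check like the sketch below makes the comparison concrete:
nrow(tidy_tweets)
nrow(sentiment_afinn)
nrow(sentiment_bing)
nrow(sentiment_nrc)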
ts_plot(tweets, by = "days", color = "purple")
ts_plot(tweets, by = "hours", color = "blue")
Use ts_plot() with the group_by() function
to compare the number of tweets over time for the Next Gen and Common Core
standards.
tweets %>%
group_by(standards) %>%
ts_plot(group = "standards", by = "days")
Which set of standards is Twitter users talking about the most?
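A simple tally (a sketch) complements the plots above:
tweets %>%
  count(standards)  # number of tweets collected per set of standards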
summary_bing <- count(sentiment_bing, sentiment, sort = TRUE)
summary_bing
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 1051
## 2 positive 740
summary_bing <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment)
summary_bing
## # A tibble: 4 × 3
## # Groups: standards [2]
## standards sentiment n
## <chr> <chr> <int>
## 1 ccss negative 1013
## 2 ccss positive 549
## 3 ngss negative 38
## 4 ngss positive 191
summary_bing <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
spread(sentiment, n)
summary_bing
## # A tibble: 2 × 3
## # Groups: standards [2]
## standards negative positive
## <chr> <int> <int>
## 1 ccss 1013 549
## 2 ngss 38 191
summary_bing <- sentiment_bing %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
spread(sentiment, n) %>%
mutate(sentiment = positive - negative) %>%
mutate(lexicon = "bing") %>%
relocate(lexicon)
summary_bing
## # A tibble: 2 × 5
## # Groups: standards [2]
## lexicon standards negative positive sentiment
## <chr> <chr> <int> <int> <int>
## 1 bing ccss 1013 549 -464
## 2 bing ngss 38 191 153
head(sentiment_afinn)
## # A tibble: 6 × 5
## standards in_reply_to_screen_name created_at word value
## <chr> <chr> <dttm> <chr> <dbl>
## 1 ngss <NA> 2023-01-25 18:06:04 solutions 1
## 2 ngss <NA> 2023-01-25 17:47:48 growing 1
## 3 ngss ngss_official 2023-01-25 17:39:55 die -3
## 4 ngss <NA> 2023-01-25 13:08:14 stop -1
## 5 ngss <NA> 2023-01-25 12:55:22 solid 2
## 6 ngss Xeno_lith 2023-01-25 04:09:51 fit 1
summary_afinn <- sentiment_afinn %>%
group_by(standards) %>%
summarise(sentiment = sum(value)) %>%
mutate(lexicon = "AFINN") %>%
relocate(lexicon)
summary_afinn
## # A tibble: 2 × 3
## lexicon standards sentiment
## <chr> <chr> <dbl>
## 1 AFINN ccss -782
## 2 AFINN ngss 379
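Because CCSS has far more tweets than NGSS, raw totals are hard to compare directly. A hedged sketch (the name afinn_per_word is hypothetical) scales the AFINN score by the number of scored words:
afinn_per_word <- sentiment_afinn %>%
  group_by(standards) %>%
  summarise(sentiment_per_word = mean(value))  # average AFINN value per scored word
afinn_per_word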
For your final task for this walk-through, calculate a single
sentiment score for NGSS and CCSS using the remaining nrc
and loughran lexicons and answer the following
questions.
Hint: The nrc lexicon contains “positive” and “negative”
values just like bing and loughran, but it also
includes values like “trust” and “sadness” as shown below. You will need
to use the filter() function to select only the rows that
contain “positive” and “negative.”
summary_nrc <- sentiment_nrc %>%
group_by(standards) %>%
count(sentiment) %>%
filter(sentiment == "positive" | sentiment == "negative")
summary_nrc
## # A tibble: 4 × 3
## # Groups: standards [2]
## standards sentiment n
## <chr> <chr> <int>
## 1 ccss negative 1022
## 2 ccss positive 2806
## 3 ngss negative 75
## 4 ngss positive 403
summary_nrc <- sentiment_nrc %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
filter(sentiment == "positive" | sentiment == "negative") %>%
spread(sentiment, n)
summary_nrc
## # A tibble: 2 × 3
## # Groups: standards [2]
## standards negative positive
## <chr> <int> <int>
## 1 ccss 1022 2806
## 2 ngss 75 403
summary_nrc <- sentiment_nrc %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
filter(sentiment == "positive" | sentiment == "negative") %>%
spread(sentiment, n) %>%
mutate(sentiment = positive - negative) %>%
mutate(lexicon = "nrc") %>%
relocate(lexicon)
summary_nrc
## # A tibble: 2 × 5
## # Groups: standards [2]
## lexicon standards negative positive sentiment
## <chr> <chr> <int> <int> <int>
## 1 nrc ccss 1022 2806 1784
## 2 nrc ngss 75 403 328
head(sentiment_loughran)
## # A tibble: 6 × 5
## standards in_reply_to_screen_name created_at word sentiment
## <chr> <chr> <dttm> <chr> <chr>
## 1 ngss <NA> 2023-01-25 18:06:04 erosion negative
## 2 ngss <NA> 2023-01-25 12:55:22 investigated negative
## 3 ngss <NA> 2023-01-25 06:49:01 inquiry negative
## 4 ngss <NA> 2023-01-25 03:25:26 trouble negative
## 5 ngss <NA> 2023-01-25 03:25:26 errors negative
## 6 ngss 4CarlosCardoso 2023-01-25 00:12:42 accomplished positive
summary_loughran <- sentiment_loughran %>%
group_by(standards) %>%
count(sentiment) %>%
filter(sentiment == "positive" | sentiment == "negative")
summary_loughran
## # A tibble: 4 × 3
## # Groups: standards [2]
## standards sentiment n
## <chr> <chr> <int>
## 1 ccss negative 517
## 2 ccss positive 123
## 3 ngss negative 62
## 4 ngss positive 37
summary_loughran <- sentiment_loughran %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
filter(sentiment == "positive" | sentiment == "negative") %>%
spread(sentiment, n)
summary_loughran
## # A tibble: 2 × 3
## # Groups: standards [2]
## standards negative positive
## <chr> <int> <int>
## 1 ccss 517 123
## 2 ngss 62 37
summary_loughran <- sentiment_loughran %>%
group_by(standards) %>%
count(sentiment, sort = TRUE) %>%
filter(sentiment == "positive" | sentiment == "negative") %>%
spread(sentiment, n) %>%
mutate(sentiment = positive - negative) %>%
mutate(lexicon = "loughran") %>%
relocate(lexicon)
summary_loughran
## # A tibble: 2 × 5
## # Groups: standards [2]
## lexicon standards negative positive sentiment
## <chr> <chr> <int> <int> <int>
## 1 loughran ccss 517 123 -394
## 2 loughran ngss 62 37 -25
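To compare the lexicons side by side, the four summaries can be stacked (a sketch; bind_rows() fills NA for columns a given summary lacks, such as the missing negative/positive columns in the AFINN summary):
all_summaries <- bind_rows(summary_bing, summary_afinn, summary_nrc, summary_loughran)
all_summaries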
For the Loughran lexicon, both CCSS and NGSS return an overall negative sentiment, with CCSS far more negative at -394.
Overall, working with the different lexicons was very interesting, and it is striking how the word-level results vary from lexicon to lexicon. Despite those differences, the results suggest that sentiment toward CCSS is more negative overall than sentiment toward NGSS: CCSS received a negative sentiment score in three of the four lexicons. The conversation surrounding CCSS, which has been a prominent talking point both socially and politically, likely contributes to this, as does the much larger volume of tweets and hashtags about CCSS compared to NGSS.
NGSS is also an important set of standards, but it is not as widely understood or discussed outside of the educators who use it, because it focuses on science, a subject that, while extremely important, is not traditionally used as heavily to quantify student achievement as the math and English covered by CCSS.