library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
library(twitteR)
##
## Attaching package: 'twitteR'
## The following object is masked from 'package:rtweet':
##
## lookup_statuses
## The following objects are masked from 'package:dplyr':
##
## id, location
library(udpipe)
library(tidytext)
library(wordcloud)
## Loading required package: RColorBrewer
appname <- "xxxxxxxxxxxx"
key <- "xxxxxxxxxxxxxxxxxxxxxxxxxxx"
secret <- "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
access_token <- "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
access_secret <- "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
twitter_token <- create_token(app = appname,
consumer_key = key,
consumer_secret = secret,
access_token = access_token,
access_secret = access_secret)
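# Optional sanity check: get_token() should now return the token just registered
get_token()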
covid_twitter %>%
  filter(str_detect(word, "chinese")) %>%
  count(location, word) %>%
  ggplot(aes(location, n, fill = word)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~word, scales = "free_y") +
  labs(y = "Number of appearances", x = NULL) +
  theme_bw() +
  ggtitle("Chinese-related tweets") +
  coord_flip()
covid_twitter %>%
  filter(str_detect(word, "chinese")) %>%
  group_by(location, word) %>%
  select(twitter_number) -> num
## Adding missing grouping variables: `location`, `word`
# Index back into the raw tweet table to recover the full text (column 5) of the matching tweets
covid_twitter_[num$twitter_number, 5] -> chinese_group
chinese_group %>%
filter(str_detect(text, "virus")) %>%
tidy_text() -> chinese_group
txt_freq(chinese_group$word) -> chinese_group
chinese_group %>%
  slice_max(order_by = freq, n = 10) %>%
  mutate(key = reorder(key, freq)) %>%
  ggplot(aes(x = key, y = freq)) +
  geom_col() +
  theme_light() +
  coord_flip() +
  xlab("Key words") +
  ylab("Number of appearances")
covid_twitter %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(word, sentiment, sort = TRUE) %>%
ungroup() -> covid_sent
covid_sent %>%
group_by(sentiment) %>%
slice_max(order_by = n, n=10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment", x = NULL) +
theme_bw() +
ggtitle("Covid sentiment analysis") +
coord_flip()
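# wordcloud and RColorBrewer are loaded above but unused so far; a minimal
# word-cloud sketch from the same Bing-matched counts:
covid_sent %>%
  with(wordcloud(word, n, max.words = 100, colors = brewer.pal(8, "Dark2")))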
racist_word <- search_tweets("chinese virus OR Chinese Virus OR chinesevirus", n = 18000, geocode = lookup_coords("usa"), include_rts = FALSE, lang = "en", retryonratelimit = TRUE)
## retry on rate limit...
## waiting about 13 minutes...
racist_word2 <- search_tweets("chink OR CHINK OR Chink", n = 18000, geocode = lookup_coords("usa"), include_rts = FALSE, lang = "en", retryonratelimit = TRUE)
racist_word3 <- search_tweets("ching chong OR Ching Chong OR CHING CHONG", n = 18000, geocode = lookup_coords("usa"), include_rts = FALSE, lang = "en", retryonratelimit = TRUE)
racist_word4 <- search_tweets("chinaman OR Chinaman", n = 18000, geocode = lookup_coords("usa"), include_rts = FALSE, lang = "en", retryonratelimit = TRUE)
racist_word5 <- search_tweets("gook OR GOOK OR Gook", n = 18000, geocode = lookup_coords("usa"), include_rts = FALSE, lang = "en", retryonratelimit = TRUE)
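# Twitter search is case-insensitive, so the capitalization variants in the
# queries above are redundant but harmless. Between pulls, the remaining
# search quota can be checked with rtweet's rate-limit helper:
rate_limit(query = "search/tweets")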
# Helper used for all the hate-word pulls below: number each tweet, split the
# text into lowercase word tokens, and keep only alphabetic tokens
tidy_text2 <- function(text){
  text %>%
    mutate(twitter_number = row_number()) %>%
    unnest_tokens(word, text) %>%
    mutate(word = str_extract(word, "[a-z']+")) %>%
    filter(!is.na(word))
}
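# Quick illustration of the helper on a toy tibble: the result has one row per
# lowercase word, tagged with its source tweet via twitter_number
tibble(text = c("Stay safe", "Wash your hands"), location = c("NY", "CA")) %>%
  tidy_text2()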
racist_word %>%
  tidy_text2() %>%
  select(location, twitter_number, word) -> racist_word
racist_word %>%
mutate(Group = "Chinese Virus") %>%
select(word, Group, twitter_number, location) -> racist_word
racist_word2 %>%
tidy_text2() %>%
select(twitter_number, word, location) -> racist_word2
racist_word2 %>%
mutate(Group = "chink") %>%
select(word, Group, twitter_number, location) -> racist_word2
racist_word3 %>%
tidy_text2() %>%
select(twitter_number, word, location) -> racist_word3
racist_word3 %>%
mutate(Group = "ching chong") %>%
select(word, Group, twitter_number, location) -> racist_word3
racist_word4 %>%
tidy_text2() %>%
select(twitter_number, word, location) -> racist_word4
racist_word4 %>%
mutate(Group = "chinaman") %>%
select(word, Group, twitter_number, location) -> racist_word4
racist_word5 %>%
tidy_text2() %>%
select(twitter_number, word, location) -> racist_word5
racist_word5 %>%
mutate(Group = "gook") %>%
select(word, Group, twitter_number, location) -> racist_word5
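# The five tokenize-and-label blocks above repeat the same steps; a compact
# purrr alternative (a sketch of the same transformation, run instead of the
# blocks above on the raw search_tweets() results):
map2_dfr(
  list(racist_word, racist_word2, racist_word3, racist_word4, racist_word5),
  c("Chinese Virus", "chink", "ching chong", "chinaman", "gook"),
  ~ .x %>%
    tidy_text2() %>%
    mutate(Group = .y) %>%
    select(word, Group, twitter_number, location)
)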
racist_word %>%
group_by(Group) %>%
mutate(linenumber = row_number()) %>%
select(word, Group, linenumber, location) -> racist_word
racist_word %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(Group, index = linenumber %/% 100, sentiment) %>% # 100-word chunks: track net sentiment across the stream of tweets
pivot_wider(names_from = sentiment, values_from = n, values_fill = list(n=0)) %>%
mutate(net = positive - negative) -> racist_word_sentiment
racist_word_sentiment %>%
ggplot(aes(index, net, fill = Group)) +
geom_col(show.legend = FALSE) +
ggtitle("Chinese Virus Sentiment Analysis") +
theme_bw()
racist_word2 %>%
group_by(Group) %>%
mutate(linenumber = row_number()) %>%
select(word, Group, linenumber, location) -> racist_word2
racist_word2 %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(Group, index = linenumber %/% 40, sentiment) %>%
pivot_wider(names_from = sentiment, values_from = n, values_fill = list(n=0)) %>%
mutate(net = positive - negative) -> racist_word_sentiment2
racist_word_sentiment2 %>%
ggplot(aes(index, net, fill = Group)) +
geom_col(show.legend = FALSE) +
ggtitle("Chink Sentiment Analysis") +
theme_bw()
### Group 3
racist_word3 %>%
group_by(Group) %>%
mutate(linenumber = row_number()) %>%
select(word, Group, linenumber, location) -> racist_word3
racist_word3 %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(Group, index = linenumber %/% 10, sentiment) %>%
pivot_wider(names_from = sentiment, values_from = n, values_fill = list(n=0)) %>%
mutate(net = positive - negative) -> racist_word_sentiment3
racist_word_sentiment3 %>%
ggplot(aes(index, net, fill = Group)) +
geom_col(show.legend = FALSE) +
ggtitle("Ching Sentiment Analysis") +
theme_bw()
### Group 4
racist_word4 %>%
group_by(Group) %>%
mutate(linenumber = row_number()) %>%
select(word, Group, linenumber, location) -> racist_word4
racist_word4 %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(Group, index = linenumber %/% 10, sentiment) %>%
pivot_wider(names_from = sentiment, values_from = n, values_fill = list(n=0)) %>%
mutate(net = positive - negative) -> racist_word_sentiment4
racist_word_sentiment4 %>%
ggplot(aes(index, net, fill = Group)) +
geom_col(show.legend = FALSE) +
ggtitle("Chinaman Sentiment Analysis") +
theme_bw()
### Group 5
racist_word5 %>%
group_by(Group) %>%
mutate(linenumber = row_number()) %>%
select(word, Group, linenumber, location) -> racist_word5
racist_word5 %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(Group, index = linenumber %/% 10, sentiment) %>%
pivot_wider(names_from = sentiment, values_from = n, values_fill = list(n=0)) %>%
mutate(net = positive - negative) -> racist_word_sentiment5
racist_word_sentiment5 %>%
ggplot(aes(index, net, fill = Group)) +
geom_col(show.legend = FALSE) +
ggtitle("Gook Sentiment Analysis") +
theme_bw()
### Combine all the data sets to compare
hateword <- do.call("rbind", list(racist_word, racist_word2, racist_word3, racist_word4, racist_word5))
hateword %>%
group_by(Group) %>%
mutate(linenumber = row_number()) %>%
select(word, Group, linenumber) -> hateword
hateword %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(Group, index = linenumber %/% 80, sentiment) %>%
pivot_wider(names_from = sentiment, values_from = n, values_fill = list(n=0)) %>%
mutate(net = positive - negative) -> hateword_sentiment
hateword_sentiment %>%
ggplot(aes(index, net, fill = Group)) +
geom_col(show.legend = FALSE) +
facet_wrap(~Group, ncol = 2, scales = "free_y") +
theme_bw()
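# A numeric companion to the facets above: average net sentiment per group
hateword_sentiment %>%
  group_by(Group) %>%
  summarise(mean_net = mean(net), .groups = "drop") %>%
  arrange(mean_net)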
get_sentiments("nrc") %>%
filter(sentiment == "anger" | sentiment == "fear" | sentiment == "disgust" |
sentiment == "trust" | sentiment == "sadness") ->
nrcanger
racist_word %>%
inner_join(nrcanger, by = "word") -> nrc_analysis
nrc_analysis %>%
count(word, sentiment, sort = TRUE) %>%
ungroup() -> nrc_analysis
nrc_analysis %>%
group_by(sentiment) %>%
slice_max(order_by = n, n = 10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment", x = NULL) +
theme_bw() +
coord_flip()
racist_word %>%
inner_join(get_sentiments("bing"), by = "word") %>%
count(word, sentiment, sort = TRUE) %>%
ungroup() ->
bing_word_counts
# Drop the most frequent match, which otherwise dominates both facets
bing_word_counts <- bing_word_counts[-1, ]
bing_word_counts %>%
group_by(sentiment) %>%
slice_max(order_by = n, n=10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment", x = NULL) +
theme_bw() +
ggtitle("Hatewords sentiment analysis", subtitle = "Group of Chinese virus") +
coord_flip()
mapcompare <- do.call("rbind", list(racist_word, racist_word2, racist_word3, racist_word4, racist_word5))
mapcompare %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  filter(sentiment == "negative") %>%
  group_by(location) %>%
  count(location) %>%
  rename(state = location) %>%
  filter(str_detect(state, "CA")) # loose match: this also catches "CANADA" below
## # A tibble: 24 x 2
## # Groups: state [24]
## state n
## <chr> <int>
## 1 Anaheim, CA 1
## 2 Bay Area, CA 1
## 3 Berkeley, CA 5
## 4 Campbell, CA 1
## 5 CANADA 1
## 6 Chino Hills, CA 1
## 7 Hidden Hills, CA 3
## 8 Lake Forest, CA 8
## 9 Los Angeles, CA 20
## 10 Oakland, CA 4
## # ... with 14 more rows
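# str_detect(state, "CA") is a loose match (note the CANADA row above). A
# stricter sketch that pulls the two-letter code off "City, ST" locations
# (assumes that format; rows without it are dropped):
mapcompare %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  filter(sentiment == "negative") %>%
  mutate(state = str_extract(location, "(?<=, )[A-Z]{2}$")) %>%
  filter(!is.na(state)) %>%
  count(state, sort = TRUE)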
readxl::read_excel("Table_13_Hate_Crime_Incidents_per_Bias_Motivation_and_Quarter_by_State_Federal_and_Agency_2020.xlsx") -> fbi
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
fbi %>%
  filter(...2 == "Total") %>%  # keep each state's "Total" row
  select(`Table 13`, ...5) %>% # `Table 13` holds the state name; ...5 the incident count used below
  rename(state = "Table 13") %>%
  rename(n = "...5") %>%
  group_by(state) %>%
  mutate(n = as.numeric(n)) %>%
  arrange(desc(n)) -> hate_crime_data
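# Quick look at the cleaned table; plot_usmap() below matches on the column
# named "state", which hate_crime_data already provides
head(hate_crime_data)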
library(usmap)
plot_usmap(data = hate_crime_data, values = "n", color = "blue") +
  scale_fill_continuous(low = "white", high = "red",
                        name = "cases", label = scales::comma) +
  labs(title = "US States",
       subtitle = "Total hate crime incidents by state, 2020") +
  theme(panel.background = element_rect(color = "black", fill = "white")) +
  theme(legend.position = "top")
As the map shows, California has the highest number of hate crime incidents.