library(wordcloud2)
library(ggplot2)
library(scales)
library(rtweet)
library(dplyr)
library(xml2)
library(httr)
library(jsonlite)
library(magrittr)
library(data.tree)
library(tidytext)
library(stringr)
library(DiagrammeR)
library(lubridate)
library(janeaustenr)
library(tidyr)
library(jiebaR)
library(widyr)
library(NLP)
library(ggraph)
library(igraph)

### Load the previously collected data
load(file = "C:/Users/ASUS/Desktop/group2_project/ah.RData")
df has 90 columns, but here we only use a few of them (created_at, status_id, text, and source).
created_at is already a date/time column, so we can apply min and max directly to find the earliest and latest tweet dates.
Note: rtweet can only retrieve tweets from roughly the last 10 days.
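The collection step itself is not shown in this report. As a rough sketch only, a dataset like df could have been pulled with rtweet's search_tweets(); the query string and parameters below are assumptions for illustration, not the actual call used:

# Hypothetical collection sketch; requires a configured Twitter API token.
# search_tweets() returns ~90 columns, including created_at, status_id, text and source.
df <- search_tweets(
  q = "asian hate",        # assumed search query
  n = 18000,               # upper bound on number of tweets
  include_rts = FALSE,     # assumption: drop retweets
  retryonratelimit = TRUE
)
save(df, file = "ah.RData")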
nrow(df)## [1] 9742
min(df$created_at)## [1] "2021-04-24 04:22:43 UTC"
max(df$created_at)## [1] "2021-04-28 16:26:35 UTC"
data <- df %>%
dplyr::select(created_at, status_id,text) %>%
distinct()
data$date = as.Date(data$created_at)
article_count_by_date <- data %>%
group_by(date) %>%
summarise(count = n())
head(article_count_by_date, 20)## # A tibble: 5 x 2
## date count
## <date> <int>
## 1 2021-04-24 2078
## 2 2021-04-25 2198
## 3 2021-04-26 2533
## 4 2021-04-27 1878
## 5 2021-04-28 1055
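article_count_by_date is only printed above; as an optional visual check (not part of the original analysis), the daily volume could be plotted with the already-loaded ggplot2 and scales packages:

# Daily tweet volume; purely illustrative, assuming article_count_by_date as computed above.
article_count_by_date %>%
  ggplot(aes(x = date, y = count)) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "Date", y = "Number of tweets")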
tidy_data <- data %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) ## Joining, by = "word"
tidy_data## # A tibble: 117,519 x 4
## created_at status_id date word
## <dttm> <chr> <date> <chr>
## 1 2021-04-28 16:26:35 1387443134104182786 2021-04-28 talk
## 2 2021-04-28 16:26:35 1387443134104182786 2021-04-28 hate
## 3 2021-04-28 16:26:35 1387443134104182786 2021-04-28 crimes
## 4 2021-04-28 16:26:35 1387443134104182786 2021-04-28 asians
## 5 2021-04-28 16:26:35 1387443134104182786 2021-04-28 lot
## 6 2021-04-28 16:26:35 1387443134104182786 2021-04-28 tbd
## 7 2021-04-28 16:26:35 1387443134104182786 2021-04-28 asian
## 8 2021-04-28 16:26:35 1387443134104182786 2021-04-28 people
## 9 2021-04-28 16:26:35 1387443134104182786 2021-04-28 excluding
## 10 2021-04-28 16:26:35 1387443134104182786 2021-04-28 hate
## # ... with 117,509 more rows
tidy_data %>%
count(word, sort = TRUE) %>%
filter(n > 600) %>%
top_n(10, n) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(n, word)) +
geom_col() +
labs(y = NULL)
tidy_data %>%
count(word, sort = TRUE) %>%
filter(!word %in% c('asian','hate')) %>%
filter(n > 50) %>%
wordcloud2()
# Excluding tokens tagged "O" (Other), how many distinct words received a named-entity tag?
length(unique(tokens$word[tokens$ner != "O"])) ## [1] 3598
# Convert to lowercase
tokens$lower_word = tolower(tokens$word)
tokens$lower_lemma = tolower(tokens$lemma)
tokens %>%
filter(ner == "COUNTRY") %>% #篩選NER為COUNTRY
group_by(lower_word) %>% #根據word分組
summarize(count = n()) %>% #計算每組
top_n(n = 13, count) %>%
ungroup() %>%
mutate(word = reorder(lower_word, count)) %>%
ggplot(aes(word, count)) +
geom_col()+
ggtitle("Word Frequency (NER is COUNTRY)") +
theme(text=element_text(size=14))+
coord_flip()
tokens %>%
filter(ner == "ORGANIZATION") %>% #篩選NER為ORGANIZATION
group_by(lower_word) %>% #根據word分組
summarize(count = n()) %>% #計算每組
top_n(n = 10, count) %>%
ungroup() %>%
mutate(word = reorder(lower_word, count)) %>%
ggplot(aes(word, count)) +
geom_col()+
ggtitle("Word Frequency (NER is ORGANIZATION)") +
theme(text=element_text(size=14))+
coord_flip()
tokens %>%
filter(ner == "PERSON") %>% #篩選NER為PERSON
group_by(lower_word) %>% #根據word分組
summarize(count = n()) %>% #計算每組
top_n(n = 10, count) %>%
ungroup() %>%
mutate(word = reorder(lower_word, count)) %>%
ggplot(aes(word, count)) +
geom_col()+
ggtitle("Word Frequency (NER is PERSON)") +
theme(text=element_text(size=14))+
coord_flip()
dependencies = coreNLP_dependency_parser(obj)
parse_tree <- obj[[113]]$doc[[1]][[1]]$parse
tree <- parse2tree(parse_tree)
SetNodeStyle(tree, style = "filled,rounded", shape = "box")
Sentiment scores range from 0 (most negative) to 4 (most positive):
+ 0, 1 : very negative, negative
+ 2 : neutral
+ 3, 4 : positive, very positive
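The parsed output below carries both a numeric sentimentValue and a text label. As a minimal sketch (not a step in the original analysis), the five levels could be collapsed into three coarse buckets following the legend above:

# Hypothetical recoding of the 0-4 CoreNLP scale into three coarse buckets.
sentiment_bucket <- function(score) {
  dplyr::case_when(
    score <= 1 ~ "negative",   # 0 = very negative, 1 = negative
    score == 2 ~ "neutral",
    score >= 3 ~ "positive"    # 3 = positive, 4 = very positive
  )
}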
sentiment = coreNLP_sentiment_parser(obj)
head(sentiment,20)
# Look at how tweets are distributed across the sentiment categories
sentiment$sentimentValue = sentiment$sentimentValue %>% as.numeric
sentiment$sentiment %>% table()## .
## Negative Neutral Positive Verynegative Verypositive
## 6487 1882 806 498 6
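Most tweets are classified as negative. As a quick follow-up (not in the original output), the counts above can be turned into shares:

# Share of each sentiment label, using the same table as above.
sentiment$sentiment %>% table() %>% prop.table() %>% round(3)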
df$date = as.Date(df$created_at)
sentiment %>%
merge(df[,c("status_id","source","date")]) %>%
group_by(date) %>%
summarise(avg_sentiment = mean(sentimentValue,na.rm=T)) %>%
ggplot(aes(x=date,y=avg_sentiment)) +
geom_line()
sentiment %>%
merge(df[,c("status_id","source","date")]) %>%
filter(source %in% c("Twitter Web Client","Twitter for iPhone","Twitter for Android")) %>%
group_by(date,source) %>%
summarise(avg_sentiment = mean(sentimentValue,na.rm=T)) %>%
ggplot(aes(x=date,y=avg_sentiment,color=source)) +
geom_line()## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
### Word usage in positive tweets
sentiment %>%
merge(tokens) %>%
anti_join(stop_words) %>%
filter(!lower_word %in% c('i','the')) %>%
filter(sentiment == "Verypositive" | sentiment =='Positive') %>%
group_by(lower_lemma) %>% #根據lemma分組
summarize(count = n()) %>%
filter(count > 5 & count < 400)
sentiment %>%
merge(tokens) %>%
anti_join(stop_words) %>%
filter(!lower_word %in% c('i','the')) %>%
filter(sentiment == "Verynegative" | sentiment =='Negative') %>%
group_by(lower_lemma) %>%
summarize(count = n()) %>%
filter(count > 10 & count < 400)
ah_words <- tokens %>%
count(status_id, lower_word, sort = TRUE)
total_words <- ah_words %>%
group_by(status_id) %>%
summarize(total = sum(n))
ah_words <- left_join(ah_words, total_words)## Joining, by = "status_id"
ah_words_tf_idf <- ah_words %>%
bind_tf_idf(lower_word, status_id, n)
head(ah_words_tf_idf,20)## status_id lower_word n total tf idf tf_idf
## 1 1387389984202711042 asian 14 56 0.2500000 0.3035458 0.07588644
## 2 1387389984202711042 hate 14 56 0.2500000 0.1811857 0.04529643
## 3 1387389984202711042 i 14 56 0.2500000 1.6866263 0.42165657
## 4 1387389984202711042 women 14 56 0.2500000 4.3985904 1.09964759
## 5 1386690850051133449 not 9 44 0.2045455 2.1837809 0.44668246
## 6 1386923628374200323 black 9 45 0.2000000 1.9228291 0.38456581
## 7 1386108746976354308 and 8 45 0.1777778 1.0435396 0.18551815
## 8 1387151033554685953 the 8 50 0.1600000 0.6887144 0.11019431
## 9 1387275893463080961 i 8 53 0.1509434 1.6866263 0.25458510
## 10 1385999224249090054 you 7 52 0.1346154 1.9529605 0.26289852
## 11 1386140608092418059 you 7 46 0.1521739 1.9529605 0.29718964
## 12 1386289314548486149 the 7 48 0.1458333 0.6887144 0.10043752
## 13 1386418725885136897 the 7 43 0.1627907 0.6887144 0.11211630
## 14 1386449748211552258 we 7 52 0.1346154 2.4196194 0.32571799
## 15 1386461474676264961 the 7 51 0.1372549 0.6887144 0.09452943
## 16 1386523862343593986 a 7 44 0.1590909 1.1857833 0.18864735
## 17 1386703648391041028 the 7 44 0.1590909 0.6887144 0.10956820
## 18 1386716167188975616 the 7 52 0.1346154 0.6887144 0.09271156
## 19 1386724743265591297 on 7 50 0.1400000 1.7906236 0.25068731
## 20 1386740367442300928 the 7 55 0.1272727 0.6887144 0.08765456
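For reference, bind_tf_idf computes tf as a term's share of tokens within a tweet and idf as the natural log of the inverse document frequency across tweets. A minimal sanity check recomputing the three columns by hand from the same ah_words data (not a step in the original analysis):

# tf = n / total; idf = ln(number of tweets / number of tweets containing the term); tf_idf = tf * idf
ah_words_manual <- ah_words %>%
  group_by(lower_word) %>%
  mutate(docs_with_term = n_distinct(status_id)) %>%
  ungroup() %>%
  mutate(tf = n / total,
         idf = log(n_distinct(status_id) / docs_with_term),
         tf_idf = tf * idf)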
+ Stop asian hate, SHUT UP YOUR MOUTH, DON’T MAKE FAKE NEWS ABOUT COVID-19
ah_words_tf_idf %>%
group_by(status_id) %>%
slice_max(tf_idf, n=5)## # A tibble: 48,438 x 7
## # Groups: status_id [9,679]
## status_id lower_word n total tf idf tf_idf
## <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 1385811415051636737 counteract 1 48 0.0208 9.18 0.191
## 2 1385811415051636737 gained 1 48 0.0208 9.18 0.191
## 3 1385811415051636737 popularity.it 1 48 0.0208 9.18 0.191
## 4 1385811415051636737 floyds 1 48 0.0208 8.48 0.177
## 5 1385811415051636737 matter 2 48 0.0417 4.17 0.174
## 6 1385811416863629314 muchness! 1 15 0.0667 9.18 0.612
## 7 1385811416863629314 punish 1 15 0.0667 7.57 0.505
## 8 1385811416863629314 whiteness 1 15 0.0667 7.39 0.492
## 9 1385811416863629314 yup 1 15 0.0667 7.10 0.473
## 10 1385811416863629314 who's 1 15 0.0667 6.69 0.446
## # ... with 48,428 more rows
ah_words_tf_idf %>%
group_by(status_id) %>%
slice_max(tf_idf, n=10) %>%
ungroup() %>%
count(lower_word, sort=TRUE)## # A tibble: 19,199 x 2
## lower_word n
## <chr> <int>
## 1 asian 1446
## 2 hate 1411
## 3 stop 573
## 4 antiasian 400
## 5 news 365
## 6 crime 351
## 7 bill 324
## 8 fake 321
## 9 the 309
## 10 up 309
## # ... with 19,189 more rows
# load stop words
stop_words <- scan(file = "./dict/stop_words.txt", what=character(),sep='\n',
encoding='utf-8',fileEncoding='utf-8')
jieba_tokenizer = worker()
jieba_bigram <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
bigram<- ngrams(tokens, 2)
bigram <- lapply(bigram, paste, collapse = " ")
unlist(bigram)
}
})
}
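As a quick check of this custom tokenizer interface (the sentence below is made up for illustration), jieba_bigram returns, for each input string, a character vector of space-joined bigrams, which is the shape unnest_tokens expects from a custom token function:

# Illustrative only; with an English sentence the segments should simply be the words,
# so the result is expected to look like c("stop asian", "asian hate", "hate crimes", "crimes now").
jieba_bigram("stop asian hate crimes now")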
df_gram <- subset(df,select = c(status_id,text,source))
df_bigram <- df_gram %>%
unnest_tokens(bigram, text, token = jieba_bigram)
df_bigram %>%
filter(!str_detect(bigram, regex("[0-9]"))) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!(word1 %in% stop_words), !(word2 %in% stop_words)) %>%
count(word1, word2, sort = TRUE) %>%
unite_("bigram", c("word1","word2"), sep=" ")## # A tibble: 88,351 x 2
## bigram n
## <chr> <int>
## 1 asian hate 4580
## 2 hate crimes 1893
## 3 hate crime 1394
## 4 antiasian hate 1283
## 5 stop asian 1161
## 6 in the 632
## 7 the asian 591
## 8 asian americans 534
## 9 of the 520
## 10 don t 509
## # ... with 88,341 more rows
head(df_bigram,20)## # A tibble: 20 x 3
## status_id source bigram
## <chr> <chr> <chr>
## 1 1387443134104182786 Twitter for iPhone we talk
## 2 1387443134104182786 Twitter for iPhone talk about
## 3 1387443134104182786 Twitter for iPhone about hate
## 4 1387443134104182786 Twitter for iPhone hate crimes
## 5 1387443134104182786 Twitter for iPhone crimes against
## 6 1387443134104182786 Twitter for iPhone against asians
## 7 1387443134104182786 Twitter for iPhone asians a
## 8 1387443134104182786 Twitter for iPhone a lot
## 9 1387443134104182786 Twitter for iPhone lot and
## 10 1387443134104182786 Twitter for iPhone and tbd
## 11 1387443134104182786 Twitter for iPhone tbd if
## 12 1387443134104182786 Twitter for iPhone if any
## 13 1387443134104182786 Twitter for iPhone any of
## 14 1387443134104182786 Twitter for iPhone of these
## 15 1387443134104182786 Twitter for iPhone these end
## 16 1387443134104182786 Twitter for iPhone end up
## 17 1387443134104182786 Twitter for iPhone up being
## 18 1387443134104182786 Twitter for iPhone being that
## 19 1387443134104182786 Twitter for iPhone that but
## 20 1387443134104182786 Twitter for iPhone but asian
jieba_trigram <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
ngram<- ngrams(unlist(tokens), 3)
ngram <- lapply(ngram, paste, collapse = " ")
unlist(ngram)
}
})
}
df_trigram <- df_gram %>%
unnest_tokens(ngrams, text, token = jieba_trigram)
df_trigram %>%
filter(!str_detect(ngrams, regex("[0-9]"))) %>%
count(ngrams, sort = TRUE)## # A tibble: 149,906 x 2
## ngrams n
## <chr> <int>
## 1 stop asian hate 1116
## 2 asian hate crimes 486
## 3 hate crime bill 474
## 4 antiasian hate crimes 473
## 5 asian hate crime 461
## 6 anti asian hate 434
## 7 make fake news 319
## 8 t make fake 319
## 9 fake news about 316
## 10 the asian hate 297
## # ... with 149,896 more rows
df_trigram %>%
filter(!str_detect(ngrams, regex("[0-9]"))) %>%
separate(ngrams, c("word1", "word2", "word3"), sep = " ") %>%
filter(!(word1 %in% stop_words), !(word2 %in% stop_words), !(word3 %in% stop_words)) %>%
count(word1, word2, word3, sort = TRUE) %>%
unite_("ngrams", c("word1", "word2", "word3"), sep=" ")## # A tibble: 149,906 x 2
## ngrams n
## <chr> <int>
## 1 stop asian hate 1116
## 2 asian hate crimes 486
## 3 hate crime bill 474
## 4 antiasian hate crimes 473
## 5 asian hate crime 461
## 6 anti asian hate 434
## 7 make fake news 319
## 8 t make fake 319
## 9 fake news about 316
## 10 the asian hate 297
## # ... with 149,896 more rows
head(df_trigram,20)## # A tibble: 20 x 3
## status_id source ngrams
## <chr> <chr> <chr>
## 1 1387443134104182786 Twitter for iPhone we talk about
## 2 1387443134104182786 Twitter for iPhone talk about hate
## 3 1387443134104182786 Twitter for iPhone about hate crimes
## 4 1387443134104182786 Twitter for iPhone hate crimes against
## 5 1387443134104182786 Twitter for iPhone crimes against asians
## 6 1387443134104182786 Twitter for iPhone against asians a
## 7 1387443134104182786 Twitter for iPhone asians a lot
## 8 1387443134104182786 Twitter for iPhone a lot and
## 9 1387443134104182786 Twitter for iPhone lot and tbd
## 10 1387443134104182786 Twitter for iPhone and tbd if
## 11 1387443134104182786 Twitter for iPhone tbd if any
## 12 1387443134104182786 Twitter for iPhone if any of
## 13 1387443134104182786 Twitter for iPhone any of these
## 14 1387443134104182786 Twitter for iPhone of these end
## 15 1387443134104182786 Twitter for iPhone these end up
## 16 1387443134104182786 Twitter for iPhone end up being
## 17 1387443134104182786 Twitter for iPhone up being that
## 18 1387443134104182786 Twitter for iPhone being that but
## 19 1387443134104182786 Twitter for iPhone that but asian
## 20 1387443134104182786 Twitter for iPhone but asian people
# load asian_lexicon
asian_lexicon <- scan(file = "./dict/asian_lexicon.txt", what=character(),sep='\n',
encoding='utf-8',fileEncoding='utf-8',quiet = T)
# A custom, self-built lexicon of Asian-hate-related phrases
asian_lexicon## [1] "asian hate" "hate crime" "hate crimes" "antiasian hate"
## [5] "black people" "hate crime bill" "fake news"
jieba_tokenizer = worker()
# Register the Asian-related lexicon so re-tokenization keeps these phrases intact
new_user_word(jieba_tokenizer, c(asian_lexicon))## [1] TRUE
chi_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
tokens <- tokens[!tokens %in% stop_words]
# drop tokens that are only one character long
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}
# The earlier tokenization did not use the newly added lexicon, so we re-tokenize and count how often each word appears in each tweet
df_words <- ah_sentences %>%
unnest_tokens(word, text, token=chi_tokenizer) %>%
filter(!str_detect(word, regex("[0-9]"))) %>%
count(status_id, word, sort = TRUE)
head(df_words,20)## status_id word n
## 1 1387389984202711042 asian 14
## 2 1387389984202711042 hate 14
## 3 1387389984202711042 women 14
## 4 1386690850051133449 not 11
## 5 1386923628374200323 men 10
## 6 1386923628374200323 black 9
## 7 1386108746976354308 and 8
## 8 1387091619023712259 the 8
## 9 1387098777895378944 the 8
## 10 1387151033554685953 the 8
## 11 1387287630291873793 men 8
## 12 1385999224249090054 you 7
## 13 1386140608092418059 you 7
## 14 1386289314548486149 the 7
## 15 1386418725885136897 the 7
## 16 1386449748211552258 we 7
## 17 1386461474676264961 the 7
## 18 1386703648391041028 the 7
## 19 1386716167188975616 the 7
## 20 1386724743265591297 on 7
# Filter out the two query keywords "asian" and "hate"
df_pairs <- df_words %>%
pairwise_count(word, status_id, sort = TRUE)%>%
filter(!item1 %in% c("asian", "hate") & !item2 %in% c("asian", "hate"))
head(df_pairs,20)
word_cors <- df_words %>%
group_by(word) %>%
filter(n() >= 30) %>%
pairwise_cor(word, status_id, sort = TRUE)
head(word_cors,20)## # A tibble: 20 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 hellbent forcasting 1
## 2 weaponizing forcasting 1
## 3 forcasting hellbent 1
## 4 weaponizing hellbent 1
## 5 forcasting weaponizing 1
## 6 hellbent weaponizing 1
## 7 hogans maryland 0.978
## 8 hogans doubled 0.978
## 9 maryland hogans 0.978
## 10 doubled hogans 0.978
## 11 sinners humanity 0.977
## 12 humanity sinners 0.977
## 13 sparks headkicking 0.972
## 14 headkicking sparks 0.972
## 15 sold profit 0.970
## 16 profit sold 0.970
## 17 retweet forcasting 0.964
## 18 retweet hellbent 0.964
## 19 forcasting retweet 0.964
## 20 hellbent retweet 0.964
word_cors %>%
filter(item1 %in% c("asian", "hate")) %>%
group_by(item1) %>%
top_n(15) %>%
ungroup() %>%
mutate(item2 = reorder(item2, correlation)) %>%
ggplot(aes(item2, correlation)) +
geom_bar(stat = "identity") +
facet_wrap(~ item1, scales = "free") +
coord_flip()+
theme(text = element_text(family = "Heiti TC Light"))## Selecting by correlation
set.seed(2020)
word_cors %>%
filter(correlation > 0.9) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light")