library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Show the working directory for the record.
getwd()
## [1] "D:/NSYSU/labproject"
# Load the news data set. load() invisibly returns the names of the objects it
# restored, so capture them and stop right away if `D` (the object every later
# step works on) is not among them, rather than failing further down or
# silently picking up a stale `D` left over in the session.
rdata_path <- "D:/NSYSU/labproject/TW_in_News_2020_05_20.rdata"
loaded_objects <- load(rdata_path)
if (!"D" %in% loaded_objects) {
  stop("Expected object 'D' in ", rdata_path, call. = FALSE)
}

欄位名稱

# Print the names of the columns held in D (ls() returns them sorted).
print(ls(name = D))
##  [1] "Author"                             "Comments Count"                    
##  [3] "External Links"                     "Language"                          
##  [5] "Main Image"                         "Mentioned Locations"               
##  [7] "Mentioned Organizations (negative)" "Mentioned Organizations (neutral)" 
##  [9] "Mentioned Organizations (none)"     "Mentioned Organizations (positive)"
## [11] "Mentioned Persons (negative)"       "Mentioned Persons (neutral)"       
## [13] "Mentioned Persons (none)"           "Mentioned Persons (positive)"      
## [15] "Participants Count"                 "Performance Score"                 
## [17] "Post Link"                          "Post Order in Thread"              
## [19] "Post Publication Date"              "Rating"                            
## [21] "Section Link"                       "Section Title"                     
## [23] "Site Country"                       "Site Name"                         
## [25] "Site Type"                          "Text"                              
## [27] "Thread Link"                        "Thread Publication Date"           
## [29] "Title...19"                         "Title...5"                         
## [31] "ts"

每個語言的篇數

# Number of posts per language, most frequent first.
# count() groups by the given column itself and returns an ungrouped tibble,
# so a preceding group_by() is unnecessary and D_lang no longer carries a
# leftover grouping into later steps. sort = TRUE orders by descending n.
D_lang <- D %>%
  count(Language, sort = TRUE)
head(D_lang, 20)
## # A tibble: 20 x 2
##    Language       n
##    <chr>      <int>
##  1 english    98301
##  2 chineset    5822
##  3 italian     5757
##  4 spanish     5447
##  5 indonesian  3980
##  6 french      2977
##  7 german      2672
##  8 chinese     1924
##  9 portuguese  1877
## 10 japanese    1277
## 11 vietnamese   931
## 12 swedish      730
## 13 dutch        496
## 14 romanian     478
## 15 russian      477
## 16 malay        233
## 17 danish       169
## 18 hungarian    132
## 19 norwegian    129
## 20 polish       108

報導的媒體有哪些

# Number of posts per `Section Title`, most frequent first.
# count() groups by the given column itself and returns an ungrouped tibble,
# so a preceding group_by() is unnecessary and D_media no longer carries a
# leftover grouping into later steps. sort = TRUE orders by descending n.
# NOTE(review): `Section Title` is NA for 62418 rows and splits one outlet
# across several sections (e.g. the "Agenzia Nova | ..." rows); `Site Name`
# may be the better key for "which media" -- confirm before relying on this.
D_media <- D %>%
  count(`Section Title`, sort = TRUE)
head(D_media, 20)
## # A tibble: 20 x 2
##    `Section Title`                                   n
##    <chr>                                         <int>
##  1 <NA>                                          62418
##  2 fitnell.com RSS feed                           1097
##  3 Agenzia Nova                                    982
##  4 digiblogbox.com RSS feed                        819
##  5 Agenzia Nova | Balcani                          749
##  6 Agenzia Nova | Medio Oriente                    513
##  7 Agenzia Nova | Nord Africa                      499
##  8 alltdesign.com RSS feed                         473
##  9 Latest | FOCUS TAIWAN - CNA ENGLISH NEWS        473
## 10 canariblogs.com RSS feed                        466
## 11 Flickr: Explore everyone's photos on a Map      464
## 12 isblog.net RSS feed                             464
## 13 批踢踢實業坊                                    435
## 14 blogzet.com RSS feed                            398
## 15 tblogz.com RSS feed                             365
## 16 Conservapedia  - Recent changes [en]            359
## 17 RTI Radio Taiwan International                  346
## 18 American Banking News                           319
## 19 Eat-Drink-Man-Woman - www.hardwarezone.com.sg   271
## 20 ReleaseWire - Latest Press Releases             254