library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Print the current working directory (its value is echoed just below).
getwd()
## [1] "D:/NSYSU/labproject"
# Restore the objects stored in the .rdata snapshot into the calling
# environment. Presumably this is where the data frame `D` used by the
# following steps comes from -- confirm against the file contents.
load(file = "D:/NSYSU/labproject/TW_in_News_2020_05_20.rdata")
欄位名稱
# Column names of `D`, sorted. `names()` is the direct accessor for a data
# frame's columns; `ls()` is meant for environments, only works here through
# implicit coercion, and would silently hide any name starting with ".".
# `sort()` keeps the same sorted listing that `ls()` printed.
sort(names(D))
## [1] "Author" "Comments Count"
## [3] "External Links" "Language"
## [5] "Main Image" "Mentioned Locations"
## [7] "Mentioned Organizations (negative)" "Mentioned Organizations (neutral)"
## [9] "Mentioned Organizations (none)" "Mentioned Organizations (positive)"
## [11] "Mentioned Persons (negative)" "Mentioned Persons (neutral)"
## [13] "Mentioned Persons (none)" "Mentioned Persons (positive)"
## [15] "Participants Count" "Performance Score"
## [17] "Post Link" "Post Order in Thread"
## [19] "Post Publication Date" "Rating"
## [21] "Section Link" "Section Title"
## [23] "Site Country" "Site Name"
## [25] "Site Type" "Text"
## [27] "Thread Link" "Thread Publication Date"
## [29] "Title...19" "Title...5"
## [31] "ts"
每個語言的篇數
# Number of rows in `D` per `Language`, most frequent first.
# `count()` does its own grouping, so a preceding `group_by()` is redundant
# and only leaves the result grouped. `sort = TRUE` orders by descending `n`
# and the returned tibble is ungrouped, so later verbs act on the whole table.
D_lang <- D %>%
  count(Language, sort = TRUE)
# Show the 20 most frequent languages.
head(D_lang, 20)
## # A tibble: 20 x 2
## Language n
## <chr> <int>
## 1 english 98301
## 2 chineset 5822
## 3 italian 5757
## 4 spanish 5447
## 5 indonesian 3980
## 6 french 2977
## 7 german 2672
## 8 chinese 1924
## 9 portuguese 1877
## 10 japanese 1277
## 11 vietnamese 931
## 12 swedish 730
## 13 dutch 496
## 14 romanian 478
## 15 russian 477
## 16 malay 233
## 17 danish 169
## 18 hungarian 132
## 19 norwegian 129
## 20 polish 108
報導的媒體有哪些
# Number of rows in `D` per `Section Title`, most frequent first.
# `count()` does its own grouping, so a preceding `group_by()` is redundant
# and only leaves the result grouped. `sort = TRUE` orders by descending `n`
# and the returned tibble is ungrouped. Rows with a missing section title are
# counted together under NA.
# NOTE(review): `D` also has a `Site Name` column, which may be closer to
# "media outlet" than `Section Title` -- confirm which one is intended.
D_media <- D %>%
  count(`Section Title`, sort = TRUE)
# Show the 20 most frequent section titles.
head(D_media, 20)
## # A tibble: 20 x 2
## `Section Title` n
## <chr> <int>
## 1 <NA> 62418
## 2 fitnell.com RSS feed 1097
## 3 Agenzia Nova 982
## 4 digiblogbox.com RSS feed 819
## 5 Agenzia Nova | Balcani 749
## 6 Agenzia Nova | Medio Oriente 513
## 7 Agenzia Nova | Nord Africa 499
## 8 alltdesign.com RSS feed 473
## 9 Latest | FOCUS TAIWAN - CNA ENGLISH NEWS 473
## 10 canariblogs.com RSS feed 466
## 11 Flickr: Explore everyone's photos on a Map 464
## 12 isblog.net RSS feed 464
## 13 批踢踢實業坊 435
## 14 blogzet.com RSS feed 398
## 15 tblogz.com RSS feed 365
## 16 Conservapedia - Recent changes [en] 359
## 17 RTI Radio Taiwan International 346
## 18 American Banking News 319
## 19 Eat-Drink-Man-Woman - www.hardwarezone.com.sg 271
## 20 ReleaseWire - Latest Press Releases 254