# Packages this script attaches below. Any that are not installed yet are
# installed first, so the library()/require() calls that follow can succeed.
# The list covers every package loaded in this file, not only the first few.
packages <- c(
  "readr", "dplyr", "jiebaR", "tidyr", "tidytext", "igraph", "topicmodels",
  "ggplot2", "stringr", "purrr", "tm", "data.table", "udpipe", "LDAvis",
  "wordcloud2", "webshot", "htmlwidgets", "servr", "ramify", "RColorBrewer"
)
# Names of all packages currently installed in the active library paths.
existing <- as.character(installed.packages()[, 1])
missing_packages <- setdiff(packages, existing)
# install.packages() accepts a character vector, so a single call covers all
# missing packages; skip the call entirely when nothing is missing.
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(readr)
library(dplyr)
library(jiebaR)
library(tidyr)
library(tidytext)
library(igraph)
library(topicmodels)
library(stringr)
library(ggplot2)
library(purrr)
require(tm)
require(data.table)
require(stringr)
require(udpipe)
require(LDAvis)
require(wordcloud2)
require(webshot)
require(htmlwidgets)
require(servr)
require(ramify)
require(RColorBrewer)
# Palette of 20 colours interpolated from eight colours of the "Set3"
# ColorBrewer palette.
mycolors <- local({
  base_colours <- brewer.pal(8, "Set3")
  interpolate <- colorRampPalette(base_colours)
  interpolate(20)
})
# Directory holding the exported CSV files. Defined once so the script can be
# pointed at another location by editing a single line.
ruyi_data_dir <- "D:/OC Learn/NSYSU/Social Media Analysis/Final Paper/data"

# Article metadata: two CSV files read separately, then stacked into `posts`.
Ruyi_china_drama_articleMetaData <- read_csv(
  file.path(ruyi_data_dir, "Ruyi-china drama_articleMetaData.csv")
)
Ruyi_articleMetaData <- read_csv(
  file.path(ruyi_data_dir, "Ruyi_articleMetaData.csv")
)
posts <- rbind(Ruyi_china_drama_articleMetaData, Ruyi_articleMetaData)
posts

# Reply (comment) data: two CSV files read separately, then stacked into
# `reviews`.
Ruyi_china_drama_articleReviews <- read_csv(
  file.path(ruyi_data_dir, "Ruyi-china drama_articleReviews.csv")
)
Ruyi_articleReviews <- read_csv(
  file.path(ruyi_data_dir, "Ruyi_articleReviews.csv")
)
reviews <- rbind(Ruyi_china_drama_articleReviews, Ruyi_articleReviews)
reviews
# Keep only the columns needed downstream: the article URL, the commenter,
# the comment status and the comment text.
reviews <- select(reviews, artUrl, cmtPoster, cmtStatus, cmtContent)
reviews
# Date highlighted on the chart with a red vertical line.
peak_date <- as.Date("2018-09-14")

# Line chart of the number of posts per artDate.
posts %>%
  group_by(artDate) %>%
  summarise(count = n()) %>%
  ggplot(aes(artDate, count)) +
  # NOTE(review): `size` is kept from the original; ggplot2 >= 3.4.0 prefers
  # `linewidth` for lines — switch once the minimum ggplot2 version is known.
  geom_line(color = "blue", size = 1) +
  theme_classic() +
  # A fixed xintercept draws a single marker line. Mapping the constant through
  # aes() would draw one overlapping line per row of the summarised data.
  # as.numeric() matches how the original passed the date to the x scale.
  geom_vline(xintercept = as.numeric(peak_date), colour = "red")
# The peak is on 2018-09-14, with more than 15 posts.
# Number of distinct post authors (331 in the recorded run).
posts$artPoster %>% unique() %>% length()
## [1] 331
# Number of distinct commenters (6,289 in the recorded run).
reviews$cmtPoster %>% unique() %>% length()
## [1] 6289
# Every user name seen as a post author or as a commenter.
allPoster <- append(posts$artPoster, reviews$cmtPoster)
# Number of distinct users overall (6,413 in the recorded run). This is less
# than 331 + 6,289, so some users both posted and commented.
allPoster %>% unique() %>% length()
## [1] 6413
# One row per distinct user seen in posts or comments.
# A user who authored at least one post is labelled "poster";
# everyone else is labelled "replyer".
userList <- data.frame(user = unique(allPoster))
userList$type <- ifelse(
  userList$user %in% posts$artPoster,
  "poster",
  "replyer"
)
userList
# Inner-join posts with their comments on artUrl: only articles that have at
# least one matching comment row are kept (all = FALSE is merge()'s default).
posts_Reviews <- posts %>%
  merge(reviews, by = "artUrl", all = FALSE)
posts_Reviews