library(readr)
library(dplyr)
library(jiebaR)
library(tidyr)
library(tidytext)
library(igraph)
library(topicmodels)
library(stringr)
library(ggplot2)
# 讀取ptt理科太太討論資料
posts <- read_csv("./ptt_LiKeTaiTai.csv")
Parsed with column specification:
cols(
artTitle = [31mcol_character()[39m,
artDate = [34mcol_date(format = "")[39m,
artTime = [34mcol_time(format = "")[39m,
artUrl = [31mcol_character()[39m,
artContent = [31mcol_character()[39m
)
posts
# 讀取ptt理科太太討論串回覆資料
reviews <- read_csv("./articleMetaData.csv")
Parsed with column specification:
cols(
artTitle = [31mcol_character()[39m,
artDate = [34mcol_date(format = "")[39m,
artTime = [34mcol_time(format = "")[39m,
artUrl = [31mcol_character()[39m,
artPoster = [31mcol_character()[39m,
artCat = [31mcol_character()[39m,
commentPoster = [31mcol_character()[39m,
commentStatus = [31mcol_character()[39m,
commentDate = [34mcol_datetime(format = "")[39m,
commentContent = [31mcol_character()[39m
)
reviews
# 只選取需要的欄位
reviews <- reviews %>%
select(artUrl, artPoster, commentPoster,commentStatus, commentContent)
reviews
# 發文者數量
length(unique(reviews$artPoster))
[1] 192
# 回覆者數量
length(unique(reviews$commentPoster))
[1] 5357
# 總共有參與的人數
allPoster <- c(reviews$artPoster, reviews$commentPoster)
length(unique(allPoster))
[1] 5478
建立社群網路圖
# 把評論和文章依據artUrl innerJoin起來
posts_Reviews <- merge(x = posts, y = reviews, by = "artUrl")
posts_Reviews
# 整理所有出現過的使用者
# 如果他曾發過文的話就標註他爲poster
# 如果沒有發過文的話則標註他爲replyer
userList <- data.frame(user=unique(allPoster)) %>%
mutate(type=ifelse(user%in%posts_Reviews$artPoster, "poster", "replyer"))
userList
# 取出 commentPoster(回覆者)、artPoster(發文者)、artUrl(文章連結)、commentStatus(評論狀態)、artDate(日期)五個欄位
link <- posts_Reviews %>%
select(commentPoster, artPoster, artUrl, commentStatus, artDate)
link
# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, directed=T)
reviewNetwork
IGRAPH c2a3a48 DN-- 3369 5556 --
+ attr: name (v/c), artUrl (e/c), commentStatus (e/c), artDate (e/n)
+ edges from c2a3a48 (vertex names):
[1] heat0204 ->moneybuy heat0204 ->moneybuy jimmyyyyyy ->moneybuy
[4] jerry83688 ->moneybuy jerry83688 ->moneybuy leon82guy ->moneybuy
[7] vivi830505 ->waymayday clamperni ->waymayday fangbr ->waymayday
[10] mij ->waymayday jajujo ->waymayday Bokolo ->waymayday
[13] fishouse ->waymayday Jin63916 ->waymayday zxc8424 ->waymayday
[16] sclbtlove ->waymayday s080014 ->waymayday fishouse ->waymayday
[19] sclbtlove ->waymayday Beanoodle ->waymayday motop7689 ->waymayday
[22] motop7689 ->waymayday sclbtlove ->waymayday g820215 ->charco
+ ... omitted several edges
# 畫出網路圖
plot(reviewNetwork)

只看11/4的文章和它的回覆
# 篩選11/4的文章和它的回覆
# 取出 commentPoster(回覆者)、artPoster(發文者)、artUrl(文章連結)、commentStatus(評論狀態)、artDate(發文日期)五個欄位
# ptt的回覆有三種,推文、噓文、箭頭
# 我們只要看推噓就好,因此把箭頭清掉
link2 <- posts_Reviews %>%
filter(artDate=='2018-11-4', commentStatus!="→") %>%
select(commentPoster, artPoster, artUrl, commentStatus, artDate)
link2
# 這邊要篩選link中有出現的使用者
# 因爲如果userList(igraph中graph_from_data_frame的v參數吃的那個東西)中出現了沒有在link中出現的使用者
# 也會被igraph畫上去,圖片就會變得沒有意義
filtered_user <- userList %>%
filter(user%in%link2$commentPoster | user%in%link2$artPoster) %>%
arrange(desc(type))
filtered_user
# 建立網路關係
reviewNetwork2 <- graph_from_data_frame(d=link2, v=filtered_user, directed=T)
reviewNetwork2
IGRAPH 574bd98 DN-B 21 19 --
+ attr: name (v/c), type (v/c), artUrl (e/c), commentStatus (e/c), artDate (e/n)
+ edges from 574bd98 (vertex names):
[1] vivi830505 ->waymayday clamperni ->waymayday fangbr ->waymayday
[4] mij ->waymayday jajujo ->waymayday fishouse ->waymayday
[7] Jin63916 ->waymayday zxc8424 ->waymayday s080014 ->waymayday
[10] motop7689 ->waymayday tim9527 ->charco chivalry70 ->charco
[13] RLH ->charco zukidelko ->charco Sougetu ->charco
[16] kanetakusan->charco nrxadsl ->charco osan ->charco
[19] kauosong ->charco
# 畫出網路圖
plot(reviewNetwork2)

針對node, edge 進行屬性標記
# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork2)
V(reviewNetwork2)$label <- names(labels)
V(reviewNetwork2)$color <- ifelse(V(reviewNetwork2)$type=="poster", "gold", "lightblue")
# 使用推噓文作爲連線的顏色
E(reviewNetwork2)$color <- ifelse(E(reviewNetwork2)$commentStatus == "推", "green", "red")
畫出社群網路圖
## 畫出社群網路圖
set.seed(217)
V(reviewNetwork2)$color <- ifelse(V(reviewNetwork2)$type=="poster", "gold", "lightblue")
plot(reviewNetwork2, vertex.size=15, edge.arrow.size=.5,
vertex.label=ifelse(degree(reviewNetwork2) >2, V(reviewNetwork2)$label, NA), vertex.label.ces=.8)
# 顯示中文內容
# set the font family to "serif", saving defaults in `op`
op <- par(family = "Heiti TC Light") # par函數用於設定或詢問繪圖參數
# 加入標示,於右下角顯示node color 說明
legend("bottomright",
title = "node color",
c("poster","replyer"), pch=21, col="#777777",pt.bg=c("gold","lightblue"), pt.cex=1, cex=.8)
# 加入標示,於左上角顯示edge color 說明
legend("topleft",
title = "edge color",
c("推","噓"), col=c("green","red"), lty=1, cex=.8, text.font = )
## reset plotting parameters
par(op)

結語:
使用network的視覺化可以讓我們了解11/4的文章 以charco、waymayday兩位發文者的評論為主,其中charco的發文都是推文 waymayday有3則噓文。 PTT是有名的反指標,當戰理科太太在八卦變成流行,那就代表理科太太成功了, 因為只有夠份量的公眾人物,才會引起PTT鄉民戰他的興趣。 透過這樣視覺化就能先了解初步的架構,進而對內容做細部的探討。
LS0tCnRpdGxlOiAncHR055CG56eR5aSq5aSq5paH56ug6KiO6KuW57ay6Lev6Zec5L+CJwphdXRob3I6ICLnrKzkuozntYQ8YnIvPk4wNDQwMjAwMTIg5p2c55GL6Iy5PGJyLz5OMDQ0MDIwMDI2IOW8tembheWptzxici8+TjA2NDAyMDAxNSDpkJjmmI7lv5c8YnIvPk4wNjQyMjAwMDcg6Zmz5oWn5YCpPGJyLz5OMDY0MjIwMDA5IOisneWHseWogTxici8+TjA2NDIyMDAyNiDlionlv5fmlL8iCmRhdGU6ICIyMDE5LzA1LzE0IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShyZWFkcikKbGlicmFyeShkcGx5cikKbGlicmFyeShqaWViYVIpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkodGlkeXRleHQpCmxpYnJhcnkoaWdyYXBoKQpsaWJyYXJ5KHRvcGljbW9kZWxzKQpsaWJyYXJ5KHN0cmluZ3IpCmxpYnJhcnkoZ2dwbG90MikKYGBgCgpgYGB7cn0KIyDoroDlj5ZwdHTnkIbnp5HlpKrlpKroqI7oq5bos4fmlpkKcG9zdHMgPC0gcmVhZF9jc3YoIi4vcHR0X0xpS2VUYWlUYWkuY3N2IikKcG9zdHMKYGBgCgpgYGB7cn0KIyDoroDlj5ZwdHTnkIbnp5HlpKrlpKroqI7oq5bkuLLlm57opobos4fmlpkKcmV2aWV3cyA8LSByZWFkX2NzdigiLi9hcnRpY2xlTWV0YURhdGEuY3N2IikKcmV2aWV3cwpgYGAKCmBgYHtyfQojIOWPqumBuOWPlumcgOimgeeahOashOS9jQpyZXZpZXdzIDwtIHJldmlld3MgJT4lCiAgICAgIHNlbGVjdChhcnRVcmwsIGFydFBvc3RlciwgY29tbWVudFBvc3Rlcixjb21tZW50U3RhdHVzLCBjb21tZW50Q29udGVudCkKcmV2aWV3cwpgYGAKCmBgYHtyfQojIOeZvOaWh+iAheaVuOmHjwpsZW5ndGgodW5pcXVlKHJldmlld3MkYXJ0UG9zdGVyKSkKYGBgCgpgYGB7cn0KIyDlm57opobogIXmlbjph48KbGVuZ3RoKHVuaXF1ZShyZXZpZXdzJGNvbW1lbnRQb3N0ZXIpKQpgYGAKCmBgYHtyfQojIOe4veWFseacieWPg+iIh+eahOS6uuaVuAphbGxQb3N0ZXIgPC0gYyhyZXZpZXdzJGFydFBvc3RlciwgcmV2aWV3cyRjb21tZW50UG9zdGVyKQpsZW5ndGgodW5pcXVlKGFsbFBvc3RlcikpCmBgYAoKIyMg5bu656uL56S+576k57ay6Lev5ZyWCmBgYHtyfQojIOaKiuipleirluWSjOaWh+eroOS+neaTmmFydFVybCBpbm5lckpvaW7otbfkvoYKcG9zdHNfUmV2aWV3cyA8LSBtZXJnZSh4ID0gcG9zdHMsIHkgPSByZXZpZXdzLCBieSA9ICJhcnRVcmwiKQpwb3N0c19SZXZpZXdzCmBgYAoKYGBge3J9CiMg5pW055CG5omA5pyJ5Ye654++6YGO55qE5L2/55So6ICFCiMg5aaC5p6c5LuW5pu+55m86YGO5paH55qE6Kmx5bCx5qiZ6Ki75LuW54iycG9zdGVyCiMg5aaC5p6c5rKS5pyJ55m86YGO5paH55qE6Kmx5YmH5qiZ6Ki75LuW54iycmVwbHllcgp1c2VyTGlzdCA8LSBkYXRhLmZyYW1lKHVzZXI9dW5pcXVlKGFsbFBvc3RlcikpICU+JQogICAgICAgICAgICAgIG11dGF0ZSh0eXBlPWlmZWxzZSh1c2VyJWluJXBvc3RzX1Jldmlld3MkYXJ0UG9zdGVyLCAicG9zdGVyIiwgInJlcGx5ZXIiKSkKdXNlckxpc3QKYGBgCgoKYGBge3J9CiMg5Y+W5Ye6IGNvbW1lbnRQb3N0ZXIo5Zue6KaG6ICFKeOAgWFydFBvc3RlcijnmbzmlofogIUp44CBYXJ0VXJsKOaWh+eroOmAo+e1kCnjgIFjb21tZW50U3RhdHVzKOipleirlueLgOaFiynjgIFhcnREYXRlKOaXpeacnynkupTlgIvmrITkvY0KbGluayA8LSBwb3N0c19SZXZpZXdzICU+JQogICAgICAgIHNlbGVjdChjb21tZW50UG9zdGVyLCBhcnRQb3N0ZXIsIGFydFVybCwgY29tbWVudFN0YXR1cywgYXJ0RGF0ZSkKbGluawpgYGAKCmBgYHtyfQojIOW7uueri+e2sui3r+mXnOS/ggpyZXZpZXdOZXR3b3JrIDwtIGdyYXBoX2Zyb21fZGF0YV9mcmFtZShkPWxpbmssIGRpcmVjdGVkPVQpCnJldmlld05ldHdvcmsKYGBgCgpgYGB7cn0KIyDnlavlh7rntrLot6/lnJYKcGxvdChyZXZpZXdOZXR3b3JrKQpgYGAKCiMjIOWPquecizExLzTnmoTmlofnq6DlkozlroPnmoTlm57opoYKYGBge3J9CiMg56+p6YG4MTEvNOeahOaWh+eroOWSjOWug+eahOWbnuimhgojIOWPluWHuiBjb21tZW50UG9zdGVyKOWbnuimhuiAhSnjgIFhcnRQb3N0ZXIo55m85paH6ICFKeOAgWFydFVybCjmlofnq6DpgKPntZAp44CBY29tbWVudFN0YXR1cyjoqZXoq5bni4DmhYsp44CBYXJ0RGF0ZSjnmbzmlofml6XmnJ8p5LqU5YCL5qyE5L2NCiMgcHR055qE5Zue6KaG5pyJ5LiJ56iu77yM5o6o5paH44CB5ZmT5paH44CB566t6aCtCiMg5oiR5YCR5Y+q6KaB55yL5o6o5ZmT5bCx5aW977yM5Zug5q2k5oqK566t6aCt5riF5o6JCmxpbmsyIDwtIHBvc3RzX1Jldmlld3MgJT4lCiAgZmlsdGVyKGFydERhdGU9PScyMDE4LTExLTQnLCBjb21tZW50U3RhdHVzIT0i4oaSIikgJT4lCiAgc2VsZWN0KGNvbW1lbnRQb3N0ZXIsIGFydFBvc3RlciwgYXJ0VXJsLCBjb21tZW50U3RhdHVzLCBhcnREYXRlKQpsaW5rMgpgYGAKCmBgYHtyfQojIOmAmemCiuimgeevqemBuGxpbmvkuK3mnInlh7rnj77nmoTkvb/nlKjogIUKIyDlm6DniLLlpoLmnpx1c2VyTGlzdO+8iGlncmFwaOS4rWdyYXBoX2Zyb21fZGF0YV9mcmFtZeeahHblj4PmlbjlkIPnmoTpgqPlgIvmnbHopb/vvInkuK3lh7rnj77kuobmspLmnInlnKhsaW5r5Lit5Ye654++55qE5L2/55So6ICFCiMg5Lmf5pyD6KKraWdyYXBo55Wr5LiK5Y6777yM5ZyW54mH5bCx5pyD6K6K5b6X5rKS5pyJ5oSP576pCmZpbHRlcmVkX3VzZXIgPC0gdXNlckxpc3QgJT4lCiAgICAgICAgICBmaWx0ZXIodXNlciVpbiVsaW5rMiRjb21tZW50UG9zdGVyIHwgdXNlciVpbiVsaW5rMiRhcnRQb3N0ZXIpICU+JQogICAgICAgICAgYXJyYW5nZShkZXNjKHR5cGUpKQpmaWx0ZXJlZF91c2VyCgojIOW7uueri+e2sui3r+mXnOS/ggpyZXZpZXdOZXR3b3JrMiA8LSBncmFwaF9mcm9tX2RhdGFfZnJhbWUoZD1saW5rMiwgdj1maWx0ZXJlZF91c2VyLCBkaXJlY3RlZD1UKQpyZXZpZXdOZXR3b3JrMgpgYGAKYGBge3J9CiMg55Wr5Ye657ay6Lev5ZyWCnBsb3QocmV2aWV3TmV0d29yazIpCmBgYAoKIyMg6Yed5bCNbm9kZSwgZWRnZSDpgLLooYzlsazmgKfmqJnoqJgKYGBge3J9CiMg5L6d5pOa5L2/55So6ICF6Lqr5Lu95bCN6bue6YCy6KGM5LiK6ImyCmxhYmVscyA8LSBkZWdyZWUocmV2aWV3TmV0d29yazIpClYocmV2aWV3TmV0d29yazIpJGxhYmVsIDwtIG5hbWVzKGxhYmVscykKVihyZXZpZXdOZXR3b3JrMikkY29sb3IgPC0gaWZlbHNlKFYocmV2aWV3TmV0d29yazIpJHR5cGU9PSJwb3N0ZXIiLCAiZ29sZCIsICJsaWdodGJsdWUiKQoKIyDkvb/nlKjmjqjlmZPmlofkvZzniLLpgKPnt5rnmoTpoY/oibIKRShyZXZpZXdOZXR3b3JrMikkY29sb3IgPC0gaWZlbHNlKEUocmV2aWV3TmV0d29yazIpJGNvbW1lbnRTdGF0dXMgPT0gIuaOqCIsICJncmVlbiIsICJyZWQiKQpgYGAKCiMjIOeVq+WHuuekvue+pOe2sui3r+WclgpgYGB7cn0KIyDku6XmmK/lkKbngrpwb3N0ZXLkvZzngrpub2RlIGNvbG9yCiMgZGVncmVlID4gMiDnmoRub2RlICwg5omN6aGv56S6bm9kZSBsYWJlbApzZXQuc2VlZCgyMTcpCnBsb3QocmV2aWV3TmV0d29yazIsIHZlcnRleC5zaXplPTE1LCBlZGdlLmFycm93LnNpemU9LjUsCiAgICAgdmVydGV4LmxhYmVsPWlmZWxzZShkZWdyZWUocmV2aWV3TmV0d29yazIpID4yLCBWKHJldmlld05ldHdvcmsyKSRsYWJlbCwgTkEpLCAgdmVydGV4LmxhYmVsLmNlcz0uOCkKCiMg6aGv56S65Lit5paH5YWn5a65CiMgc2V0IHRoZSBmb250IGZhbWlseSB0byAic2VyaWYiLCBzYXZpbmcgZGVmYXVsdHMgaW4gYG9wYApvcCA8LSBwYXIoZmFtaWx5ID0gIkhlaXRpIFRDIExpZ2h0IikgIyBwYXLlh73mlbjnlKjmlrzoqK3lrprmiJboqaLllY/nuarlnJblj4PmlbgKCiMg5Yqg5YWl5qiZ56S677yM5pa85Y+z5LiL6KeS6aGv56S6bm9kZSBjb2xvciDoqqrmmI4KbGVnZW5kKCJib3R0b21yaWdodCIsIAogICAgICAgdGl0bGUgPSAibm9kZSBjb2xvciIsCiAgICAgICBjKCJwb3N0ZXIiLCJyZXBseWVyIiksIHBjaD0yMSwgY29sPSIjNzc3Nzc3IixwdC5iZz1jKCJnb2xkIiwibGlnaHRibHVlIiksIHB0LmNleD0xLCBjZXg9LjgpCgojIOWKoOWFpeaomeekuu+8jOaWvOW3puS4iuinkumhr+ekumVkZ2UgY29sb3Ig6Kqq5piOCmxlZ2VuZCgidG9wbGVmdCIsIAogICAgICAgdGl0bGUgPSAiZWRnZSBjb2xvciIsCiAgICAgICBjKCLmjqgiLCLlmZMiKSwgY29sPWMoImdyZWVuIiwicmVkIiksIGx0eT0xLCBjZXg9LjgsIHRleHQuZm9udCA9ICkKCiMjIHJlc2V0IHBsb3R0aW5nIHBhcmFtZXRlcnMKcGFyKG9wKQpgYGAKCj4g57WQ6Kqe77yaPGJyPgrkvb/nlKhuZXR3b3Jr55qE6KaW6Ka65YyW5Y+v5Lul6K6T5oiR5YCR5LqG6KejMTEvNOeahOaWh+eroArku6VjaGFyY2/jgIF3YXltYXlkYXnlhankvY3nmbzmlofogIXnmoToqZXoq5bngrrkuLvvvIzlhbbkuK1jaGFyY2/nmoTnmbzmlofpg73mmK/mjqjmlocKd2F5bWF5ZGF55pyJM+WJh+WZk+aWh+OAggpQVFTmmK/mnInlkI3nmoTlj43mjIfmqJnvvIznlbbmiLDnkIbnp5HlpKrlpKrlnKjlhavljaborormiJDmtYHooYzvvIzpgqPlsLHku6PooajnkIbnp5HlpKrlpKrmiJDlip/kuobvvIwK5Zug54K65Y+q5pyJ5aSg5Lu96YeP55qE5YWs55y+5Lq654mp77yM5omN5pyD5byV6LW3UFRU6YSJ5rCR5oiw5LuW55qE6IiI6Laj44CCCumAj+mBjumAmeaoo+imluimuuWMluWwseiDveWFiOS6huino+WIneatpeeahOaetuani++8jOmAsuiAjOWwjeWFp+WuueWBmue0sOmDqOeahOaOouiojuOAgg==