title: “3_22 HW” author: “Kyle” date: “2019年3月22日” output: html_document — —
#讀取資料
library(readr)
data7 <- read_csv("201807_data.csv")
## Parsed with column specification:
## cols(
## Date = col_character(),
## Page_Name = col_character(),
## Page_ID = col_double(),
## Link = col_logical(),
## Type = col_character(),
## All_Reaction_Count = col_double(),
## LIKE_COUNT = col_double(),
## WOW_COUNT = col_double(),
## LOVE_COUNT = col_double(),
## HAHA_COUNT = col_double(),
## SAD_COUNT = col_double(),
## ANGRY_COUNT = col_double(),
## Comment_Count = col_double(),
## Share_Count = col_double(),
## Message = col_character(),
## Link_Title = col_character(),
## `Link Description` = col_character(),
## created_time = col_double()
## )
data8 <- read_csv("201808_data.csv")
## Parsed with column specification:
## cols(
## Date = col_character(),
## Page_Name = col_character(),
## Page_ID = col_double(),
## Link = col_logical(),
## Type = col_character(),
## All_Reaction_Count = col_double(),
## LIKE_COUNT = col_double(),
## WOW_COUNT = col_double(),
## LOVE_COUNT = col_double(),
## HAHA_COUNT = col_double(),
## SAD_COUNT = col_double(),
## ANGRY_COUNT = col_double(),
## Comment_Count = col_double(),
## Share_Count = col_double(),
## Message = col_character(),
## Link_Title = col_character(),
## `Link Description` = col_character(),
## created_time = col_double()
## )
data9 <- read_csv("201809_data.csv")
## Parsed with column specification:
## cols(
## Date = col_character(),
## Page_Name = col_character(),
## Page_ID = col_double(),
## Link = col_logical(),
## Type = col_character(),
## All_Reaction_Count = col_double(),
## LIKE_COUNT = col_double(),
## WOW_COUNT = col_double(),
## LOVE_COUNT = col_double(),
## HAHA_COUNT = col_double(),
## SAD_COUNT = col_double(),
## ANGRY_COUNT = col_double(),
## Comment_Count = col_double(),
## Share_Count = col_double(),
## Message = col_character(),
## Link_Title = col_character(),
## `Link Description` = col_character(),
## created_time = col_double()
## )
data10 <- read_csv("201810_data.csv")
## Parsed with column specification:
## cols(
## Date = col_character(),
## Page_Name = col_character(),
## Page_ID = col_double(),
## Link = col_logical(),
## Type = col_character(),
## All_Reaction_Count = col_double(),
## LIKE_COUNT = col_double(),
## WOW_COUNT = col_double(),
## LOVE_COUNT = col_double(),
## HAHA_COUNT = col_double(),
## SAD_COUNT = col_double(),
## ANGRY_COUNT = col_double(),
## Comment_Count = col_double(),
## Share_Count = col_double(),
## Message = col_character(),
## Link_Title = col_character(),
## `Link Description` = col_character(),
## created_time = col_double()
## )
data11 = read_csv("201811_data.csv")
## Parsed with column specification:
## cols(
## Date = col_character(),
## Page_Name = col_character(),
## Page_ID = col_double(),
## Link = col_logical(),
## Type = col_character(),
## All_Reaction_Count = col_double(),
## LIKE_COUNT = col_double(),
## WOW_COUNT = col_double(),
## LOVE_COUNT = col_double(),
## HAHA_COUNT = col_double(),
## SAD_COUNT = col_double(),
## ANGRY_COUNT = col_double(),
## Comment_Count = col_double(),
## Share_Count = col_double(),
## Message = col_character(),
## Link_Title = col_character(),
## `Link Description` = col_character(),
## created_time = col_double()
## )
data=rbind(data7, data8,data9,data10,data11)
rm(data7,data8,data9,data10,data11)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Su = filter(data, grepl("蘇貞昌", data$Page_Name) == T&grepl("蘇貞昌後援會", data$Page_Name) == F)
Hou = filter(data, grepl("侯友宜", data$Page_Name) == T&grepl("鐵漢柔情 力挺侯友宜", data$Page_Name) == F)
#設定時間
Su$Date = as.POSIXct(Su$Date,format = "%Y/%m/%d %H:%M:%S")
Hou$Date = as.POSIXct(Hou$Date,format = "%Y/%m/%d %H:%M:%S")
#把11/24之後的貼文刪除以及加一個只有日期沒有時間的欄位
library(ggplot2)
Pei = rbind(Hou,Su)
Pei = filter(Pei,Pei$Date<"2018/11/24")
Pei$cDate = as.character(Pei$Date)
Pei$cDate = sapply(Pei$cDate,function(x) strsplit(x,"[ ]") %>% unlist %>% .[1])
Pei$cDate = Pei$cDate %>% as.Date("%Y-%m-%d")
#兩候選人總Po文類型比較
Peitype=Pei%>%group_by(Type)%>%count()
label_value <- paste('(', round(Peitype$n/sum(Peitype$n) * 100, 1), '%)', sep = '')
label=paste(Peitype$Type,label_value,sep = "")
ggplot(Peitype, aes(x="", y=n, fill=Type)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0)+
labs(x = '', y = '', title = '')+
theme(axis.text = element_blank())+
scale_fill_discrete(labels = label)
#蘇貞昌貼文類型分析
Sutype=Su%>%group_by(Type)%>%count()
label_value <- paste('(', round(Sutype$n/sum(Sutype$n) * 100, 1), '%)', sep = '')
label=paste(Sutype$Type,label_value,sep = "")
ggplot(Sutype, aes(x="", y=n, fill=Type)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0)+
labs(x = '', y = '', title = '')+
theme(axis.text = element_blank())+
scale_fill_discrete(labels = label)
#侯友宜貼文類型分析
Houtype=Su%>%group_by(Type)%>%count()
label_value <- paste('(', round(Houtype$n/sum(Houtype$n) * 100, 1), '%)', sep = '')
label=paste(Houtype$Type,label_value,sep = "")
ggplot(Houtype, aes(x="", y=n, fill=Type)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0)+
labs(x = '', y = '', title = '')+
theme(axis.text = element_blank())+
scale_fill_discrete(labels = label)
#侯友宜6-11/24貼文數量變化
Houcount=Hou%>%group_by(month=format(Date,"%m"))%>%count()
ggplot(Houcount,aes(x=month,y=n))+
geom_bar(stat = "identity")
#蘇貞昌6-11/24貼文數量變化
Sucount=Su%>%group_by(month=format(Date,"%m"))%>%count()
ggplot(Sucount,aes(x=month,y=n))+
geom_bar(stat = "identity")
#蘇侯每月PO文字次數比較
Sucount=Su%>%group_by(month=format(Date,"%m"))%>%count()%>%mutate(group="Su")
Houcount=Hou%>%group_by(month=format(Date,"%m"))%>%count()%>%mutate(group="Hou")
monthcount=rbind(Sucount,Houcount)%>%arrange((month))
ggplot(monthcount,aes(x=month,y=n,fill=group))+
geom_bar(stat="identity",position = "dodge")
ggplot(Pei,aes(x=Type,y=LIKE_COUNT))+
geom_boxplot(col = "red")
library(ggpubr)
## Loading required package: magrittr
ggqqplot(Su$All_Reaction_Count)
library(corrplot)
## corrplot 0.84 loaded
cor(Su[c(6:14)])%>%corrplot.mixed(lower = "pie",tl.cex=0.6)
ggscatter(Su,x="All_Reaction_Count",y="LIKE_COUNT", add = "reg.line", conf.int = TRUE,cor.coef = TRUE, cor.method = "pearson")
#兩候選人like count分析
#折線圖(6到11月24),用月作為區分
ggplot(Pei, aes(x = cDate,y = LIKE_COUNT,colour = Page_Name))+ geom_line()+
geom_vline(xintercept = as.Date("2018-07-24"),col = "black") +
geom_vline(xintercept = as.Date("2018-08-24"),col = "black") +
geom_vline(xintercept = as.Date("2018-09-24"),col = "black") +
geom_vline(xintercept = as.Date("2018-10-24"),col = "black") +
geom_vline(xintercept = as.Date("2018-11-24"),col = "black")