#### 如何繪製多年期的資料
### RQ6:多年期的上網率的變化
## 兩個變數的關係(年度為類別變數、上網率為連續變數)
# a.使用折線圖
# b.使用長條圖
### RQ7:多年期的不同上網裝置使用天數的變化
## 三個變數的關係(年度和裝置為類別變數、上網天數為連續變數)
# a.使用折線圖
# b.使用長條圖

# 2015-2019年網路使用行為數據概況
# https://www.crctaiwan.nctu.edu.tw/ResultsShow_detail.asp?RS_ID=123
# RQ6:2015-2019年台灣民眾的上網率的趨勢為何?
## 1. 輸入資料:
# install.packages("sjlabelled")
library(sjlabelled)
library(haven) 
## 
## Attaching package: 'haven'
## The following objects are masked from 'package:sjlabelled':
## 
##     as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
tcs2015 <- read_spss("TCS2015.sav") 
tcs2016 <- read_spss("TCS2016.sav") 
tcs2017 <- read_spss("TCS2017.sav") 
tcs2018 <- read_spss("TCS2018.sav") 
tcs2019 <- read_spss("tcs2019.sav") 
# 2. 檢視資料框
# 當資料較大時,建議使用sjPlot套件
# install.packages("sjPlot")
# library(sjPlot)
# view_df(tcs2015,
#         file="tcs2015tab.html",  # 結果直接另存新檔
#         show.na = T, # 顯示未重新編碼前的無效值個數
#         show.frq = T, # 顯示次數
#         show.prc = T, # 顯示百分比
#         encoding = "big5"
# )
# view_df(tcs2016,
#         file="tcs2016tab.html",  # 結果直接另存新檔
#         show.na = T, # 顯示未重新編碼前的無效值個數
#         show.frq = T, # 顯示次數
#         show.prc = T, # 顯示百分比
#         encoding = "big5"
# )
# view_df(tcs2017,
#         file="tcs2017tab.html",  # 結果直接另存新檔
#         show.na = T, # 顯示未重新編碼前的無效值個數
#         show.frq = T, # 顯示次數
#         show.prc = T, # 顯示百分比
#         encoding = "big5"
# )
# view_df(tcs2018,
#         file="tcs2018tab.html",  # 結果直接另存新檔
#         show.na = T, # 顯示未重新編碼前的無效值個數
#         show.frq = T, # 顯示次數
#         show.prc = T, # 顯示百分比
#         encoding = "big5"
# )
# view_df(tcs2019,
#         file="tcs2019tab.html",  # 結果直接另存新檔
#         show.na = T, # 顯示未重新編碼前的無效值個數
#         show.frq = T, # 顯示次數
#         show.prc = T, # 顯示百分比
#         encoding = "big5"
# )

## 3. 應用實作
# (1)確認欲分析的變數
# 請問你平常會不會上網?
# 2015年:C3(連續變數)
# 2016年:B1(連續變數)
# 2017年:B1(連續變數)
# 2018年:D1(連續變數)
# 2019年:b1(連續變數)

# (2) 各年建立一個新資料框,包含年度及欲使用的變數(上網狀況),各年的變數名稱需一致
# 利用rbind函數將各年資料進行垂直合併
a1 <- data.frame(year=2015, online=tcs2015$C3)
a2 <- data.frame(year=2016, online=tcs2016$B1)
a3 <- data.frame(year=2017, online=tcs2017$B1)
a4 <- data.frame(year=2018, online=tcs2018$D1)
a5 <- data.frame(year=2019, online=tcs2019$b1)
df1 <- rbind(a1,a2,a3,a4,a5)
# (3) 運用plyr套件,計算各年的上網率與人數
library(plyr)
## Warning: package 'plyr' was built under R version 4.0.5
df2 = ddply(df1,.(year),function(.){
  res = prop.table(table(factor(.$online)))
  res2 = table(factor(.$online))
  data.frame(lab=names(res), y=c(res),yy =c(res2))
})

## (4) 製圖
# 載入 ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# 解決Rstudio cloud圖形中文顯示問題
# install.packages("showtext")
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
showtext_auto()

# a.繪製折線圖
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.5
library(RColorBrewer)
ggplot(df2, aes(x=year, y=y,color=lab))+ 
  geom_line()+
  geom_point()+
  labs(title = "2015-2019年台灣民眾的上網率的趨勢",
       x="年度",y="百分比",
       subtitle="上網率有逐年增加嗎?",
       caption="金寶製 資料來源:台灣傳播調查資料庫")+
  theme_classic()+  
  theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
  scale_y_continuous(limits=c(0,1), breaks = c(0,0.2,0.4,0.6,0.8,1) ,labels =c("0%","20%","40%","60%","80%", "100%"))+
  theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
  geom_text(mapping = aes(label = sprintf("%.2f%%",y*100)),
            size = 3, colour = 'black',vjust = -2, hjust = .5)+
  scale_colour_discrete(name  ="上網情形",
                        breaks=c("1", "2"),
                        labels=c("上網率", "無上網率")) 

# 若僅要繪製上網率,可直接先篩選資料框
# 其他步驟一樣
df3 <- subset(df2,lab==1)
library(ggrepel)
library(RColorBrewer)
ggplot(df3, aes(x=year, y=y,color=lab))+ 
  geom_line()+
  geom_point()+
  labs(title = "2015-2019年台灣民眾的上網率的趨勢",
       x="年度",y="百分比",
       subtitle="上網率有逐年增加嗎?",
       caption="金寶製 資料來源:台灣傳播調查資料庫")+
  theme_classic()+  
  theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
  scale_y_continuous(limits=c(0,1), breaks = c(0,0.2,0.4,0.6,0.8,1) ,labels =c("0%","20%","40%","60%","80%", "100%"))+
  theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
  geom_text(mapping = aes(label = sprintf("%.2f%%",y*100)),
            size = 3, colour = 'black',vjust = -2, hjust = .5)+
  scale_colour_discrete(name  ="上網情形",
                        breaks="1",
                        labels="上網率") 

# b.繪製長條圖
ggplot(df2,aes(x = year,y=y,fill = lab))+
  geom_bar(stat = "identity",,position = "dodge")+
  labs(title = "2015-2019年台灣民眾的上網率的趨勢",
       x="年度",y="百分比",
       subtitle="上網率有逐年增加嗎?",
       caption="金寶製 資料來源:台灣傳播調查資料庫")+
  theme_classic()+  
  theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
  scale_y_continuous(limits=c(0,1), breaks = c(0,0.2,0.4,0.6,0.8,1) ,labels =c("0%","20%","40%","60%","80%", "100%"))+
  theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
  geom_text(mapping = aes(label = sprintf("%.2f%%",y*100)),
            size = 3, colour = 'black',vjust = -1, hjust = 0.5,
            position = position_dodge(width = 1))+
  scale_fill_manual(name="上網情形",
                    values=c("1"="rosybrown2", "2"="steelblue1"),
                    labels=c("上網率", "無上網率"))

# RQ7:2016-2019年台灣民眾的使用不同上網裝置的趨勢為何?
## 3. 應用實作
# (1)確認欲分析的變數
# 請問你平常會不會上網?
# 2016年:G1,G2,G3(連續變數)
# 2017年:H1,H2,H3(連續變數)
# 2018年:D2,D3,D4(連續變數)
# 2019年:h1,h2,h3(連續變數)

# (2) 各年建立一個新資料框,包含年度及欲使用的變數(上網狀況),各年的變數名稱需一致
# 利用rbind函數將各年資料進行垂直合併
a1 <- data.frame(year=2016, pc=tcs2016$G1, pad=tcs2016$G2,mobile=tcs2016$G3)
a2 <- data.frame(year=2017, pc=tcs2017$H1, pad=tcs2017$H2,mobile=tcs2017$H3)
a3 <- data.frame(year=2018, pc=tcs2018$D2, pad=tcs2018$D3,mobile=tcs2018$D4)
a4 <- data.frame(year=2019, pc=tcs2019$h1, pad=tcs2019$h2,mobile=tcs2019$h3)
df1 <- rbind(a1,a2,a3,a4)

# (3) 運用plyr套件,計算各年不同裝置的平均上網天數
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:sjlabelled':
## 
##     as_label
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df2 <- df1 %>% 
  group_by(year) %>%
  summarise(pc = mean(pc, na.rm=T), 
            pad = mean(pad, na.rm=T),
            mobile = mean(mobile, na.rm=T))
# (4) 透過tidyr套件中的gather將寬格式轉為長格式,以利繪圖
# install.packages("tidyr")
library(tidyr)
df3 <- gather(df2, key = "device", value = "day", pc,pad,mobile)

## (4) 製圖
# 載入 ggplot2
library(ggplot2)
# 解決Rstudio cloud圖形中文顯示問題
# install.packages("showtext")
library(showtext)
showtext_auto()

# a.繪製折線圖
library(ggrepel)
library(RColorBrewer)
ggplot(df3, aes(x=year, y=day, color=device))+ 
  geom_line()+
  geom_point()+
  labs(title = "2016-2019年台灣民眾的使用不同上網裝置的趨勢",
       x="年度",y="天數",
       subtitle="手機使用天數有逐年增加嗎?",
       caption="金寶製 資料來源:台灣傳播調查資料庫")+
  theme_classic()+  
  theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
  scale_y_continuous(limits=c(0,7))+
  theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
  geom_text(mapping = aes(label = sprintf("%.1f",day)),
            size = 3, colour = 'black',vjust = -2, hjust = .5)+
  scale_color_discrete(name  ="上網裝置",
                       breaks=c("mobile", "pad","pc"),
                       labels=c("手機", "平板","電腦")) #圖例scale_color_discrete

# b.繪製長條圖
ggplot(df3,aes(x = year,y=day,fill = device))+
  geom_bar(stat = "identity",position = "dodge")+
  labs(title = "2015-2019年台灣民眾的上網率的趨勢",
       x="年度",y="天數",
       subtitle="上網率有逐年增加嗎?",
       caption="金寶製 資料來源:台灣傳播調查資料庫")+
  theme_classic()+  
  theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
  scale_y_continuous(limits=c(0,7))+
  theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
  geom_text(mapping = aes(label = sprintf("%.1f",day)),
            size = 3, colour = 'black',vjust = -1, hjust = 0.5,
            position = position_dodge(width = 1))+
  scale_fill_manual(values=c("#F37A81", "#72B7F7", "#3E76AB"),
                    name  ="上網裝置",
                    breaks=c("mobile", "pad","pc"),
                    labels=c("手機", "平板","電腦"))