#### 如何繪製多年期的資料
### RQ6:多年期的上網率的變化
## 兩個變數的關係(年度為類別變數、上網率為連續變數)
# a.使用折線圖
# b.使用長條圖
### RQ7:多年期的不同上網裝置使用天數的變化
## 三個變數的關係(年度和裝置為類別變數、上網天數為連續變數)
# a.使用折線圖
# b.使用長條圖
# 2015-2019年網路使用行為數據概況
# https://www.crctaiwan.nctu.edu.tw/ResultsShow_detail.asp?RS_ID=123
# RQ6:2015-2019年台灣民眾的上網率的趨勢為何?
## 1. 輸入資料:
# install.packages("sjlabelled")
library(sjlabelled)
library(haven)
##
## Attaching package: 'haven'
## The following objects are masked from 'package:sjlabelled':
##
## as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
tcs2015 <- read_spss("TCS2015.sav")
tcs2016 <- read_spss("TCS2016.sav")
tcs2017 <- read_spss("TCS2017.sav")
tcs2018 <- read_spss("TCS2018.sav")
tcs2019 <- read_spss("tcs2019.sav")
# 2. 檢視資料框
# 當資料較大時,建議使用sjPlot套件
# install.packages("sjPlot")
# library(sjPlot)
# view_df(tcs2015,
# file="tcs2015tab.html", # 結果直接另存新檔
# show.na = T, # 顯示未重新編碼前的無效值個數
# show.frq = T, # 顯示次數
# show.prc = T, # 顯示百分比
# encoding = "big5"
# )
# view_df(tcs2016,
# file="tcs2016tab.html", # 結果直接另存新檔
# show.na = T, # 顯示未重新編碼前的無效值個數
# show.frq = T, # 顯示次數
# show.prc = T, # 顯示百分比
# encoding = "big5"
# )
# view_df(tcs2017,
# file="tcs2017tab.html", # 結果直接另存新檔
# show.na = T, # 顯示未重新編碼前的無效值個數
# show.frq = T, # 顯示次數
# show.prc = T, # 顯示百分比
# encoding = "big5"
# )
# view_df(tcs2018,
# file="tcs2018tab.html", # 結果直接另存新檔
# show.na = T, # 顯示未重新編碼前的無效值個數
# show.frq = T, # 顯示次數
# show.prc = T, # 顯示百分比
# encoding = "big5"
# )
# view_df(tcs2019,
# file="tcs2019tab.html", # 結果直接另存新檔
# show.na = T, # 顯示未重新編碼前的無效值個數
# show.frq = T, # 顯示次數
# show.prc = T, # 顯示百分比
# encoding = "big5"
# )
## 3. 應用實作
# (1)確認欲分析的變數
# 請問你平常會不會上網?
# 2015年:C3(連續變數)
# 2016年:B1(連續變數)
# 2017年:B1(連續變數)
# 2018年:D1(連續變數)
# 2019年:b1(連續變數)
# (2) 各年建立一個新資料框,包含年度及欲使用的變數(上網狀況),各年的變數名稱需一致
# 利用rbind函數將各年資料進行垂直合併
a1 <- data.frame(year=2015, online=tcs2015$C3)
a2 <- data.frame(year=2016, online=tcs2016$B1)
a3 <- data.frame(year=2017, online=tcs2017$B1)
a4 <- data.frame(year=2018, online=tcs2018$D1)
a5 <- data.frame(year=2019, online=tcs2019$b1)
df1 <- rbind(a1,a2,a3,a4,a5)
# (3) 運用plyr套件,計算各年的上網率與人數
library(plyr)
## Warning: package 'plyr' was built under R version 4.0.5
df2 = ddply(df1,.(year),function(.){
res = prop.table(table(factor(.$online)))
res2 = table(factor(.$online))
data.frame(lab=names(res), y=c(res),yy =c(res2))
})
## (4) 製圖
# 載入 ggplot2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# 解決Rstudio cloud圖形中文顯示問題
# install.packages("showtext")
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
showtext_auto()
# a.繪製折線圖
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.5
library(RColorBrewer)
ggplot(df2, aes(x=year, y=y,color=lab))+
geom_line()+
geom_point()+
labs(title = "2015-2019年台灣民眾的上網率的趨勢",
x="年度",y="百分比",
subtitle="上網率有逐年增加嗎?",
caption="金寶製 資料來源:台灣傳播調查資料庫")+
theme_classic()+
theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
scale_y_continuous(limits=c(0,1), breaks = c(0,0.2,0.4,0.6,0.8,1) ,labels =c("0%","20%","40%","60%","80%", "100%"))+
theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
geom_text(mapping = aes(label = sprintf("%.2f%%",y*100)),
size = 3, colour = 'black',vjust = -2, hjust = .5)+
scale_colour_discrete(name ="上網情形",
breaks=c("1", "2"),
labels=c("上網率", "無上網率"))

# 若僅要繪製上網率,可直接先篩選資料框
# 其他步驟一樣
df3 <- subset(df2,lab==1)
library(ggrepel)
library(RColorBrewer)
ggplot(df3, aes(x=year, y=y,color=lab))+
geom_line()+
geom_point()+
labs(title = "2015-2019年台灣民眾的上網率的趨勢",
x="年度",y="百分比",
subtitle="上網率有逐年增加嗎?",
caption="金寶製 資料來源:台灣傳播調查資料庫")+
theme_classic()+
theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
scale_y_continuous(limits=c(0,1), breaks = c(0,0.2,0.4,0.6,0.8,1) ,labels =c("0%","20%","40%","60%","80%", "100%"))+
theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
geom_text(mapping = aes(label = sprintf("%.2f%%",y*100)),
size = 3, colour = 'black',vjust = -2, hjust = .5)+
scale_colour_discrete(name ="上網情形",
breaks="1",
labels="上網率")

# b.繪製長條圖
ggplot(df2,aes(x = year,y=y,fill = lab))+
geom_bar(stat = "identity",,position = "dodge")+
labs(title = "2015-2019年台灣民眾的上網率的趨勢",
x="年度",y="百分比",
subtitle="上網率有逐年增加嗎?",
caption="金寶製 資料來源:台灣傳播調查資料庫")+
theme_classic()+
theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
scale_y_continuous(limits=c(0,1), breaks = c(0,0.2,0.4,0.6,0.8,1) ,labels =c("0%","20%","40%","60%","80%", "100%"))+
theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
geom_text(mapping = aes(label = sprintf("%.2f%%",y*100)),
size = 3, colour = 'black',vjust = -1, hjust = 0.5,
position = position_dodge(width = 1))+
scale_fill_manual(name="上網情形",
values=c("1"="rosybrown2", "2"="steelblue1"),
labels=c("上網率", "無上網率"))

# RQ7:2016-2019年台灣民眾的使用不同上網裝置的趨勢為何?
## 3. 應用實作
# (1)確認欲分析的變數
# 請問你平常會不會上網?
# 2016年:G1,G2,G3(連續變數)
# 2017年:H1,H2,H3(連續變數)
# 2018年:D2,D3,D4(連續變數)
# 2019年:h1,h2,h3(連續變數)
# (2) 各年建立一個新資料框,包含年度及欲使用的變數(上網狀況),各年的變數名稱需一致
# 利用rbind函數將各年資料進行垂直合併
a1 <- data.frame(year=2016, pc=tcs2016$G1, pad=tcs2016$G2,mobile=tcs2016$G3)
a2 <- data.frame(year=2017, pc=tcs2017$H1, pad=tcs2017$H2,mobile=tcs2017$H3)
a3 <- data.frame(year=2018, pc=tcs2018$D2, pad=tcs2018$D3,mobile=tcs2018$D4)
a4 <- data.frame(year=2019, pc=tcs2019$h1, pad=tcs2019$h2,mobile=tcs2019$h3)
df1 <- rbind(a1,a2,a3,a4)
# (3) 運用plyr套件,計算各年不同裝置的平均上網天數
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:sjlabelled':
##
## as_label
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df2 <- df1 %>%
group_by(year) %>%
summarise(pc = mean(pc, na.rm=T),
pad = mean(pad, na.rm=T),
mobile = mean(mobile, na.rm=T))
# (4) 透過tidyr套件中的gather將寬格式轉為長格式,以利繪圖
# install.packages("tidyr")
library(tidyr)
df3 <- gather(df2, key = "device", value = "day", pc,pad,mobile)
## (4) 製圖
# 載入 ggplot2
library(ggplot2)
# 解決Rstudio cloud圖形中文顯示問題
# install.packages("showtext")
library(showtext)
showtext_auto()
# a.繪製折線圖
library(ggrepel)
library(RColorBrewer)
ggplot(df3, aes(x=year, y=day, color=device))+
geom_line()+
geom_point()+
labs(title = "2016-2019年台灣民眾的使用不同上網裝置的趨勢",
x="年度",y="天數",
subtitle="手機使用天數有逐年增加嗎?",
caption="金寶製 資料來源:台灣傳播調查資料庫")+
theme_classic()+
theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
scale_y_continuous(limits=c(0,7))+
theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
geom_text(mapping = aes(label = sprintf("%.1f",day)),
size = 3, colour = 'black',vjust = -2, hjust = .5)+
scale_color_discrete(name ="上網裝置",
breaks=c("mobile", "pad","pc"),
labels=c("手機", "平板","電腦")) #圖例scale_color_discrete

# b.繪製長條圖
ggplot(df3,aes(x = year,y=day,fill = device))+
geom_bar(stat = "identity",position = "dodge")+
labs(title = "2015-2019年台灣民眾的上網率的趨勢",
x="年度",y="天數",
subtitle="上網率有逐年增加嗎?",
caption="金寶製 資料來源:台灣傳播調查資料庫")+
theme_classic()+
theme(axis.title.y = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
scale_y_continuous(limits=c(0,7))+
theme(plot.title = element_text(vjust = 2,hjust = 0.5))+
geom_text(mapping = aes(label = sprintf("%.1f",day)),
size = 3, colour = 'black',vjust = -1, hjust = 0.5,
position = position_dodge(width = 1))+
scale_fill_manual(values=c("#F37A81", "#72B7F7", "#3E76AB"),
name ="上網裝置",
breaks=c("mobile", "pad","pc"),
labels=c("手機", "平板","電腦"))
