0. Data pre-processing

Variables Description Value
ovalue 請問,您對政府網站中提供給民眾的資訊滿不滿意? 1=非常不滿意; 2=不滿意; 3=滿意; 4=非常滿意
dvalue 請問,您常不常透過網路社群將您覺得重要的公共問題傳給其他人? 1=從來沒有; 2=很少; 3=有時; 4=經常
svalue 如果沒有網路,您的生活會變得比較快樂、還是比較不快樂? 1=比以前不快樂很多; 2=比以前不快樂一些; 3=比以前快樂一些; 4=比以前快樂很多
inter 網路使用 1=有;0=無
gender 性別 1=男性; 2=女性
edu 教育程度 1=小學及以下; 2=國初中; 3=高中職; 4=大專及以上
age 年齡 1=15-19歲; 2=20-29歲; 3=30-39歲; 4=40-49歲; 5=50-59歲; 6=60歲以上
w 權值 NULL
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(ggstatsplot)
library(magrittr)
library(car)
## Loading required package: carData
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
## The following object is masked from 'package:Matrix':
## 
##     expand
library(tibble)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)

1. 2013~2017 Public Value of Digital Governance (數位治理公共價值指標)

rm(list=ls())
options(digits = 3)
load("/Users/huangzongxian/Desktop/RLadies/Data/datlis.RData") # 讀取整理好的Rdata檔

#你也可以將 list 拆分成5個data.frame來執行喔,只是會稍微麻煩一點
#for (i in 1:5L){
#assign(paste0("data", i),datlis[[i]])}#


#Using weight
dwlist<- lapply(datlis,function(x){
  svydesign(ids = ~1, data = x, weights = x$w)
})



#2013~2017 操作性價值
#各年度結果
lapply(dwlist,function(x){svytable(~ovalue, design=x) %>%
  prop.table()})  %>%
  do.call(rbind,.) %>% 
  cbind(seq(2013,2017),.)    %>%
  set_colnames(c("year",names(svytable(~ovalue, design=dwlist[[1]]))))
##      year 不滿意 非常不滿意 非常滿意  滿意
## [1,] 2013  0.182     0.0210   0.0382 0.759
## [2,] 2014  0.224     0.0339   0.0676 0.674
## [3,] 2015  0.171     0.0347   0.0447 0.749
## [4,] 2016  0.178     0.0320   0.0433 0.747
## [5,] 2017  0.255     0.0335   0.0355 0.676
#透過tibble與ggplot2的合作來plot
lapply(dwlist,function(x){svytable(~ovalue, design=x) %>%
  prop.table()})  %>%
  do.call(rbind,.) %>% 
  cbind(seq(2013,2017),.)    %>%
  set_colnames(c("year",names(svytable(~ovalue, design=dwlist[[1]]))))    %>% 
  as_tibble() %>% 
  gather(terms, # this will be the new column for the  key columns
         value, # this will contain the  values
         c("非常不滿意","不滿意","滿意","非常滿意"), 
         na.rm = TRUE 
  ) %>%
  ggplot(data=.,aes(x=year,y=value,group=factor(terms),col=terms,linetype=terms))+
         geom_line() + 
         geom_point() +  
  labs(x = "時間", y = "percent", title = "請問,您對政府網站中提供給民眾的資訊滿不滿意?") + 
  theme(text=element_text(family="黑體-繁 中黑", size=13),axis.ticks = element_blank())

#2013~2017 政治性價值
#
lapply(dwlist,function(x){svytable(~dvalue, design=x) %>%
  prop.table()})  %>%
  do.call(rbind,.) %>% 
  cbind(seq(2013,2017),.)    %>%
  set_colnames(c("year",names(svytable(~dvalue, design=dwlist[[1]]))))
##      year 從來沒有  很少   經常  有時
## [1,] 2013    0.534 0.228 0.0866 0.151
## [2,] 2014    0.465 0.246 0.0872 0.202
## [3,] 2015    0.383 0.255 0.1167 0.246
## [4,] 2016    0.375 0.194 0.1430 0.288
## [5,] 2017    0.483 0.258 0.0917 0.167
#
lapply(dwlist,function(x){svytable(~dvalue, design=x) %>%
  prop.table()})  %>%
  do.call(rbind,.) %>% 
  cbind(seq(2013,2017),.)    %>%
  set_colnames(c("year",names(svytable(~dvalue, design=dwlist[[1]]))))    %>% 
  as_tibble() %>% 
  gather(terms, # this will be the new column for the  key columns
         value, # this will contain the  values
         c("從來沒有","很少","有時","經常"), # this is the range of columns we want gathered
         na.rm = TRUE # handles missing
  ) %>%
  ggplot(data=.,aes(x=year,y=value,group=factor(terms),col=terms,linetype=terms))+
  geom_line() + geom_point() +  
  labs(x = "時間", y = "percent", 
       title = "請問,您常不常透過網路社群將您覺得重要的公共問題傳給其他人?") + 
  theme(text=element_text(family="黑體-繁 中黑", size=13),axis.ticks = element_blank())

#2013~2017 社會性價值
#
lapply(dwlist,function(x){svytable(~svalue, design=x) %>%
  prop.table()})  %>%
  do.call(rbind,.) %>% 
  cbind(seq(2013,2017),.)    %>%
  set_colnames(c("year",names(svytable(~svalue, design=dwlist[[1]])))) 
##      year 比以前不快樂很多 比以前不快樂一些 比以前快樂很多 比以前快樂一些
## [1,] 2013            0.222            0.443         0.0817          0.253
## [2,] 2014            0.248            0.442         0.0894          0.221
## [3,] 2015            0.210            0.402         0.0947          0.293
## [4,] 2016            0.265            0.422         0.0826          0.230
## [5,] 2017            0.275            0.430         0.1099          0.185
#
lapply(dwlist,function(x){svytable(~svalue, design=x) %>%
  prop.table()})  %>%
  do.call(rbind,.) %>% 
  cbind(seq(2013,2017),.)    %>%
  set_colnames(c("year",names(svytable(~svalue, design=dwlist[[1]]))))    %>% 
  as_tibble() %>% 
  gather(terms, # this will be the new column for the  key columns
         value, # this will contain the  values
         c("比以前不快樂很多","比以前不快樂一些","比以前快樂很多","比以前快樂一些"), # this is the range of columns we want gathered
         na.rm = TRUE # handles missing
  ) %>%
  ggplot(data=.,aes(x=year,y=value,group=factor(terms),col=terms,linetype=terms))+
  geom_line() + geom_point() +  
  labs(x = "時間", y = "percent", 
       title = "如果沒有網路,您的生活會變得比較快樂、還是比較不快樂?") + 
  theme(text=element_text(family="黑體-繁 中黑", size=13),axis.ticks = element_blank())

2. Digital Divide

# 沒有任何一年的p-value < 0.05,無法推翻不同性別使用網路者比率相同的虛無假設
lapply(dwlist,function(x){svychisq(~gender+inter, design=x, statistic="Chisq")})
## [[1]]
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~gender + inter, design = x, statistic = "Chisq")
## X-squared = 0.3, df = 1, p-value = 0.6
## 
## 
## [[2]]
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~gender + inter, design = x, statistic = "Chisq")
## X-squared = 1, df = 1, p-value = 0.4
## 
## 
## [[3]]
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~gender + inter, design = x, statistic = "Chisq")
## X-squared = 0.2, df = 1, p-value = 0.7
## 
## 
## [[4]]
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~gender + inter, design = x, statistic = "Chisq")
## X-squared = 0.01, df = 1, p-value = 0.9
## 
## 
## [[5]]
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~gender + inter, design = x, statistic = "Chisq")
## X-squared = 0.07, df = 1, p-value = 0.8
# 各年度比例
lapply(dwlist,function(x){prop.table(svytable(~gender+inter, design=x),margin = 1)})
## [[1]]
##       inter
## gender     0     1
##      1 0.300 0.700
##      2 0.316 0.684
## 
## [[2]]
##       inter
## gender     0     1
##      1 0.309 0.691
##      2 0.338 0.662
## 
## [[3]]
##       inter
## gender     0     1
##      1 0.306 0.694
##      2 0.319 0.681
## 
## [[4]]
##       inter
## gender     0     1
##      1 0.318 0.682
##      2 0.320 0.680
## 
## [[5]]
##       inter
## gender     0     1
##      1 0.186 0.814
##      2 0.189 0.811

3. Weight vs. Unweight

#Age
data <- c()
for (i in 1L:5L){
  term <- names(prop.table(table(datlis[[1]]$age))*100)
  date <- rep((i+2012),6)
  start<- lapply(datlis,function(x){prop.table(table(x$age))*100})[[i]] %>% as.character() %>% as.numeric()
  end<- lapply(dwlist,function(x){prop.table(svytable(~age,design=x))*100}) [[i]] %>%    as.character() %>% as.numeric()
  amount <- end-start %>% as.character() %>% as.numeric()
  type <- ifelse(amount > 0, "Postive","Negative")
  dt <- cbind(term,date,start,end,type,amount)
  data <- rbind(data,dt)
}

data<- as.data.frame(data)
data$date<- data$date %>% as.character() %>% as.numeric() 
data$amount<- data$amount %>% as.character() %>% as.numeric() 

#plot
data %>%
  ggplot(aes(x = date
           , y = amount
           , fill = term)) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Dark2") + 
  labs(x = "時間", y = "差比(%)", title = "populations - survey results (AGE)") +
  theme(text=element_text(family="黑體-繁 中黑", size=13),axis.ticks = element_blank())

#Education
data <- c()
for (i in 1L:5L){
  term <- names(prop.table(table(datlis[[1]]$edu))*100)
  date <- rep((i+2012),4)
  start<- lapply(datlis,function(x){prop.table(table(x$edu))*100})[[i]] %>% as.character() %>% as.numeric()
  end<- lapply(dwlist,function(x){prop.table(svytable(~edu,design=x))*100}) [[i]] %>%    as.character() %>% as.numeric()
  amount <- end-start %>% as.character() %>% as.numeric()
  type <- ifelse(amount > 0, "Postive","Negative")
  dt <- cbind(term,date,start,end,type,amount)
  data <- rbind(data,dt)
}

data<- as.data.frame(data)
data$date<- data$date %>% as.character() %>% as.numeric() 
data$amount<- data$amount %>% as.character() %>% as.numeric() 

#plot
data %>%
  ggplot(aes(x = date
           , y = amount
           , fill = term)) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Dark2") + 
  labs(x = "時間", y = "差比(%)", title = "populations - survey results (Education)") +
  theme(text=element_text(family="黑體-繁 中黑", size=13),axis.ticks = element_blank())

#這張圖的結果非常有趣,因為2016年的調查公司恰好因為招標的原因跟其他四年都不一樣(剛好因為我是助理所以才知道,不然報告書上沒有寫XD),看來機構效應可能一定程度造成教育程度代表性的差異...!?

4. 當天沒有處理到的一個提問

data<- datlis[[5]] # 2017 data
data$edu<- Recode(data$edu,"'小學及以下'=1;'國初中'=2;'高中職'=3;'大專及以上'=4") %>% as.numeric()
data$age<- Recode(data$age,"'15-19歲'=1;'20-29歲'=2;'30-39歲'=3;'40-49歲'=4;'50-59歲'=5;'60歲以上'=6") %>% as.numeric()
data$dvalue<- Recode(data$dvalue,"'從來沒有'=1;'很少'=2;'有時'=3;'經常'=4")

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ordinal)
## 
## Attaching package: 'ordinal'
## The following object is masked from 'package:dplyr':
## 
##     slice
clm1<- clm(dvalue~edu+age+gender,data=data)
ggstatsplot::ggcoefstats(clm1,caption.summary=F,title = "Ordinal Regression Model")