library(car)
## Loading required package: carData
library(naniar)
library(foreign)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(knitr)
library(lavaan) # the lavaan package
## This is lavaan 0.6-3
## lavaan is BETA software! Please report any bugs.
library(semPlot)
library(semTools)
##
## ###############################################################################
## This is semTools 0.5-1
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
options(digits = 3,scipen = 999,warn = -1)
setwd("/Users/huangzongxian/Desktop/高等量化分析/學期報告/Data")
rm(list=ls())
data <- read.spss("Telephone.sav",to.data.frame = T,use.value.labels = F)
The original dataset contains many variables that will not be used. We can list the names of the variables we want in vars, then keep just those 16 variables with data[names(data) %in% vars].
A1 identifies e-government users; a value of 5 means the respondent has "almost never used" such services. Because this study focuses on e-government users, only respondents with A1 values of 1 to 4 are kept, leaving 329 valid cases after filtering (a quick check of this count appears after the summary output below).
A first look with summary() shows a maximum (Max.) of 98, which indicates that missing-value codes are present in the data and must be handled before analysis.
vars <- c("A1","B1","B2","B3","C1","C2","C3","C4","C5","D1","D2","D3","E1","E2","E3","E4")
data<- data[names(data) %in% vars]
data.2<- data[data$A1 %in% c(1,2,3,4),]
summary(data.2)
## A1 B1 D3 C1 C2
## Min. :1.0 Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.:4.0 1st Qu.: 3.0 1st Qu.: 3.0 1st Qu.: 3.0 1st Qu.: 2.0
## Median :4.0 Median : 3.0 Median : 3.0 Median : 3.0 Median : 3.0
## Mean :3.6 Mean :16.4 Mean : 8.5 Mean :29.7 Mean :28.4
## 3rd Qu.:4.0 3rd Qu.: 3.0 3rd Qu.: 3.0 3rd Qu.:98.0 3rd Qu.:98.0
## Max. :4.0 Max. :98.0 Max. :98.0 Max. :98.0 Max. :98.0
## D2 C3 C4 C5 B2
## Min. : 1 Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1
## 1st Qu.: 2 1st Qu.: 3.0 1st Qu.: 2.0 1st Qu.: 2.0 1st Qu.: 4
## Median : 3 Median : 3.0 Median : 3.0 Median : 3.0 Median : 4
## Mean : 7 Mean :19.1 Mean :29.3 Mean :27.3 Mean :10
## 3rd Qu.: 3 3rd Qu.: 3.0 3rd Qu.:98.0 3rd Qu.:95.0 3rd Qu.: 4
## Max. :98 Max. :98.0 Max. :98.0 Max. :98.0 Max. :98
## B3 D1 E1 E2 E3
## Min. : 1.0 Min. : 1.0 Min. : 1.0 Min. : 1 Min. : 1.0
## 1st Qu.: 3.0 1st Qu.: 2.0 1st Qu.: 1.0 1st Qu.: 2 1st Qu.: 1.0
## Median : 3.0 Median : 3.0 Median : 2.0 Median : 2 Median : 2.0
## Mean :11.4 Mean : 5.5 Mean : 5.5 Mean : 5 Mean : 3.9
## 3rd Qu.: 4.0 3rd Qu.: 4.0 3rd Qu.: 2.0 3rd Qu.: 3 3rd Qu.: 3.0
## Max. :98.0 Max. :98.0 Max. :98.0 Max. :98 Max. :98.0
## E4
## Min. : 1.0
## 1st Qu.: 2.0
## Median : 3.0
## Mean : 6.3
## 3rd Qu.: 4.0
## Max. :98.0
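As a quick check (a sketch, not part of the original script), the number of retained respondents and the distribution of A1 among them can be confirmed directly:
nrow(data.2)      # should return 329, the valid sample size after filtering
table(data.2$A1)  # usage-frequency distribution among retained respondents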
First, items E4 and D1 are reverse-worded in the questionnaire, so they are reverse-coded before anything else.
replace_with_na_all() converts the specified values to NA; running summary() again confirms that the NAs have been picked out.
data.2$E4 <- Recode(data.2$E4,"1=4;2=3;3=2;4=1") # reverse-code E4
data.2$D1 <- Recode(data.2$D1,"1=4;2=3;3=2;4=1") # reverse-code D1
data.3 <- replace_with_na_all(data.2,condition = ~.x %in% c(98,95)) # recode missing-value codes (98, 95) to NA
summary(data.3)
## A1 B1 D3 C1 C2
## Min. :1.0 Min. :1.0 Min. :1.00 Min. :1.0 Min. :1.0
## 1st Qu.:4.0 1st Qu.:3.0 1st Qu.:3.00 1st Qu.:2.0 1st Qu.:2.0
## Median :4.0 Median :3.0 Median :3.00 Median :3.0 Median :3.0
## Mean :3.6 Mean :2.8 Mean :3.01 Mean :2.8 Mean :2.6
## 3rd Qu.:4.0 3rd Qu.:3.0 3rd Qu.:3.00 3rd Qu.:3.0 3rd Qu.:3.0
## Max. :4.0 Max. :4.0 Max. :4.00 Max. :4.0 Max. :4.0
## NA's :47 NA's :19 NA's :93 NA's :89
## D2 C3 C4 C5 B2
## Min. :1.00 Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.00
## 1st Qu.:2.00 1st Qu.:3.0 1st Qu.:2.0 1st Qu.:2.0 1st Qu.:4.00
## Median :3.00 Median :3.0 Median :3.0 Median :2.0 Median :4.00
## Mean :2.68 Mean :2.9 Mean :2.6 Mean :2.4 Mean :3.95
## 3rd Qu.:3.00 3rd Qu.:3.0 3rd Qu.:3.0 3rd Qu.:3.0 3rd Qu.:4.00
## Max. :4.00 Max. :4.0 Max. :4.0 Max. :4.0 Max. :5.00
## NA's :15 NA's :56 NA's :92 NA's :88 NA's :21
## B3 D1 E1 E2
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:3.00 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:2.00
## Median :3.00 Median :2.00 Median :2.00 Median :2.00
## Mean :3.08 Mean :2.15 Mean :1.96 Mean :2.11
## 3rd Qu.:4.00 3rd Qu.:3.00 3rd Qu.:2.00 3rd Qu.:3.00
## Max. :4.00 Max. :4.00 Max. :4.00 Max. :4.00
## NA's :29 NA's :9 NA's :12 NA's :10
## E3 E4
## Min. :1.00 Min. :1.00
## 1st Qu.:1.00 1st Qu.:2.00
## Median :2.00 Median :2.00
## Mean :2.12 Mean :2.18
## 3rd Qu.:3.00 3rd Qu.:3.00
## Max. :4.00 Max. :4.00
## NA's :6 NA's :12
missmap() lets us look at how the missing values are distributed; overall roughly 11% of the cells are missing, which is not ideal (a quick numeric check follows the plot below).
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
missmap(data.3)
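To put a number on the overall missingness, a quick base-R calculation (a sketch, not in the original script) gives the proportion of missing cells and a per-variable NA count:
mean(is.na(data.3)) * 100  # overall percentage of missing cells; should be roughly the 11% shown by missmap
colSums(is.na(data.3))     # number of NAs in each variable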
data.4 <- data.frame(sapply(data.3, na.aggregate)) # impute missing values with the column mean
summary(data.4)
## A1 B1 D3 C1 C2
## Min. :1.0 Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:4.0 1st Qu.:2.81 1st Qu.:3.00 1st Qu.:2.81 1st Qu.:2.00
## Median :4.0 Median :3.00 Median :3.00 Median :2.81 Median :2.64
## Mean :3.6 Mean :2.81 Mean :3.01 Mean :2.81 Mean :2.64
## 3rd Qu.:4.0 3rd Qu.:3.00 3rd Qu.:3.00 3rd Qu.:3.00 3rd Qu.:3.00
## Max. :4.0 Max. :4.00 Max. :4.00 Max. :4.00 Max. :4.00
## D2 C3 C4 C5
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.00 1st Qu.:2.95 1st Qu.:2.00 1st Qu.:2.00
## Median :3.00 Median :3.00 Median :2.57 Median :2.41
## Mean :2.68 Mean :2.95 Mean :2.57 Mean :2.41
## 3rd Qu.:3.00 3rd Qu.:3.00 3rd Qu.:3.00 3rd Qu.:3.00
## Max. :4.00 Max. :4.00 Max. :4.00 Max. :4.00
## B2 B3 D1 E1
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:4.00 1st Qu.:3.00 1st Qu.:1.00 1st Qu.:1.00
## Median :4.00 Median :3.00 Median :2.00 Median :2.00
## Mean :3.95 Mean :3.08 Mean :2.15 Mean :1.96
## 3rd Qu.:4.00 3rd Qu.:4.00 3rd Qu.:3.00 3rd Qu.:2.00
## Max. :5.00 Max. :4.00 Max. :4.00 Max. :4.00
## E2 E3 E4
## Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.00 1st Qu.:1.00 1st Qu.:2.00
## Median :2.00 Median :2.00 Median :2.00
## Mean :2.11 Mean :2.12 Mean :2.18
## 3rd Qu.:3.00 3rd Qu.:3.00 3rd Qu.:3.00
## Max. :4.00 Max. :4.00 Max. :4.00
missmap(data.4)
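na.aggregate() from zoo replaces each NA with that column's mean. For transparency, the same mean imputation can be written out by hand; this sketch (with a hypothetical data.4.manual object) should reproduce data.4:
data.4.manual <- as.data.frame(lapply(data.3, function(x) {
  x[is.na(x)] <- mean(x, na.rm = TRUE)  # replace each NA with the column mean
  x
}))
all.equal(data.4, data.4.manual, check.attributes = FALSE)  # values should agree with the na.aggregate result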
The mvn() function in the MVN package can check multivariate normality. As noted in section 2-28 of the textbook, the simplest approach is Mardia's test; besides LISREL, R offers the same capability.
And then we notice something...
Not a single variable passes the normality test!
library(MVN)
## sROC 0.1-2 loaded
mvn(data.4,mvnTest = "mardia")
## $multivariateNormality
## Test Statistic
## 1 Mardia Skewness 2425.14110955012
## 2 Mardia Kurtosis 35.8633233129084
## 3 MVN <NA>
## p value
## 1 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000382244379282419
## 2 0
## 3 <NA>
## Result
## 1 NO
## 2 NO
## 3 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk A1 0.523 <0.001 NO
## 2 Shapiro-Wilk B1 0.691 <0.001 NO
## 3 Shapiro-Wilk D3 0.629 <0.001 NO
## 4 Shapiro-Wilk C1 0.852 <0.001 NO
## 5 Shapiro-Wilk C2 0.872 <0.001 NO
## 6 Shapiro-Wilk D2 0.836 <0.001 NO
## 7 Shapiro-Wilk C3 0.597 <0.001 NO
## 8 Shapiro-Wilk C4 0.869 <0.001 NO
## 9 Shapiro-Wilk C5 0.900 <0.001 NO
## 10 Shapiro-Wilk B2 0.708 <0.001 NO
## 11 Shapiro-Wilk B3 0.804 <0.001 NO
## 12 Shapiro-Wilk D1 0.867 <0.001 NO
## 13 Shapiro-Wilk E1 0.829 <0.001 NO
## 14 Shapiro-Wilk E2 0.857 <0.001 NO
## 15 Shapiro-Wilk E3 0.862 <0.001 NO
## 16 Shapiro-Wilk E4 0.868 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## A1 329 3.60 0.831 4.00 1 4 4.00 4 -1.8896 2.1782
## B1 329 2.81 0.523 3.00 1 4 2.81 3 -1.1594 2.9447
## D3 329 3.01 0.494 3.00 1 4 3.00 3 -0.5944 3.5722
## C1 329 2.81 0.679 2.81 1 4 2.81 3 -0.3301 0.6522
## C2 329 2.64 0.618 2.64 1 4 2.00 3 -0.2201 0.8453
## D2 329 2.68 0.716 3.00 1 4 2.00 3 -0.4074 0.1429
## C3 329 2.95 0.463 3.00 1 4 2.95 3 -1.0255 5.1268
## C4 329 2.57 0.620 2.57 1 4 2.00 3 -0.4373 0.9214
## C5 329 2.41 0.725 2.41 1 4 2.00 3 -0.0953 0.1701
## B2 329 3.95 0.720 4.00 1 5 4.00 4 -1.3335 3.2781
## B3 329 3.08 0.789 3.00 1 4 3.00 4 -0.8617 0.6955
## D1 329 2.15 0.880 2.00 1 4 1.00 3 0.3478 -0.6190
## E1 329 1.96 0.756 2.00 1 4 1.00 2 0.4988 -0.0354
## E2 329 2.11 0.821 2.00 1 4 2.00 3 0.3831 -0.3635
## E3 329 2.12 0.856 2.00 1 4 1.00 3 0.3461 -0.5680
## E4 329 2.18 0.900 2.00 1 4 2.00 3 0.4025 -0.5681
Next we specify the CFA model, cfamodel, laying out all of the measurement relationships.
cfamodel = '
users =~ B1 + B2 + B3
account =~ C1 + C2 + C3 +C4+C5
belief =~ D1 + D2 + D3
free =~ E1+E2+E3 +E4'
cfa_fit <- cfa(cfamodel, data=data.4,estimator = "ML",
orthogonal=FALSE)
summary(cfa_fit)
## lavaan 0.6-3 ended normally after 90 iterations
##
## Optimization method NLMINB
## Number of free parameters 36
##
## Number of observations 329
##
## Estimator ML
## Model Fit Test Statistic 170.408
## Degrees of freedom 84
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## users =~
## B1 1.000
## B2 1.491 0.220 6.781 0.000
## B3 1.171 0.216 5.415 0.000
## account =~
## C1 1.000
## C2 0.901 0.130 6.957 0.000
## C3 0.525 0.090 5.858 0.000
## C4 0.893 0.129 6.906 0.000
## C5 0.977 0.148 6.620 0.000
## belief =~
## D1 1.000
## D2 2.630 0.994 2.644 0.008
## D3 2.013 0.756 2.663 0.008
## free =~
## E1 1.000
## E2 2.445 0.727 3.364 0.001
## E3 1.363 0.305 4.469 0.000
## E4 0.622 0.240 2.595 0.009
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## users ~~
## account 0.089 0.015 5.767 0.000
## belief 0.042 0.016 2.571 0.010
## free 0.002 0.007 0.327 0.743
## account ~~
## belief 0.042 0.017 2.521 0.012
## free 0.002 0.008 0.265 0.791
## belief ~~
## free 0.005 0.004 1.172 0.241
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .B1 0.210 0.018 11.376 0.000
## .B2 0.379 0.035 10.846 0.000
## .B3 0.535 0.044 12.243 0.000
## .C1 0.314 0.030 10.606 0.000
## .C2 0.263 0.025 10.680 0.000
## .C3 0.173 0.015 11.733 0.000
## .C4 0.268 0.025 10.755 0.000
## .C5 0.386 0.035 11.109 0.000
## .D1 0.749 0.059 12.671 0.000
## .D2 0.347 0.034 10.102 0.000
## .D3 0.147 0.017 8.747 0.000
## .E1 0.495 0.044 11.264 0.000
## .E2 0.228 0.121 1.885 0.059
## .E3 0.593 0.060 9.903 0.000
## .E4 0.778 0.062 12.579 0.000
## users 0.062 0.016 3.905 0.000
## account 0.145 0.031 4.689 0.000
## belief 0.024 0.017 1.364 0.173
## free 0.074 0.030 2.443 0.015
fitmeasures(cfa_fit,c("chisq","df","agfi","cfi","rmsea","srmr"))
## chisq df agfi cfi rmsea srmr
## 170.408 84.000 0.908 0.866 0.056 0.059
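Since none of the variables passed the normality tests earlier, the ML chi-square and standard errors may be distorted. One hedged alternative (a sketch, not the model reported here) is to refit with lavaan's robust MLR estimator and read the robust fit indices; treating the four-point items as ordered categorical with WLSMV would be another option. The exact robust index names can vary across lavaan versions.
cfa_fit_mlr <- cfa(cfamodel, data = data.4, estimator = "MLR")  # robust ML: scaled test statistic, robust SEs
fitmeasures(cfa_fit_mlr, c("chisq.scaled", "df", "cfi.robust", "rmsea.robust", "srmr"))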
The reliability of the account factor is greater than 0.6 (see the composite reliability results further below). First, a helper function for each indicator's SMC (squared multiple correlation):
SMC <- function(fit, xlst) {
  p <- parameterEstimates(fit, standardized = T)
  a <- p$std.all[p$op == "=~" & p$rhs %in% xlst]^2  # squared standardized loadings
  b <- p$std.all[p$op == "~~" & p$rhs %in% xlst]    # standardized residual variances
  a / (a + b)                                       # explained variance / total variance per indicator
}
cbind(sort(names(data.3))[2:16],
round(SMC(cfa_fit,sort(names(data.3))[2:16]),digits = 3))
## [,1] [,2]
## [1,] "B1" "0.229"
## [2,] "B2" "0.268"
## [3,] "B3" "0.138"
## [4,] "C1" "0.316"
## [5,] "C2" "0.309"
## [6,] "C3" "0.187"
## [7,] "C4" "0.302"
## [8,] "C5" "0.265"
## [9,] "D1" "0.031"
## [10,] "D2" "0.321"
## [11,] "D3" "0.396"
## [12,] "E1" "0.13"
## [13,] "E2" "0.66"
## [14,] "E3" "0.189"
## [15,] "E4" "0.036"
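The SMC of an indicator is its squared standardized loading divided by the sum of that loading squared and its residual variance, i.e. the indicator's R-squared. As a cross-check (a sketch), lavaan reports the same quantity directly:
lavInspect(cfa_fit, "rsquare")  # indicator R-squared values; should match the SMC values above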
CR <- function(fit, xlst) {
  p <- parameterEstimates(fit, standardized = T)
  a <- sum(p$std.all[p$op == "=~" & p$rhs %in% xlst])^2  # squared sum of standardized loadings
  b <- sum(p$std.all[p$op == "~~" & p$rhs %in% xlst])    # sum of standardized residual variances
  a / (a + b)                                            # composite reliability (CR)
}
rbind(cbind("users",round(CR(cfa_fit,c("B1","B2","B3")),digits = 3)),
cbind("account",round(CR(cfa_fit,c("C1","C2","C3","C4","C5")),digits = 3)),
cbind("belief",round(CR(cfa_fit,c("D1","D2","D3")),digits = 3)),
cbind("free",round(CR(cfa_fit,c("E1","E2","E3","E4")),digits = 3)))
## [,1] [,2]
## [1,] "users" "0.442"
## [2,] "account" "0.654"
## [3,] "belief" "0.455"
## [4,] "free" "0.519"
AVE <- function(fit, xlst) {
  p <- parameterEstimates(fit, standardized = T)
  sum(p$std.all[p$op == "=~" & p$rhs %in% xlst]^2) / length(xlst)  # mean squared standardized loading (AVE)
}
rbind(cbind("users",round(AVE(cfa_fit,c("B1","B2","B3")),digits = 3)),
cbind("account",round(AVE(cfa_fit,c("C1","C2","C3","C4","C5")),digits = 3)),
cbind("belief",round(AVE(cfa_fit,c("D1","D2","D3")),digits = 3)),
cbind("free",round(AVE(cfa_fit,c("E1","E2","E3","E4")),digits = 3)))
## [,1] [,2]
## [1,] "users" "0.212"
## [2,] "account" "0.276"
## [3,] "belief" "0.249"
## [4,] "free" "0.254"
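Both CR and AVE can be cross-checked against semTools, which is already loaded: its reliability() function reports coefficient alpha, omega (comparable to the CR computed above), and avevar (the AVE) for each factor. A sketch:
reliability(cfa_fit)  # per-factor alpha, omega, and avevar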
cfamod_ind<- modificationIndices(cfa_fit)
head(cfamod_ind[order(cfamod_ind$mi,decreasing = T),],10)
## lhs op rhs mi epc sepc.lv sepc.all sepc.nox
## 125 C1 ~~ C2 26.18 0.105 0.105 0.364 0.364
## 146 C3 ~~ C4 20.29 0.063 0.063 0.291 0.291
## 172 D1 ~~ E1 18.85 0.150 0.150 0.246 0.246
## 176 D2 ~~ D3 17.40 -0.329 -0.329 -1.456 -1.456
## 188 E2 ~~ E3 15.96 0.707 0.707 1.921 1.921
## 71 belief =~ E1 15.34 1.260 0.194 0.257 0.257
## 42 users =~ C2 10.79 -3.204 -0.801 -1.297 -1.297
## 72 belief =~ E2 9.54 -1.607 -0.248 -0.302 -0.302
## 49 users =~ E1 9.18 0.555 0.139 0.184 0.184
## 108 B2 ~~ D3 8.86 0.056 0.056 0.238 0.238
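The largest modification index points to a residual covariance between C1 and C2. If such a correlated error is theoretically defensible (for example, similar item wording), the model could be respecified and refit; the following is only a sketch of one possible respecification, not a recommendation:
cfamodel.m <- '
users =~ B1 + B2 + B3
account =~ C1 + C2 + C3 + C4 + C5
belief =~ D1 + D2 + D3
free =~ E1 + E2 + E3 + E4
C1 ~~ C2   # residual covariance flagged by the largest modification index
'
cfa_fit.m <- cfa(cfamodel.m, data = data.4, estimator = "ML")
fitmeasures(cfa_fit.m, c("chisq", "df", "agfi", "cfi", "rmsea", "srmr"))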
semPaths(cfa_fit,
what = "equality",
whatLabels = "std", # est = raw estimates, std = standardized factor loadings
groups = "latents", # color nodes by their latent factor
pastel = TRUE, # use pastel colors
mar = c(3, 1, 3, 1), # plot margins
intercepts = TRUE, # show intercepts
edge.label.cex = 0.8, # edge label font size
sizeMan = 7, # size of observed (manifest) variable nodes
sizeLat = 10, # size of latent variable nodes
exoCov = FALSE, # hide covariance arrows between exogenous latents
optimizeLatRes = TRUE, # tidy the placement of latent residuals
style="lisrel", # draw residuals in LISREL style
layout="tree"
)
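Besides the path diagram, the standardized solution can be tabulated to accompany it; a sketch using kable() from knitr (already loaded):
kable(standardizedSolution(cfa_fit), digits = 3)  # standardized loadings, covariances, and variances as a table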