1. Read and Create Data
t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\7. Data MSI - 810.csv"
MS = read.csv(t, header = T, sep = ";")
head(MS)
## ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1 1 0 1 6 3 3 1 2 10 1 1 0 4 1 0 1 15 1 3 3 3 4 4
## 2 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5
## 3 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4
## 4 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4
## 5 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4
## 6 6 0 1 4 1 4 1 20 30 1 1 0 5 3 0 2 50 0 3 2 4 4 4
## BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1 4 4 3 4 4 4 4 1 1 1 0 0 5 4 2 2 0 0 0 0 0 1 2
## 2 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3
## 3 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3
## 4 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3
## 5 2 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2
## 6 3 3 3 2 4 4 4 2 0 0 0 1 4 3 4 2 1 1 1 1 1 1 2
## NR
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
str(MS$MSI)
## int [1:810] 0 0 0 0 0 0 0 1 0 0 ...
# View data (number of rows and colume)
dim(MS)
## [1] 810 47
# Data non bus
MotorS = subset(MS,TM == 3)
MotorS = MotorS[, 2:47]
head(MotorS)
## CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE
## 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5 2
## 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4 3
## 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4 4
## 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4 2
## 8 0 1 3 1 4 1 10 25 1 1 0 2 3 1 2 5 1 3 2 5 5 5 3
## 9 0 1 3 1 4 2 12 30 1 1 1 2 1 0 1 7 1 3 3 4 4 4 3
## BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3 0
## 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3 0
## 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3 0
## 5 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2 0
## 8 3 3 3 5 5 5 1 1 2 1 0 4 7 2 0 1 0 0 1 0 0 2 0
## 9 3 3 3 4 4 4 1 1 3 0 0 3 7 3 1 1 0 1 1 0 1 3 0
dim(MotorS)
## [1] 516 46
# Sumerize Data
summary(MotorS)
## CA BC TM TP FR
## Min. :0.0000 Min. :0.0000 Min. :3 Min. :1.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3 1st Qu.:1.000 1st Qu.:4.000
## Median :0.0000 Median :1.0000 Median :3 Median :1.000 Median :4.000
## Mean :0.4516 Mean :0.5116 Mean :3 Mean :1.527 Mean :3.736
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:3 3rd Qu.:2.000 3rd Qu.:4.000
## Max. :1.0000 Max. :1.0000 Max. :3 Max. :4.000 Max. :4.000
## DT DI TI SC
## Min. :1.000 Min. : 0.200 Min. : 0.00 Min. :0.0000
## 1st Qu.:1.000 1st Qu.: 3.000 1st Qu.: 10.00 1st Qu.:1.0000
## Median :1.000 Median : 5.000 Median : 15.00 Median :1.0000
## Mean :1.804 Mean : 6.289 Mean : 15.84 Mean :0.8469
## 3rd Qu.:3.000 3rd Qu.:10.000 3rd Qu.: 20.00 3rd Qu.:1.0000
## Max. :4.000 Max. :30.000 Max. :180.00 Max. :1.0000
## LS TS MR WE
## Min. :0.0000 Min. :0.0000 Min. :1.000 Min. :1.000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.000
## Median :1.0000 Median :1.0000 Median :2.000 Median :1.000
## Mean :0.7868 Mean :0.7403 Mean :2.924 Mean :1.616
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :1.0000 Max. :3.0000 Max. :6.000 Max. :3.000
## WK NBR CO BUB
## Min. :0.0000 Min. :1.000 Min. : 0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.: 5.000 1st Qu.:0.0000
## Median :0.0000 Median :2.000 Median :10.000 Median :1.0000
## Mean :0.2229 Mean :2.841 Mean : 9.595 Mean :0.6163
## 3rd Qu.:0.0000 3rd Qu.:4.000 3rd Qu.:12.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :6.000 Max. :50.000 Max. :1.0000
## SBM SBS BST BSC BSA
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:3.000
## Median :2.00 Median :2.000 Median :3.000 Median :4.00 Median :4.000
## Mean :2.63 Mean :2.471 Mean :2.684 Mean :3.45 Mean :3.578
## 3rd Qu.:3.00 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
## BRE BCO BIN BHE
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :4.000 Median :3.000 Median :3.000 Median :3.000
## Mean :3.351 Mean :3.103 Mean :3.025 Mean :2.818
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.250
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## BEN BRC BWE SP
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:1.000
## Median :4.000 Median :4.000 Median :4.000 Median :1.000
## Mean :3.715 Mean :3.733 Mean :3.895 Mean :1.014
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:1.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :2.000
## BI WI MSI GD
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.000 Median :0.0000 Median :1.0000
## Mean :0.5698 Mean :1.163 Mean :0.2422 Mean :0.5717
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :6.000 Max. :1.0000 Max. :1.0000
## AG OC IN NC
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.00 1st Qu.:0.0000
## Median :4.000 Median :4.000 Median :2.00 Median :1.0000
## Mean :3.669 Mean :4.496 Mean :2.18 Mean :0.7209
## 3rd Qu.:4.000 3rd Qu.:7.000 3rd Qu.:3.00 3rd Qu.:1.0000
## Max. :6.000 Max. :8.000 Max. :4.00 Max. :3.0000
## MC CC BO MO
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.9516 Mean :0.1376 Mean :0.3508 Mean :0.9186
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## RO NB NM NR
## Min. :0.00000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:2.000 1st Qu.:0.0000
## Median :0.00000 Median :1.000 Median :2.000 Median :0.0000
## Mean :0.07946 Mean :0.655 Mean :2.271 Mean :0.1705
## 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:3.000 3rd Qu.:0.0000
## Max. :1.00000 Max. :4.000 Max. :4.000 Max. :2.0000
head(MotorS$TM)
## [1] 3 3 3 3 3 3
str(MotorS)
## 'data.frame': 516 obs. of 46 variables:
## $ CA : int 0 0 0 0 0 0 0 0 1 1 ...
## $ BC : int 1 1 1 1 1 1 1 1 0 0 ...
## $ TM : int 3 3 3 3 3 3 3 3 3 3 ...
## $ TP : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FR : int 4 4 4 4 4 4 4 4 4 4 ...
## $ DT : int 1 3 1 3 1 2 1 1 1 1 ...
## $ DI : num 8 5 5 8 10 12 10 3 2 4 ...
## $ TI : num 15 15 10 15 25 30 25 5 5 10 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : int 1 1 1 0 0 1 1 0 1 0 ...
## $ MR : int 2 4 2 2 2 2 2 2 4 3 ...
## $ WE : int 3 1 1 3 3 1 3 1 3 1 ...
## $ WK : int 0 0 0 0 1 0 0 0 0 0 ...
## $ NBR: int 1 2 4 2 2 1 4 4 4 2 ...
## $ CO : num 5 3 10 5 5 7 7 3 3 10 ...
## $ BUB: int 0 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 2 2 2 2 3 3 3 1 4 4 ...
## $ SBS: int 2 2 2 2 2 3 2 1 3 2 ...
## $ BST: int 2 2 3 3 5 4 2 3 2 3 ...
## $ BSC: int 4 4 4 4 5 4 5 3 4 3 ...
## $ BSA: int 5 4 4 4 5 4 5 4 4 4 ...
## $ BRE: int 2 3 4 2 3 3 3 1 4 4 ...
## $ BCO: int 2 3 4 2 3 3 2 4 4 2 ...
## $ BIN: int 4 4 4 3 3 3 4 3 4 4 ...
## $ BHE: int 2 2 3 2 3 3 2 4 3 2 ...
## $ BEN: int 5 4 3 4 5 4 5 4 4 4 ...
## $ BRC: int 5 4 3 4 5 4 5 4 5 4 ...
## $ BWE: int 3 4 5 4 5 4 4 4 5 4 ...
## $ SP : int 2 1 1 2 1 1 1 1 1 1 ...
## $ BI : int 0 1 1 0 1 1 1 1 1 1 ...
## $ WI : int 0 2 1 0 2 3 2 1 1 1 ...
## $ MSI: int 0 0 0 0 1 0 0 0 1 1 ...
## $ GD : int 0 1 1 0 0 0 1 1 1 1 ...
## $ AG : int 3 3 3 4 4 3 4 2 4 3 ...
## $ OC : int 6 2 2 6 7 7 3 1 7 7 ...
## $ IN : int 3 4 3 3 2 3 3 2 2 1 ...
## $ NC : int 1 0 0 1 0 1 0 0 1 0 ...
## $ MC : int 1 1 1 1 1 1 1 0 1 1 ...
## $ CC : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BO : int 1 0 1 0 0 1 0 1 0 0 ...
## $ MO : int 1 1 1 1 1 1 1 0 1 1 ...
## $ RO : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NB : int 1 0 1 0 0 1 0 1 2 0 ...
## $ NM : int 3 3 3 2 2 3 3 3 2 1 ...
## $ NR : int 0 0 0 0 0 0 0 0 0 0 ...
MotorS = MotorS [ , -3] # Bỏ biến TM
dim(MotorS)
## [1] 516 45
head(MotorS)
## CA BC TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE BCO
## 2 0 1 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5 2 2
## 3 0 1 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4 3 3
## 4 0 1 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4 4 4
## 5 0 1 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4 2 2
## 8 0 1 1 4 1 10 25 1 1 0 2 3 1 2 5 1 3 2 5 5 5 3 3
## 9 0 1 1 4 2 12 30 1 1 1 2 1 0 1 7 1 3 3 4 4 4 3 3
## BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3 0
## 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3 0
## 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3 0
## 5 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2 0
## 8 3 3 5 5 5 1 1 2 1 0 4 7 2 0 1 0 0 1 0 0 2 0
## 9 3 3 4 4 4 1 1 3 0 0 3 7 3 1 1 0 1 1 0 1 3 0
# Đổi biến AG (1-<=24,2-24-60,3->60)
MotorS$AG[MotorS$AG == 2] <- 1
MotorS$AG[MotorS$AG == 3] <- 1
MotorS$AG[MotorS$AG == 4] <- 2
MotorS$AG[MotorS$AG == 5] <- 2
MotorS$AG[MotorS$AG == 6] <- 3
head(MotorS$AG)
## [1] 1 1 1 2 2 1
# Đổi biến OC (1-HS/SV,2-CNVC,3-Nội trợ,4-LĐ tự do,5-Khác)
MotorS$OC[MotorS$OC == 2] <- 1
MotorS$OC[MotorS$OC == 3] <- 2
MotorS$OC[MotorS$OC == 6] <- 2
MotorS$OC[MotorS$OC == 4] <- 3
MotorS$OC[MotorS$OC == 5] <- 3
MotorS$OC[MotorS$OC == 7] <- 4
MotorS$OC[MotorS$OC == 8] <- 5
head(MotorS$OC)
## [1] 2 1 1 2 4 4
# Đổi biến FR (1-1l/t,2-2l/t,3->=3l/t)
MotorS$FR[MotorS$FR == 4] <- 3
head(MotorS$FR)
## [1] 3 3 3 3 3 3
# Đổi biến DT (1-CĐ/6-8h/16-18h,0-Ngoài giờ CĐ)
MotorS$DT[MotorS$DT == 3] <- 1
MotorS$DT[MotorS$DT == 2] <- 0
MotorS$DT[MotorS$DT == 4] <- 0
head(MotorS$DT)
## [1] 1 1 1 1 1 0
# Đổi biến TS (1-có, 2-không)
MotorS$TS[MotorS$TS == 2] <- 1
MotorS$TS[MotorS$TS == 3] <- 1
head(MotorS$TS)
## [1] 1 1 1 0 0 1
# Đổi biến WE (1-có mưa, 2-không)
MotorS$WE[MotorS$WE == 1] <- 0
MotorS$WE[MotorS$WE == 2] <- 1
MotorS$WE[MotorS$WE == 3] <- 0
head(MotorS$WE)
## [1] 0 0 0 0 0 0
# Đổi biến SBM (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$SBM[MotorS$SBM == 2] <- 1
MotorS$SBM[MotorS$SBM == 3] <- 2
MotorS$SBM[MotorS$SBM == 4] <- 3
MotorS$SBM[MotorS$SBM == 5] <- 3
head(MotorS$SBM)
## [1] 1 1 1 1 2 2
# Đổi biến SBS (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$SBS[MotorS$SBS == 2] <- 1
MotorS$SBS[MotorS$SBS == 3] <- 2
MotorS$SBS[MotorS$SBS == 4] <- 3
MotorS$SBS[MotorS$SBS == 5] <- 3
head(MotorS$SBS)
## [1] 1 1 1 1 1 2
# Đổi biến BST (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BST[MotorS$BST == 2] <- 1
MotorS$BST[MotorS$BST == 3] <- 2
MotorS$BST[MotorS$BST == 4] <- 3
MotorS$BST[MotorS$BST == 5] <- 3
head(MotorS$BST)
## [1] 1 1 2 2 3 3
# Đổi biến BSC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BSC[MotorS$BSC == 2] <- 1
MotorS$BSC[MotorS$BSC == 3] <- 2
MotorS$BSC[MotorS$BSC == 4] <- 3
MotorS$BSC[MotorS$BSC == 5] <- 3
head(MotorS$BSC)
## [1] 3 3 3 3 3 3
# Đổi biến BSA (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BSA[MotorS$BSA == 2] <- 1
MotorS$BSA[MotorS$BSA == 3] <- 2
MotorS$BSA[MotorS$BSA == 4] <- 3
MotorS$BSA[MotorS$BSA == 5] <- 3
head(MotorS$BSA)
## [1] 3 3 3 3 3 3
# Đổi biến BRE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BRE[MotorS$BRE == 2] <- 1
MotorS$BRE[MotorS$BRE == 3] <- 2
MotorS$BRE[MotorS$BRE == 4] <- 3
MotorS$BRE[MotorS$BRE == 5] <- 3
head(MotorS$BRE)
## [1] 1 2 3 1 2 2
# Đổi biến BCO (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BCO[MotorS$BCO == 2] <- 1
MotorS$BCO[MotorS$BCO == 3] <- 2
MotorS$BCO[MotorS$BCO == 4] <- 3
MotorS$BCO[MotorS$BCO == 5] <- 3
head(MotorS$BCO)
## [1] 1 2 3 1 2 2
# Đổi biến BIN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BIN[MotorS$BIN == 2] <- 1
MotorS$BIN[MotorS$BIN == 3] <- 2
MotorS$BIN[MotorS$BIN == 4] <- 3
MotorS$BIN[MotorS$BIN == 5] <- 3
head(MotorS$BIN)
## [1] 3 3 3 2 2 2
# Đổi biến BHE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BHE[MotorS$BHE == 2] <- 1
MotorS$BHE[MotorS$BHE == 3] <- 2
MotorS$BHE[MotorS$BHE == 4] <- 3
MotorS$BHE[MotorS$BHE == 5] <- 3
head(MotorS$BHE)
## [1] 1 1 2 1 2 2
# Đổi biến BEN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BEN[MotorS$BEN == 2] <- 1
MotorS$BEN[MotorS$BEN == 3] <- 2
MotorS$BEN[MotorS$BEN == 4] <- 3
MotorS$BEN[MotorS$BEN == 5] <- 3
head(MotorS$BEN)
## [1] 3 3 2 3 3 3
# Đổi biến BRC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BRC[MotorS$BRC == 2] <- 1
MotorS$BRC[MotorS$BRC == 3] <- 2
MotorS$BRC[MotorS$BRC == 4] <- 3
MotorS$BRC[MotorS$BRC == 5] <- 3
head(MotorS$BRC)
## [1] 3 3 2 3 3 3
# Đổi biến BWE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BWE[MotorS$BWE == 2] <- 1
MotorS$BWE[MotorS$BWE == 3] <- 2
MotorS$BWE[MotorS$BWE == 4] <- 3
MotorS$BWE[MotorS$BWE == 5] <- 3
head(MotorS$BWE)
## [1] 2 3 3 3 3 3
# Đổi biến NB (0-0xe,1-1xe,2->=2xe)
MotorS$NB[MotorS$NB == 3] <- 2
MotorS$NB[MotorS$NB == 4] <- 2
head(MotorS$NB)
## [1] 1 0 1 0 0 1
# Đổi biến NM (0-0xe,1-1xe,2-2xe,3->=3xe)
MotorS$NM[MotorS$NM == 3] <- 2
MotorS$NM[MotorS$NM == 4] <- 2
head(MotorS$NM)
## [1] 2 2 2 2 2 2
# Đổi biến NR (0-0xe,1-Có xe)
MotorS$NR[MotorS$NR == 2] <- 1
MotorS$NR[MotorS$NR == 3] <- 1
head(MotorS$NR)
## [1] 0 0 0 0 0 0
# Đổi biến NC (0-No,1-Yes)
MotorS$NC[MotorS$NC == 2] <- 1
MotorS$NC[MotorS$NC == 3] <- 1
head(MotorS$NC)
## [1] 1 0 0 1 0 1
# Đổi biến WI (0-No,1-Yes)
MotorS$WI[MotorS$WI >= 5] <- 4
MotorS$WI[MotorS$WI >= 6] <- 4
head(MotorS$WI)
## [1] 0 2 1 0 2 3
head(MotorS$BUB)
## [1] 0 1 1 1 1 1
head(MotorS)
## CA BC TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE BCO
## 2 0 1 1 3 1 8 15 1 1 1 2 0 0 1 5 0 1 1 1 3 3 1 1
## 3 0 1 1 3 1 5 15 1 1 1 4 0 0 2 3 1 1 1 1 3 3 2 2
## 4 0 1 1 3 1 5 10 1 1 1 2 0 0 4 10 1 1 1 2 3 3 3 3
## 5 0 1 1 3 1 8 15 1 1 0 2 0 0 2 5 1 1 1 2 3 3 1 1
## 8 0 1 1 3 1 10 25 1 1 0 2 0 1 2 5 1 2 1 3 3 3 2 2
## 9 0 1 1 3 0 12 30 1 1 1 2 0 0 1 7 1 2 2 3 3 3 2 2
## BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 2 3 1 3 3 2 2 0 0 0 0 1 2 3 1 1 0 1 1 0 1 2 0
## 3 3 1 3 3 3 1 1 2 0 1 1 1 4 0 1 0 0 1 0 0 2 0
## 4 3 2 2 2 3 1 1 1 0 1 1 1 3 0 1 0 1 1 0 1 2 0
## 5 2 1 3 3 3 2 0 0 0 0 2 2 3 1 1 0 0 1 0 0 2 0
## 8 2 2 3 3 3 1 1 2 1 0 2 4 2 0 1 0 0 1 0 0 2 0
## 9 2 2 3 3 3 1 1 3 0 0 1 4 3 1 1 0 1 1 0 1 2 0
dim(MotorS)
## [1] 516 45
str(MotorS)
## 'data.frame': 516 obs. of 45 variables:
## $ CA : int 0 0 0 0 0 0 0 0 1 1 ...
## $ BC : int 1 1 1 1 1 1 1 1 0 0 ...
## $ TP : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FR : num 3 3 3 3 3 3 3 3 3 3 ...
## $ DT : num 1 1 1 1 1 0 1 1 1 1 ...
## $ DI : num 8 5 5 8 10 12 10 3 2 4 ...
## $ TI : num 15 15 10 15 25 30 25 5 5 10 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : num 1 1 1 0 0 1 1 0 1 0 ...
## $ MR : int 2 4 2 2 2 2 2 2 4 3 ...
## $ WE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ WK : int 0 0 0 0 1 0 0 0 0 0 ...
## $ NBR: int 1 2 4 2 2 1 4 4 4 2 ...
## $ CO : num 5 3 10 5 5 7 7 3 3 10 ...
## $ BUB: int 0 1 1 1 1 1 1 1 1 1 ...
## $ SBM: num 1 1 1 1 2 2 2 1 3 3 ...
## $ SBS: num 1 1 1 1 1 2 1 1 2 1 ...
## $ BST: num 1 1 2 2 3 3 1 2 1 2 ...
## $ BSC: num 3 3 3 3 3 3 3 2 3 2 ...
## $ BSA: num 3 3 3 3 3 3 3 3 3 3 ...
## $ BRE: num 1 2 3 1 2 2 2 1 3 3 ...
## $ BCO: num 1 2 3 1 2 2 1 3 3 1 ...
## $ BIN: num 3 3 3 2 2 2 3 2 3 3 ...
## $ BHE: num 1 1 2 1 2 2 1 3 2 1 ...
## $ BEN: num 3 3 2 3 3 3 3 3 3 3 ...
## $ BRC: num 3 3 2 3 3 3 3 3 3 3 ...
## $ BWE: num 2 3 3 3 3 3 3 3 3 3 ...
## $ SP : int 2 1 1 2 1 1 1 1 1 1 ...
## $ BI : int 0 1 1 0 1 1 1 1 1 1 ...
## $ WI : num 0 2 1 0 2 3 2 1 1 1 ...
## $ MSI: int 0 0 0 0 1 0 0 0 1 1 ...
## $ GD : int 0 1 1 0 0 0 1 1 1 1 ...
## $ AG : num 1 1 1 2 2 1 2 1 2 1 ...
## $ OC : num 2 1 1 2 4 4 2 1 4 4 ...
## $ IN : int 3 4 3 3 2 3 3 2 2 1 ...
## $ NC : num 1 0 0 1 0 1 0 0 1 0 ...
## $ MC : int 1 1 1 1 1 1 1 0 1 1 ...
## $ CC : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BO : int 1 0 1 0 0 1 0 1 0 0 ...
## $ MO : int 1 1 1 1 1 1 1 0 1 1 ...
## $ RO : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NB : num 1 0 1 0 0 1 0 1 2 0 ...
## $ NM : num 2 2 2 2 2 2 2 2 2 1 ...
## $ NR : num 0 0 0 0 0 0 0 0 0 0 ...
# Reformat variables in factor variables - Đổi định dạng biến
attach(MotorS)
MotorS = within(MotorS, {
CA = factor(CA, labels = c("No", "Yes"))
WE = factor(WE, labels = c("No", "Yes"))
WK = factor(WK, labels = c("No", "Yes"))
TP = factor(TP, labels = c("Work/Study", "Picking Children", "Entertainment/Food", "Others"))
FR = factor(FR, labels = c("Once", "twice", ">=3 times"))
DT = factor(DT, labels = c("Normal", "Rush hour"))
TS = factor(TS, labels = c("No", "Yes"))
SC = factor(SC, labels = c("No", "Yes"))
LS = factor(LS, labels = c("No", "Yes"))
BC = factor(BC,labels = c("No", "Yes"))
SP = factor(SP, labels = c("No", "Yes", "Don't know"))
BI = factor(BI, labels = c("No", "Yes"))
WI = factor(WI, labels = c("No", "At Bus Stops", "Internet/Facebook", "From Friends", "Others"))
MSI = factor(MSI, labels = c("No", "Yes"))
BST = factor(BST, labels = c("Don't agree", "Normal", "Agree"))
BSC = factor(BSC, labels = c("Don't agree", "Normal", "Agree"))
BSA = factor(BSA, labels = c("Don't agree", "Normal", "Agree"))
BRE = factor(BRE, labels = c("Don't agree", "Normal", "Agree"))
BCO = factor(BCO, labels = c("Don't agree", "Normal", "Agree"))
BIN = factor(BIN, labels = c("Don't agree", "Normal", "Agree"))
BHE = factor(BHE, labels = c("Don't agree", "Normal", "Agree"))
BEN = factor(BEN, labels = c("Don't agree", "Normal", "Agree"))
BRC = factor(BRC, labels = c("Don't agree", "Normal", "Agree"))
BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
SBM = factor(SBM, labels = c("Don't agree", "Normal", "Agree"))
SBS = factor(SBS, labels = c("Don't agree", "Normal", "Agree"))
BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
MR = factor(MR, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
NBR = factor(NBR, labels = c("No Route", "Inconvenience", "Unsafety", "Long waiting time", "Unreliability", "others"))
BUB = factor(BUB, labels = c("No", "Yes"))
GD = factor(GD, labels = c("Female", "Male"))
AG = factor(AG, labels = c("<=24", "25-60", ">60"))
OC = factor(OC, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
IN = factor(IN, labels = c("<8 millions", "(8-15) millions", "(15-25) millions", ">25 millions"))
NC = factor(NC, labels = c("No", "Yes"))
MC = factor(MC, labels = c("No", "Yes"))
CC = factor(CC, labels = c("No", "Yes"))
BO = factor(BO, labels = c("No", "Yes"))
MO = factor(MO, labels = c("No", "Yes"))
RO = factor(RO, labels = c("No", "Yes"))
NB = factor(NB, labels = c("None", "1", ">=2"))
NM = factor(NM, labels = c("None", "1", ">=2"))
NR = factor(NR, labels = c("No", "Yes"))
DI = as.numeric(DI)
CO = as.numeric(CO)
TI = as.numeric(TI)
} )
head(MotorS)
## CA BC TP FR DT DI TI SC LS TS MR WE WK
## 2 No Yes Work/Study >=3 times Rush hour 8 15 Yes Yes Yes Comfortable No No
## 3 No Yes Work/Study >=3 times Rush hour 5 15 Yes Yes Yes Accessibility No No
## 4 No Yes Work/Study >=3 times Rush hour 5 10 Yes Yes Yes Comfortable No No
## 5 No Yes Work/Study >=3 times Rush hour 8 15 Yes Yes No Comfortable No No
## 8 No Yes Work/Study >=3 times Rush hour 10 25 Yes Yes No Comfortable No Yes
## 9 No Yes Work/Study >=3 times Normal 12 30 Yes Yes Yes Comfortable No No
## NBR CO BUB SBM SBS BST BSC BSA
## 2 No Route 5 No Don't agree Don't agree Don't agree Agree Agree
## 3 Inconvenience 3 Yes Don't agree Don't agree Don't agree Agree Agree
## 4 Long waiting time 10 Yes Don't agree Don't agree Normal Agree Agree
## 5 Inconvenience 5 Yes Don't agree Don't agree Normal Agree Agree
## 8 Inconvenience 5 Yes Normal Don't agree Agree Agree Agree
## 9 No Route 7 Yes Normal Normal Agree Agree Agree
## BRE BCO BIN BHE BEN BRC BWE SP
## 2 Don't agree Don't agree Agree Don't agree Agree Agree Normal Don't know
## 3 Normal Normal Agree Don't agree Agree Agree Agree Yes
## 4 Agree Agree Agree Normal Normal Normal Agree Yes
## 5 Don't agree Don't agree Normal Don't agree Agree Agree Agree Don't know
## 8 Normal Normal Normal Normal Agree Agree Agree Yes
## 9 Normal Normal Normal Normal Agree Agree Agree Yes
## BI WI MSI GD AG OC IN NC
## 2 No No No Female <=24 Officers/Workers (15-25) millions Yes
## 3 Yes Internet/Facebook No Male <=24 Pupils/Students >25 millions No
## 4 Yes At Bus Stops No Male <=24 Pupils/Students (15-25) millions No
## 5 No No No Female 25-60 Officers/Workers (15-25) millions Yes
## 8 Yes Internet/Facebook Yes Female 25-60 Free labor (8-15) millions No
## 9 Yes From Friends No Female <=24 Free labor (15-25) millions Yes
## MC CC BO MO RO NB NM NR
## 2 Yes No Yes Yes No 1 >=2 No
## 3 Yes No No Yes No None >=2 No
## 4 Yes No Yes Yes No 1 >=2 No
## 5 Yes No No Yes No None >=2 No
## 8 Yes No No Yes No None >=2 No
## 9 Yes No Yes Yes No 1 >=2 No
str(MotorS)
## 'data.frame': 516 obs. of 45 variables:
## $ CA : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 2 ...
## $ BC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 1 1 ...
## $ TP : Factor w/ 4 levels "Work/Study","Picking Children",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ FR : Factor w/ 3 levels "Once","twice",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ DT : Factor w/ 2 levels "Normal","Rush hour": 2 2 2 2 2 1 2 2 2 2 ...
## $ DI : num 8 5 5 8 10 12 10 3 2 4 ...
## $ TI : num 15 15 10 15 25 30 25 5 5 10 ...
## $ SC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ LS : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ TS : Factor w/ 2 levels "No","Yes": 2 2 2 1 1 2 2 1 2 1 ...
## $ MR : Factor w/ 6 levels "Safety","Comfortable",..: 2 4 2 2 2 2 2 2 4 3 ...
## $ WE : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ WK : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## $ NBR: Factor w/ 6 levels "No Route","Inconvenience",..: 1 2 4 2 2 1 4 4 4 2 ...
## $ CO : num 5 3 10 5 5 7 7 3 3 10 ...
## $ BUB: Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ SBM: Factor w/ 3 levels "Don't agree",..: 1 1 1 1 2 2 2 1 3 3 ...
## $ SBS: Factor w/ 3 levels "Don't agree",..: 1 1 1 1 1 2 1 1 2 1 ...
## $ BST: Factor w/ 3 levels "Don't agree",..: 1 1 2 2 3 3 1 2 1 2 ...
## $ BSC: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 2 3 2 ...
## $ BSA: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ BRE: Factor w/ 3 levels "Don't agree",..: 1 2 3 1 2 2 2 1 3 3 ...
## $ BCO: Factor w/ 3 levels "Don't agree",..: 1 2 3 1 2 2 1 3 3 1 ...
## $ BIN: Factor w/ 3 levels "Don't agree",..: 3 3 3 2 2 2 3 2 3 3 ...
## $ BHE: Factor w/ 3 levels "Don't agree",..: 1 1 2 1 2 2 1 3 2 1 ...
## $ BEN: Factor w/ 3 levels "Don't agree",..: 3 3 2 3 3 3 3 3 3 3 ...
## $ BRC: Factor w/ 3 levels "Don't agree",..: 3 3 2 3 3 3 3 3 3 3 ...
## $ BWE: Factor w/ 3 levels "Don't agree",..: 2 3 3 3 3 3 3 3 3 3 ...
## $ SP : Factor w/ 3 levels "No","Yes","Don't know": 3 2 2 3 2 2 2 2 2 2 ...
## $ BI : Factor w/ 2 levels "No","Yes": 1 2 2 1 2 2 2 2 2 2 ...
## $ WI : Factor w/ 5 levels "No","At Bus Stops",..: 1 3 2 1 3 4 3 2 2 2 ...
## $ MSI: Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 2 2 ...
## $ GD : Factor w/ 2 levels "Female","Male": 1 2 2 1 1 1 2 2 2 2 ...
## $ AG : Factor w/ 3 levels "<=24","25-60",..: 1 1 1 2 2 1 2 1 2 1 ...
## $ OC : Factor w/ 5 levels "Pupils/Students",..: 2 1 1 2 4 4 2 1 4 4 ...
## $ IN : Factor w/ 4 levels "<8 millions",..: 3 4 3 3 2 3 3 2 2 1 ...
## $ NC : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 2 1 1 2 1 ...
## $ MC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
## $ CC : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ BO : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 2 1 2 1 1 ...
## $ MO : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
## $ RO : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ NB : Factor w/ 3 levels "None","1",">=2": 2 1 2 1 1 2 1 2 3 1 ...
## $ NM : Factor w/ 3 levels "None","1",">=2": 3 3 3 3 3 3 3 3 3 2 ...
## $ NR : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
2. Descriptive analyse
# Descriptive analyse with package "table 1" - sort by MSI
## install.packages("table1", dependencies = T)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1 (~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI| MSI , data = MotorS)
| No (N=391) |
Yes (N=125) |
Overall (N=516) |
|
|---|---|---|---|
| GD | |||
| Female | 172 (44.0%) | 49 (39.2%) | 221 (42.8%) |
| Male | 219 (56.0%) | 76 (60.8%) | 295 (57.2%) |
| AG | |||
| <=24 | 155 (39.6%) | 59 (47.2%) | 214 (41.5%) |
| 25-60 | 231 (59.1%) | 62 (49.6%) | 293 (56.8%) |
| >60 | 5 (1.3%) | 4 (3.2%) | 9 (1.7%) |
| OC | |||
| Pupils/Students | 117 (29.9%) | 47 (37.6%) | 164 (31.8%) |
| Officers/Workers | 124 (31.7%) | 29 (23.2%) | 153 (29.7%) |
| Housewife/Unemployed | 24 (6.1%) | 6 (4.8%) | 30 (5.8%) |
| Free labor | 108 (27.6%) | 31 (24.8%) | 139 (26.9%) |
| Others | 18 (4.6%) | 12 (9.6%) | 30 (5.8%) |
| IN | |||
| <8 millions | 90 (23.0%) | 23 (18.4%) | 113 (21.9%) |
| (8-15) millions | 173 (44.2%) | 63 (50.4%) | 236 (45.7%) |
| (15-25) millions | 99 (25.3%) | 29 (23.2%) | 128 (24.8%) |
| >25 millions | 29 (7.4%) | 10 (8.0%) | 39 (7.6%) |
| NC | |||
| No | 171 (43.7%) | 52 (41.6%) | 223 (43.2%) |
| Yes | 220 (56.3%) | 73 (58.4%) | 293 (56.8%) |
| MC | |||
| No | 17 (4.3%) | 8 (6.4%) | 25 (4.8%) |
| Yes | 374 (95.7%) | 117 (93.6%) | 491 (95.2%) |
| CC | |||
| No | 335 (85.7%) | 110 (88.0%) | 445 (86.2%) |
| Yes | 56 (14.3%) | 15 (12.0%) | 71 (13.8%) |
| BO | |||
| No | 245 (62.7%) | 90 (72.0%) | 335 (64.9%) |
| Yes | 146 (37.3%) | 35 (28.0%) | 181 (35.1%) |
| MO | |||
| No | 32 (8.2%) | 10 (8.0%) | 42 (8.1%) |
| Yes | 359 (91.8%) | 115 (92.0%) | 474 (91.9%) |
| RO | |||
| No | 362 (92.6%) | 113 (90.4%) | 475 (92.1%) |
| Yes | 29 (7.4%) | 12 (9.6%) | 41 (7.9%) |
| NB | |||
| None | 187 (47.8%) | 66 (52.8%) | 253 (49.0%) |
| 1 | 151 (38.6%) | 46 (36.8%) | 197 (38.2%) |
| >=2 | 53 (13.6%) | 13 (10.4%) | 66 (12.8%) |
| NM | |||
| None | 5 (1.3%) | 0 (0%) | 5 (1.0%) |
| 1 | 54 (13.8%) | 14 (11.2%) | 68 (13.2%) |
| >=2 | 332 (84.9%) | 111 (88.8%) | 443 (85.9%) |
| NR | |||
| No | 332 (84.9%) | 103 (82.4%) | 435 (84.3%) |
| Yes | 59 (15.1%) | 22 (17.6%) | 81 (15.7%) |
| TP | |||
| Work/Study | 255 (65.2%) | 95 (76.0%) | 350 (67.8%) |
| Picking Children | 57 (14.6%) | 12 (9.6%) | 69 (13.4%) |
| Entertainment/Food | 72 (18.4%) | 16 (12.8%) | 88 (17.1%) |
| Others | 7 (1.8%) | 2 (1.6%) | 9 (1.7%) |
| FR | |||
| Once | 21 (5.4%) | 2 (1.6%) | 23 (4.5%) |
| twice | 15 (3.8%) | 2 (1.6%) | 17 (3.3%) |
| >=3 times | 355 (90.8%) | 121 (96.8%) | 476 (92.2%) |
| DT | |||
| Normal | 85 (21.7%) | 20 (16.0%) | 105 (20.3%) |
| Rush hour | 306 (78.3%) | 105 (84.0%) | 411 (79.7%) |
| TS | |||
| No | 178 (45.5%) | 69 (55.2%) | 247 (47.9%) |
| Yes | 213 (54.5%) | 56 (44.8%) | 269 (52.1%) |
| SP | |||
| No | 40 (10.2%) | 8 (6.4%) | 48 (9.3%) |
| Yes | 300 (76.7%) | 113 (90.4%) | 413 (80.0%) |
| Don't know | 51 (13.0%) | 4 (3.2%) | 55 (10.7%) |
| BI | |||
| No | 194 (49.6%) | 28 (22.4%) | 222 (43.0%) |
| Yes | 197 (50.4%) | 97 (77.6%) | 294 (57.0%) |
| WI | |||
| No | 194 (49.6%) | 28 (22.4%) | 222 (43.0%) |
| At Bus Stops | 105 (26.9%) | 39 (31.2%) | 144 (27.9%) |
| Internet/Facebook | 43 (11.0%) | 30 (24.0%) | 73 (14.1%) |
| From Friends | 24 (6.1%) | 18 (14.4%) | 42 (8.1%) |
| Others | 25 (6.4%) | 10 (8.0%) | 35 (6.8%) |
| LS | |||
| No | 76 (19.4%) | 34 (27.2%) | 110 (21.3%) |
| Yes | 315 (80.6%) | 91 (72.8%) | 406 (78.7%) |
| SC | |||
| No | 66 (16.9%) | 13 (10.4%) | 79 (15.3%) |
| Yes | 325 (83.1%) | 112 (89.6%) | 437 (84.7%) |
| BC | |||
| No | 176 (45.0%) | 76 (60.8%) | 252 (48.8%) |
| Yes | 215 (55.0%) | 49 (39.2%) | 264 (51.2%) |
| WE | |||
| No | 389 (99.5%) | 125 (100%) | 514 (99.6%) |
| Yes | 2 (0.5%) | 0 (0%) | 2 (0.4%) |
| WK | |||
| No | 302 (77.2%) | 99 (79.2%) | 401 (77.7%) |
| Yes | 89 (22.8%) | 26 (20.8%) | 115 (22.3%) |
| CA | |||
| No | 209 (53.5%) | 74 (59.2%) | 283 (54.8%) |
| Yes | 182 (46.5%) | 51 (40.8%) | 233 (45.2%) |
| BST | |||
| Don't agree | 182 (46.5%) | 66 (52.8%) | 248 (48.1%) |
| Normal | 121 (30.9%) | 23 (18.4%) | 144 (27.9%) |
| Agree | 88 (22.5%) | 36 (28.8%) | 124 (24.0%) |
| BSC | |||
| Don't agree | 61 (15.6%) | 20 (16.0%) | 81 (15.7%) |
| Normal | 108 (27.6%) | 21 (16.8%) | 129 (25.0%) |
| Agree | 222 (56.8%) | 84 (67.2%) | 306 (59.3%) |
| BSA | |||
| Don't agree | 40 (10.2%) | 12 (9.6%) | 52 (10.1%) |
| Normal | 113 (28.9%) | 20 (16.0%) | 133 (25.8%) |
| Agree | 238 (60.9%) | 93 (74.4%) | 331 (64.1%) |
| BRE | |||
| Don't agree | 81 (20.7%) | 20 (16.0%) | 101 (19.6%) |
| Normal | 117 (29.9%) | 33 (26.4%) | 150 (29.1%) |
| Agree | 193 (49.4%) | 72 (57.6%) | 265 (51.4%) |
| BCO | |||
| Don't agree | 119 (30.4%) | 39 (31.2%) | 158 (30.6%) |
| Normal | 117 (29.9%) | 32 (25.6%) | 149 (28.9%) |
| Agree | 155 (39.6%) | 54 (43.2%) | 209 (40.5%) |
| BIN | |||
| Don't agree | 104 (26.6%) | 37 (29.6%) | 141 (27.3%) |
| Normal | 167 (42.7%) | 46 (36.8%) | 213 (41.3%) |
| Agree | 120 (30.7%) | 42 (33.6%) | 162 (31.4%) |
| BHE | |||
| Don't agree | 182 (46.5%) | 45 (36.0%) | 227 (44.0%) |
| Normal | 123 (31.5%) | 37 (29.6%) | 160 (31.0%) |
| Agree | 86 (22.0%) | 43 (34.4%) | 129 (25.0%) |
| BEN | |||
| Don't agree | 37 (9.5%) | 11 (8.8%) | 48 (9.3%) |
| Normal | 87 (22.3%) | 21 (16.8%) | 108 (20.9%) |
| Agree | 267 (68.3%) | 93 (74.4%) | 360 (69.8%) |
| BRC | |||
| Don't agree | 43 (11.0%) | 14 (11.2%) | 57 (11.0%) |
| Normal | 70 (17.9%) | 17 (13.6%) | 87 (16.9%) |
| Agree | 278 (71.1%) | 94 (75.2%) | 372 (72.1%) |
| BWE | |||
| Don't agree | 32 (8.2%) | 11 (8.8%) | 43 (8.3%) |
| Normal | 37 (9.5%) | 13 (10.4%) | 50 (9.7%) |
| Agree | 322 (82.4%) | 101 (80.8%) | 423 (82.0%) |
| MR | |||
| Safety | 21 (5.4%) | 6 (4.8%) | 27 (5.2%) |
| Comfortable | 200 (51.2%) | 54 (43.2%) | 254 (49.2%) |
| Low price | 19 (4.9%) | 5 (4.0%) | 24 (4.7%) |
| Accessibility | 109 (27.9%) | 52 (41.6%) | 161 (31.2%) |
| Reliability | 35 (9.0%) | 7 (5.6%) | 42 (8.1%) |
| others | 7 (1.8%) | 1 (0.8%) | 8 (1.6%) |
| NBR | |||
| No Route | 86 (22.0%) | 36 (28.8%) | 122 (23.6%) |
| Inconvenience | 123 (31.5%) | 36 (28.8%) | 159 (30.8%) |
| Unsafety | 6 (1.5%) | 1 (0.8%) | 7 (1.4%) |
| Long waiting time | 120 (30.7%) | 38 (30.4%) | 158 (30.6%) |
| Unreliability | 39 (10.0%) | 8 (6.4%) | 47 (9.1%) |
| others | 17 (4.3%) | 6 (4.8%) | 23 (4.5%) |
| BUB | |||
| No | 168 (43.0%) | 30 (24.0%) | 198 (38.4%) |
| Yes | 223 (57.0%) | 95 (76.0%) | 318 (61.6%) |
| SBM | |||
| Don't agree | 218 (55.8%) | 45 (36.0%) | 263 (51.0%) |
| Normal | 136 (34.8%) | 33 (26.4%) | 169 (32.8%) |
| Agree | 37 (9.5%) | 47 (37.6%) | 84 (16.3%) |
| SBS | |||
| Don't agree | 235 (60.1%) | 64 (51.2%) | 299 (57.9%) |
| Normal | 138 (35.3%) | 35 (28.0%) | 173 (33.5%) |
| Agree | 18 (4.6%) | 26 (20.8%) | 44 (8.5%) |
| CO | |||
| Mean (SD) | 9.42 (7.27) | 10.1 (6.33) | 9.60 (7.06) |
| Median [Min, Max] | 8.00 [0, 50.0] | 10.0 [0.800, 30.0] | 10.0 [0, 50.0] |
| DI | |||
| Mean (SD) | 6.19 (4.79) | 6.58 (5.45) | 6.29 (4.95) |
| Median [Min, Max] | 5.00 [0.200, 30.0] | 5.00 [0.200, 30.0] | 5.00 [0.200, 30.0] |
| TI | |||
| Mean (SD) | 16.0 (13.1) | 15.3 (11.8) | 15.8 (12.8) |
| Median [Min, Max] | 15.0 [0, 180] | 10.0 [2.00, 60.0] | 15.0 [0, 180] |
# Descriptive analyse with package "compareGroups" - sort by MSI (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
library(compareGroups)
t = compareGroups(MSI ~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI, data = MotorS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t)
##
## --------Summary descriptives table by 'MSI'---------
##
## __________________________________________________________
## No Yes p.overall
## N=391 N=125
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## GD: 0.402
## Female 172 (44.0%) 49 (39.2%)
## Male 219 (56.0%) 76 (60.8%)
## AG: 0.075
## <=24 155 (39.6%) 59 (47.2%)
## 25-60 231 (59.1%) 62 (49.6%)
## >60 5 (1.28%) 4 (3.20%)
## OC: 0.069
## Pupils/Students 117 (29.9%) 47 (37.6%)
## Officers/Workers 124 (31.7%) 29 (23.2%)
## Housewife/Unemployed 24 (6.14%) 6 (4.80%)
## Free labor 108 (27.6%) 31 (24.8%)
## Others 18 (4.60%) 12 (9.60%)
## IN: 0.589
## <8 millions 90 (23.0%) 23 (18.4%)
## (8-15) millions 173 (44.2%) 63 (50.4%)
## (15-25) millions 99 (25.3%) 29 (23.2%)
## >25 millions 29 (7.42%) 10 (8.00%)
## NC: 0.752
## No 171 (43.7%) 52 (41.6%)
## Yes 220 (56.3%) 73 (58.4%)
## MC: 0.490
## No 17 (4.35%) 8 (6.40%)
## Yes 374 (95.7%) 117 (93.6%)
## CC: 0.612
## No 335 (85.7%) 110 (88.0%)
## Yes 56 (14.3%) 15 (12.0%)
## BO: 0.072
## No 245 (62.7%) 90 (72.0%)
## Yes 146 (37.3%) 35 (28.0%)
## MO: 1.000
## No 32 (8.18%) 10 (8.00%)
## Yes 359 (91.8%) 115 (92.0%)
## RO: 0.551
## No 362 (92.6%) 113 (90.4%)
## Yes 29 (7.42%) 12 (9.60%)
## NB: 0.523
## None 187 (47.8%) 66 (52.8%)
## 1 151 (38.6%) 46 (36.8%)
## >=2 53 (13.6%) 13 (10.4%)
## NM: 0.434
## None 5 (1.28%) 0 (0.00%)
## 1 54 (13.8%) 14 (11.2%)
## >=2 332 (84.9%) 111 (88.8%)
## NR: 0.596
## No 332 (84.9%) 103 (82.4%)
## Yes 59 (15.1%) 22 (17.6%)
## TP: 0.158
## Work/Study 255 (65.2%) 95 (76.0%)
## Picking Children 57 (14.6%) 12 (9.60%)
## Entertainment/Food 72 (18.4%) 16 (12.8%)
## Others 7 (1.79%) 2 (1.60%)
## FR: 0.103
## Once 21 (5.37%) 2 (1.60%)
## twice 15 (3.84%) 2 (1.60%)
## >=3 times 355 (90.8%) 121 (96.8%)
## DT: 0.208
## Normal 85 (21.7%) 20 (16.0%)
## Rush hour 306 (78.3%) 105 (84.0%)
## TS: 0.075
## No 178 (45.5%) 69 (55.2%)
## Yes 213 (54.5%) 56 (44.8%)
## SP: 0.002
## No 40 (10.2%) 8 (6.40%)
## Yes 300 (76.7%) 113 (90.4%)
## Don't know 51 (13.0%) 4 (3.20%)
## BI: <0.001
## No 194 (49.6%) 28 (22.4%)
## Yes 197 (50.4%) 97 (77.6%)
## WI: <0.001
## No 194 (49.6%) 28 (22.4%)
## At Bus Stops 105 (26.9%) 39 (31.2%)
## Internet/Facebook 43 (11.0%) 30 (24.0%)
## From Friends 24 (6.14%) 18 (14.4%)
## Others 25 (6.39%) 10 (8.00%)
## LS: 0.086
## No 76 (19.4%) 34 (27.2%)
## Yes 315 (80.6%) 91 (72.8%)
## SC: 0.108
## No 66 (16.9%) 13 (10.4%)
## Yes 325 (83.1%) 112 (89.6%)
## BC: 0.003
## No 176 (45.0%) 76 (60.8%)
## Yes 215 (55.0%) 49 (39.2%)
## WE: 1.000
## No 389 (99.5%) 125 (100%)
## Yes 2 (0.51%) 0 (0.00%)
## WK: 0.737
## No 302 (77.2%) 99 (79.2%)
## Yes 89 (22.8%) 26 (20.8%)
## CA: 0.307
## No 209 (53.5%) 74 (59.2%)
## Yes 182 (46.5%) 51 (40.8%)
## BST: 0.022
## Don't agree 182 (46.5%) 66 (52.8%)
## Normal 121 (30.9%) 23 (18.4%)
## Agree 88 (22.5%) 36 (28.8%)
## BSC: 0.045
## Don't agree 61 (15.6%) 20 (16.0%)
## Normal 108 (27.6%) 21 (16.8%)
## Agree 222 (56.8%) 84 (67.2%)
## BSA: 0.012
## Don't agree 40 (10.2%) 12 (9.60%)
## Normal 113 (28.9%) 20 (16.0%)
## Agree 238 (60.9%) 93 (74.4%)
## BRE: 0.255
## Don't agree 81 (20.7%) 20 (16.0%)
## Normal 117 (29.9%) 33 (26.4%)
## Agree 193 (49.4%) 72 (57.6%)
## BCO: 0.629
## Don't agree 119 (30.4%) 39 (31.2%)
## Normal 117 (29.9%) 32 (25.6%)
## Agree 155 (39.6%) 54 (43.2%)
## BIN: 0.504
## Don't agree 104 (26.6%) 37 (29.6%)
## Normal 167 (42.7%) 46 (36.8%)
## Agree 120 (30.7%) 42 (33.6%)
## BHE: 0.016
## Don't agree 182 (46.5%) 45 (36.0%)
## Normal 123 (31.5%) 37 (29.6%)
## Agree 86 (22.0%) 43 (34.4%)
## BEN: 0.387
## Don't agree 37 (9.46%) 11 (8.80%)
## Normal 87 (22.3%) 21 (16.8%)
## Agree 267 (68.3%) 93 (74.4%)
## BRC: 0.531
## Don't agree 43 (11.0%) 14 (11.2%)
## Normal 70 (17.9%) 17 (13.6%)
## Agree 278 (71.1%) 94 (75.2%)
## BWE: 0.925
## Don't agree 32 (8.18%) 11 (8.80%)
## Normal 37 (9.46%) 13 (10.4%)
## Agree 322 (82.4%) 101 (80.8%)
## MR: 0.129
## Safety 21 (5.37%) 6 (4.80%)
## Comfortable 200 (51.2%) 54 (43.2%)
## Low price 19 (4.86%) 5 (4.00%)
## Accessibility 109 (27.9%) 52 (41.6%)
## Reliability 35 (8.95%) 7 (5.60%)
## others 7 (1.79%) 1 (0.80%)
## NBR: 0.611
## No Route 86 (22.0%) 36 (28.8%)
## Inconvenience 123 (31.5%) 36 (28.8%)
## Unsafety 6 (1.53%) 1 (0.80%)
## Long waiting time 120 (30.7%) 38 (30.4%)
## Unreliability 39 (9.97%) 8 (6.40%)
## others 17 (4.35%) 6 (4.80%)
## BUB: <0.001
## No 168 (43.0%) 30 (24.0%)
## Yes 223 (57.0%) 95 (76.0%)
## SBM: <0.001
## Don't agree 218 (55.8%) 45 (36.0%)
## Normal 136 (34.8%) 33 (26.4%)
## Agree 37 (9.46%) 47 (37.6%)
## SBS: <0.001
## Don't agree 235 (60.1%) 64 (51.2%)
## Normal 138 (35.3%) 35 (28.0%)
## Agree 18 (4.60%) 26 (20.8%)
## CO 9.42 (7.27) 10.1 (6.33) 0.283
## DI 6.19 (4.79) 6.58 (5.45) 0.475
## TI 16.0 (13.1) 15.3 (11.8) 0.586
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (32 variables): GD, IN, NC, MC, CC, BO, RO, NB, NR, DT, SC, WE, WK, BRE, BCO, BIN, BEN, BRC, BWE, NBR, CO, DI
t1 = compareGroups(MSI ~ NM + SP + BI + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, data = MotorS)
createTable(t1)
##
## --------Summary descriptives table by 'MSI'---------
##
## _______________________________________________________
## No Yes p.overall
## N=391 N=125
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## NM: 0.434
## None 5 (1.28%) 0 (0.00%)
## 1 54 (13.8%) 14 (11.2%)
## >=2 332 (84.9%) 111 (88.8%)
## SP: 0.002
## No 40 (10.2%) 8 (6.40%)
## Yes 300 (76.7%) 113 (90.4%)
## Don't know 51 (13.0%) 4 (3.20%)
## BI: <0.001
## No 194 (49.6%) 28 (22.4%)
## Yes 197 (50.4%) 97 (77.6%)
## WI: <0.001
## No 194 (49.6%) 28 (22.4%)
## At Bus Stops 105 (26.9%) 39 (31.2%)
## Internet/Facebook 43 (11.0%) 30 (24.0%)
## From Friends 24 (6.14%) 18 (14.4%)
## Others 25 (6.39%) 10 (8.00%)
## BC: 0.003
## No 176 (45.0%) 76 (60.8%)
## Yes 215 (55.0%) 49 (39.2%)
## BST: 0.022
## Don't agree 182 (46.5%) 66 (52.8%)
## Normal 121 (30.9%) 23 (18.4%)
## Agree 88 (22.5%) 36 (28.8%)
## BSC: 0.045
## Don't agree 61 (15.6%) 20 (16.0%)
## Normal 108 (27.6%) 21 (16.8%)
## Agree 222 (56.8%) 84 (67.2%)
## BSA: 0.012
## Don't agree 40 (10.2%) 12 (9.60%)
## Normal 113 (28.9%) 20 (16.0%)
## Agree 238 (60.9%) 93 (74.4%)
## BHE: 0.016
## Don't agree 182 (46.5%) 45 (36.0%)
## Normal 123 (31.5%) 37 (29.6%)
## Agree 86 (22.0%) 43 (34.4%)
## BUB: <0.001
## No 168 (43.0%) 30 (24.0%)
## Yes 223 (57.0%) 95 (76.0%)
## SBM: <0.001
## Don't agree 218 (55.8%) 45 (36.0%)
## Normal 136 (34.8%) 33 (26.4%)
## Agree 37 (9.46%) 47 (37.6%)
## SBS: <0.001
## Don't agree 235 (60.1%) 64 (51.2%)
## Normal 138 (35.3%) 35 (28.0%)
## Agree 18 (4.60%) 26 (20.8%)
## TI 16.0 (13.1) 15.3 (11.8) 0.586
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (22/32 variables): GD, IN, NC, MC, CC, BO, RO, NB, NR, DT, SC, WE, WK, BRE, BCO, BIN, BEN, BRC, BWE, NBR, CO, DI; remains 10/32 variables p-value to test (AG, OC, MO, TP, FR, TS, LS, CA, MR, TI); 1 Cor>0.7 (BI) --> 21 variables
t.c = compareGroups(MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI, data = MotorS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t.c)
##
## --------Summary descriptives table by 'MSI'---------
##
## __________________________________________________________
## No Yes p.overall
## N=391 N=125
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## AG: 0.075
## <=24 155 (39.6%) 59 (47.2%)
## 25-60 231 (59.1%) 62 (49.6%)
## >60 5 (1.28%) 4 (3.20%)
## OC: 0.069
## Pupils/Students 117 (29.9%) 47 (37.6%)
## Officers/Workers 124 (31.7%) 29 (23.2%)
## Housewife/Unemployed 24 (6.14%) 6 (4.80%)
## Free labor 108 (27.6%) 31 (24.8%)
## Others 18 (4.60%) 12 (9.60%)
## MO: 1.000
## No 32 (8.18%) 10 (8.00%)
## Yes 359 (91.8%) 115 (92.0%)
## NM: 0.434
## None 5 (1.28%) 0 (0.00%)
## 1 54 (13.8%) 14 (11.2%)
## >=2 332 (84.9%) 111 (88.8%)
## TP: 0.158
## Work/Study 255 (65.2%) 95 (76.0%)
## Picking Children 57 (14.6%) 12 (9.60%)
## Entertainment/Food 72 (18.4%) 16 (12.8%)
## Others 7 (1.79%) 2 (1.60%)
## FR: 0.103
## Once 21 (5.37%) 2 (1.60%)
## twice 15 (3.84%) 2 (1.60%)
## >=3 times 355 (90.8%) 121 (96.8%)
## TS: 0.075
## No 178 (45.5%) 69 (55.2%)
## Yes 213 (54.5%) 56 (44.8%)
## SP: 0.002
## No 40 (10.2%) 8 (6.40%)
## Yes 300 (76.7%) 113 (90.4%)
## Don't know 51 (13.0%) 4 (3.20%)
## WI: <0.001
## No 194 (49.6%) 28 (22.4%)
## At Bus Stops 105 (26.9%) 39 (31.2%)
## Internet/Facebook 43 (11.0%) 30 (24.0%)
## From Friends 24 (6.14%) 18 (14.4%)
## Others 25 (6.39%) 10 (8.00%)
## LS: 0.086
## No 76 (19.4%) 34 (27.2%)
## Yes 315 (80.6%) 91 (72.8%)
## BC: 0.003
## No 176 (45.0%) 76 (60.8%)
## Yes 215 (55.0%) 49 (39.2%)
## CA: 0.307
## No 209 (53.5%) 74 (59.2%)
## Yes 182 (46.5%) 51 (40.8%)
## BST: 0.022
## Don't agree 182 (46.5%) 66 (52.8%)
## Normal 121 (30.9%) 23 (18.4%)
## Agree 88 (22.5%) 36 (28.8%)
## BSC: 0.045
## Don't agree 61 (15.6%) 20 (16.0%)
## Normal 108 (27.6%) 21 (16.8%)
## Agree 222 (56.8%) 84 (67.2%)
## BSA: 0.012
## Don't agree 40 (10.2%) 12 (9.60%)
## Normal 113 (28.9%) 20 (16.0%)
## Agree 238 (60.9%) 93 (74.4%)
## BHE: 0.016
## Don't agree 182 (46.5%) 45 (36.0%)
## Normal 123 (31.5%) 37 (29.6%)
## Agree 86 (22.0%) 43 (34.4%)
## MR: 0.129
## Safety 21 (5.37%) 6 (4.80%)
## Comfortable 200 (51.2%) 54 (43.2%)
## Low price 19 (4.86%) 5 (4.00%)
## Accessibility 109 (27.9%) 52 (41.6%)
## Reliability 35 (8.95%) 7 (5.60%)
## others 7 (1.79%) 1 (0.80%)
## BUB: <0.001
## No 168 (43.0%) 30 (24.0%)
## Yes 223 (57.0%) 95 (76.0%)
## SBM: <0.001
## Don't agree 218 (55.8%) 45 (36.0%)
## Normal 136 (34.8%) 33 (26.4%)
## Agree 37 (9.46%) 47 (37.6%)
## SBS: <0.001
## Don't agree 235 (60.1%) 64 (51.2%)
## Normal 138 (35.3%) 35 (28.0%)
## Agree 18 (4.60%) 26 (20.8%)
## TI 16.0 (13.1) 15.3 (11.8) 0.586
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
3. Descriptive analyse by graphs
library(magrittr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.0.4
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
MotorS %>%
group_by(AG, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = AG)) +
geom_col(position = "fill") +
xlab("Mode shift intention") +
ylab("Age") +
ggtitle("Probability of Bus Shift ~ Age") # Đối tuổi học sinh và người già có khả năng chuyển đổi sang xe buýt cao hơn
MotorS %>%
group_by(OC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = OC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Occupation") +
ggtitle("Proportion of Bus Shift ~ Occupation") # Đối tượng học sinh và nghỉ hưu có khả năng chuyển đổi sang xe buýt cao hơn
MotorS %>%
group_by(MO, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = MO)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Motor Owning") +
ggtitle("Proportion of Bus Shift ~ Motor Owning") # Không có sự khác biệt giữa 2 nhóm
MotorS %>%
group_by(NM, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = NM)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Number of Motor") +
ggtitle("Proportion of Bus Shift ~ Number of Motor")
MotorS %>%
group_by(TP, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = TP)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Travel Purpose") +
ggtitle("Proportion of Bus Shift ~ Travel Purpose") # Mục đích công việc có khả năng chuyển đổi nhiều hơn
MotorS %>%
group_by(FR, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = FR)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Travel Frequency") +
ggtitle("Proportion of Bus Shift ~ Travel Frequency") # Có vẻ không liên quan đến khả năng chuyển đổi
MotorS %>%
group_by(TS, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = TS)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Temporary Stops") +
ggtitle("Proportion of Bus Shift ~ Temporary Stops") # Có diểm dừng tạm thời ít khả năng chuyển đổi hơn
MotorS %>%
group_by(SP, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = SP)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus Stop Present") +
ggtitle("Proportion of Bus Shift ~ Bus Stop Present") # Gần trạm xe buýt có khẳ năng chuyển đổi cao hơn
MotorS %>%
group_by(BI, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BI)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus Information") +
ggtitle("Proportion of Bus Shift ~ Bus Information") # Biết thông tin về hệ thống buýt có khả năng chuyển đổi cao hơn
MotorS %>%
group_by(WI, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = WI)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Way to have bus information") +
ggtitle("Proportion of Bus Shift ~ Way to have bus information") # Không biết thông tin về buýt ít có khả năng chuyển dổi hơn
MotorS %>%
group_by(LS, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = LS)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Lane Separate") +
ggtitle("Proportion of Bus Shift ~ Lane Separate") # Yếu tố đường có phân làn dường như là tạo đk thuận lợi cho xe máy nên ít có khả năng chuyển đổi hơn
MotorS %>%
group_by(BC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus Condition") +
ggtitle("Proportion of Bus Shift ~ Bus Condition") # Điều kiện trạm dừng (có mái che) tỷ lệ nghich với khả năng chuyển đổi (Mâu thuẩn ??? Kiểm tra)
MotorS %>%
group_by(CA, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = CA)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Center Area") +
ggtitle("Proportion of Bus Shift ~ Center Area") # Chuyến đi ở khu vực trung tâm ít có khả năng chuyển đổi hơn
MotorS %>%
group_by(BST, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BST)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of bus time saving") +
ggtitle("Proportion of Bus Shift ~ Perception of bus time saving") # Cảm nhận xe buýt tiết kiệm thời gian đi lại có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
MotorS %>%
group_by(BSC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BSC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of cost saving") +
ggtitle("Proportion of Bus Shift ~ Perception of cost saving") # Cảm nhận xe buýt tiết kiệm chi phí đi lại có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
MotorS %>%
group_by(BSA, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BSA)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of Safe") +
ggtitle("Proportion of Bus Shift ~ Perception of Safe") # Cảm nhận xe buýt an toàn có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
MotorS %>%
group_by(BHE, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BHE)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of Healthy") +
ggtitle("Proportion of Bus Shift ~ Perception of Healthy") # Cảm nhận xe buýt có lợi cho sức khỏe có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
MotorS %>%
group_by(MR, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = MR)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Mode Choice Reason") +
ggtitle("Proportion of Bus Shift ~ Mode Choice Reason")
MotorS %>%
group_by(BUB, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BUB)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus use before") +
ggtitle("Proportion of Bus Shift ~ Bus use before") # Trải nghiệm xe buýt có ảnh hưởng tích cực đến khả năng chuyển đổi
MotorS %>%
group_by(SBM, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = SBM)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Like to shift to bus for main trips") +
ggtitle("Proportion of Bus Shift ~ Like to shift to bus for main trips")
MotorS %>%
group_by(SBS, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = SBS)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Like to shift to bus for sub trips") +
ggtitle("Proportion of Bus Shift ~ Like to shift to bus for sub trips")
## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(MotorS, aes(x=TI, fill = MSI, color = MSI)) +
geom_histogram (position = "dodge") +
xlab("Travel Period") +
ylab("Count") +
ggtitle("Histogram of Travel Time")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(MotorS, aes(x=DI, fill = MSI, color = MSI)) +
geom_histogram (position = "dodge") +
xlab("Distance") +
ylab("Count") +
ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(MotorS, aes(x=CO, fill = MSI, color = MSI)) +
geom_histogram (position = "dodge") +
xlab("Cost") +
ylab("Count") +
ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Statistic Analysis by boxplot (continuous variales)
MotorS %>%
group_by(DI, MSI) %>%
count() %>%
ggplot(aes(x = MSI, y = DI, fill = MSI)) +
geom_boxplot() +
xlab("Mode Shift to Bus") +
ylab("Distance") +
ggtitle("Boxplot of Bus Shift ~ Distance") # Không có sự khác biệt giữa 2 nhóm
MotorS %>%
group_by(CO, MSI) %>%
count() %>%
ggplot(aes(x = MSI, y = CO, fill = MSI)) +
geom_boxplot() +
xlab("Mode Shift to Bus") +
ylab("Cost") +
ggtitle("Boxplot of Bus Shift ~ Cost") # Không có sự khác biệt giữa 2 nhóm
MotorS %>%
group_by(TI, MSI) %>%
count() %>%
ggplot(aes(x = MSI, y = TI, fill = MSI)) +
geom_boxplot() +
xlab("Mode Shift to Bus") +
ylab("Travel Period") +
ggtitle("Boxplot of Bus Shift ~ Travel Period") # Có sự khác biệt giữa 2 nhóm
## Correlation in continuous variables
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
CVMSI_MoBS = data.frame(MotorS$MSI, MotorS$DI, MotorS$TI, MotorS$CO)
pairs.panels(CVMSI_MoBS)
summary(MotorS)
## CA BC TP FR DT
## No :283 No :252 Work/Study :350 Once : 23 Normal :105
## Yes:233 Yes:264 Picking Children : 69 twice : 17 Rush hour:411
## Entertainment/Food: 88 >=3 times:476
## Others : 9
##
##
## DI TI SC LS TS
## Min. : 0.200 Min. : 0.00 No : 79 No :110 No :247
## 1st Qu.: 3.000 1st Qu.: 10.00 Yes:437 Yes:406 Yes:269
## Median : 5.000 Median : 15.00
## Mean : 6.289 Mean : 15.84
## 3rd Qu.:10.000 3rd Qu.: 20.00
## Max. :30.000 Max. :180.00
## MR WE WK NBR
## Safety : 27 No :514 No :401 No Route :122
## Comfortable :254 Yes: 2 Yes:115 Inconvenience :159
## Low price : 24 Unsafety : 7
## Accessibility:161 Long waiting time:158
## Reliability : 42 Unreliability : 47
## others : 8 others : 23
## CO BUB SBM SBS
## Min. : 0.000 No :198 Don't agree:263 Don't agree:299
## 1st Qu.: 5.000 Yes:318 Normal :169 Normal :173
## Median :10.000 Agree : 84 Agree : 44
## Mean : 9.595
## 3rd Qu.:12.000
## Max. :50.000
## BST BSC BSA BRE
## Don't agree:248 Don't agree: 81 Don't agree: 52 Don't agree:101
## Normal :144 Normal :129 Normal :133 Normal :150
## Agree :124 Agree :306 Agree :331 Agree :265
##
##
##
## BCO BIN BHE BEN
## Don't agree:158 Don't agree:141 Don't agree:227 Don't agree: 48
## Normal :149 Normal :213 Normal :160 Normal :108
## Agree :209 Agree :162 Agree :129 Agree :360
##
##
##
## BRC BWE SP BI
## Don't agree: 57 Don't agree: 43 No : 48 No :222
## Normal : 87 Normal : 50 Yes :413 Yes:294
## Agree :372 Agree :423 Don't know: 55
##
##
##
## WI MSI GD AG
## No :222 No :391 Female:221 <=24 :214
## At Bus Stops :144 Yes:125 Male :295 25-60:293
## Internet/Facebook: 73 >60 : 9
## From Friends : 42
## Others : 35
##
## OC IN NC MC
## Pupils/Students :164 <8 millions :113 No :223 No : 25
## Officers/Workers :153 (8-15) millions :236 Yes:293 Yes:491
## Housewife/Unemployed: 30 (15-25) millions:128
## Free labor :139 >25 millions : 39
## Others : 30
##
## CC BO MO RO NB NM NR
## No :445 No :335 No : 42 No :475 None:253 None: 5 No :435
## Yes: 71 Yes:181 Yes:474 Yes: 41 1 :197 1 : 68 Yes: 81
## >=2 : 66 >=2 :443
##
##
##
# Correlations between MSI and variables of Group 1 (Individual Characteristics)
Cor1 = data.frame(MotorS$MSI, MotorS$GD, MotorS$AG, MotorS$OC, MotorS$IN, MotorS$NC, MotorS$MC, MotorS$CC, MotorS$BO, MotorS$MO, MotorS$RO, MotorS$NB, MotorS$NM, MotorS$NR)
pairs.panels(Cor1)
Cor1_1 = data.frame(MotorS$MSI, MotorS$GD, MotorS$AG, MotorS$OC, MotorS$IN, MotorS$NC)
pairs.panels(Cor1_1)
Cor1_2 = data.frame(MotorS$MSI, MotorS$MC, MotorS$CC, MotorS$BO, MotorS$MO, MotorS$RO, MotorS$NB, MotorS$NM, MotorS$NR)
pairs.panels(Cor1_2)
# Correlation between MSI and variables of Group 2 (Trip Characteristics)
Cor2 = data.frame(MotorS$MSI, MotorS$TP, MotorS$FR, MotorS$DT, MotorS$TS, MotorS$CO, MotorS$DI)
pairs.panels(Cor2)
# Correlation between MSI and variables of Group 3 (Infrastructures)
Cor3 = data.frame(MotorS$MSI, MotorS$SP, MotorS$BI, MotorS$WI, MotorS$LS, MotorS$SC, MotorS$BC, MotorS$TI)
pairs.panels(Cor3)
# Correlation between MSI and variables of Group 4 (Surrouding Environment) and 5 (Others)
Cor45 = data.frame(MotorS$MSI, MotorS$WE, MotorS$WK, MotorS$CA, MotorS$MR, MotorS$NBR, MotorS$BUB, MotorS$SBM, MotorS$SBS)
pairs.panels(Cor45)
# Correlation between MSI and variables of Group 6 (Perception of Bus)
Cor6 = data.frame(MotorS$MSI, MotorS$BST, MotorS$BSC, MotorS$BSA, MotorS$BRE, MotorS$BCO, MotorS$BIN, MotorS$BHE, MotorS$BEN, MotorS$BRC, MotorS$BWE)
pairs.panels(Cor6)
3. Binary Model
# Model with multi-variables (in t.c) --> 21 variables
mg = glm(MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI, family = binomial, data = MotorS)
mg
##
## Call: glm(formula = MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI +
## LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS +
## TI, family = binomial, data = MotorS)
##
## Coefficients:
## (Intercept) AG25-60 AG>60
## -1.749e+01 -1.375e-01 1.945e+00
## OCOfficers/Workers OCHousewife/Unemployed OCFree labor
## -3.472e-01 2.321e-01 -3.092e-01
## OCOthers MOYes NM1
## 7.343e-01 -1.427e-01 1.598e+01
## NM>=2 TPPicking Children TPEntertainment/Food
## 1.599e+01 -7.910e-01 -6.188e-01
## TPOthers FRtwice FR>=3 times
## -9.471e-01 4.573e-01 1.152e+00
## TSYes SPYes SPDon't know
## -5.712e-01 -3.686e-01 -4.860e-01
## WIAt Bus Stops WIInternet/Facebook WIFrom Friends
## 7.663e-01 1.852e+00 1.498e+00
## WIOthers LSYes BCYes
## 1.229e+00 -6.671e-01 -5.902e-01
## CAYes BSTNormal BSTAgree
## -5.145e-01 -5.892e-01 -4.991e-01
## BSCNormal BSCAgree BSANormal
## -5.346e-01 -5.667e-01 2.374e-01
## BSAAgree BHENormal BHEAgree
## 9.654e-01 -3.817e-01 5.094e-02
## MRComfortable MRLow price MRAccessibility
## -4.249e-01 -2.663e-01 3.595e-01
## MRReliability MRothers BUBYes
## -1.049e+00 -1.302e+00 5.175e-01
## SBMNormal SBMAgree SBSNormal
## 5.631e-01 2.020e+00 -3.276e-01
## SBSAgree TI
## 7.508e-01 -4.722e-04
##
## Degrees of Freedom: 515 Total (i.e. Null); 472 Residual
## Null Deviance: 571.4
## Residual Deviance: 412.7 AIC: 500.7
summary(mg)
##
## Call:
## glm(formula = MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI +
## LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS +
## TI, family = binomial, data = MotorS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.68669 -0.57742 -0.36532 -0.00083 2.92882
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.749e+01 8.037e+02 -0.022 0.98263
## AG25-60 -1.375e-01 4.484e-01 -0.307 0.75904
## AG>60 1.945e+00 1.064e+00 1.827 0.06766 .
## OCOfficers/Workers -3.472e-01 5.110e-01 -0.680 0.49680
## OCHousewife/Unemployed 2.321e-01 7.494e-01 0.310 0.75680
## OCFree labor -3.092e-01 4.877e-01 -0.634 0.52616
## OCOthers 7.343e-01 7.214e-01 1.018 0.30873
## MOYes -1.427e-01 4.965e-01 -0.287 0.77379
## NM1 1.598e+01 8.037e+02 0.020 0.98414
## NM>=2 1.599e+01 8.037e+02 0.020 0.98413
## TPPicking Children -7.910e-01 4.675e-01 -1.692 0.09066 .
## TPEntertainment/Food -6.188e-01 3.911e-01 -1.582 0.11360
## TPOthers -9.471e-01 1.198e+00 -0.791 0.42911
## FRtwice 4.573e-01 1.197e+00 0.382 0.70247
## FR>=3 times 1.152e+00 8.573e-01 1.344 0.17895
## TSYes -5.712e-01 2.693e-01 -2.121 0.03396 *
## SPYes -3.686e-01 5.883e-01 -0.627 0.53097
## SPDon't know -4.860e-01 7.534e-01 -0.645 0.51890
## WIAt Bus Stops 7.663e-01 4.079e-01 1.879 0.06028 .
## WIInternet/Facebook 1.852e+00 4.601e-01 4.025 5.71e-05 ***
## WIFrom Friends 1.498e+00 5.181e-01 2.892 0.00383 **
## WIOthers 1.229e+00 5.712e-01 2.152 0.03139 *
## LSYes -6.671e-01 3.272e-01 -2.039 0.04150 *
## BCYes -5.902e-01 2.824e-01 -2.090 0.03663 *
## CAYes -5.145e-01 2.936e-01 -1.753 0.07968 .
## BSTNormal -5.892e-01 3.912e-01 -1.506 0.13204
## BSTAgree -4.991e-01 3.929e-01 -1.270 0.20407
## BSCNormal -5.346e-01 5.332e-01 -1.003 0.31602
## BSCAgree -5.667e-01 4.867e-01 -1.164 0.24426
## BSANormal 2.374e-01 5.706e-01 0.416 0.67738
## BSAAgree 9.654e-01 5.654e-01 1.707 0.08777 .
## BHENormal -3.817e-01 3.393e-01 -1.125 0.26060
## BHEAgree 5.094e-02 3.333e-01 0.153 0.87854
## MRComfortable -4.249e-01 6.778e-01 -0.627 0.53075
## MRLow price -2.663e-01 9.399e-01 -0.283 0.77690
## MRAccessibility 3.595e-01 6.947e-01 0.518 0.60479
## MRReliability -1.049e+00 8.501e-01 -1.234 0.21736
## MRothers -1.302e+00 1.604e+00 -0.812 0.41703
## BUBYes 5.175e-01 3.064e-01 1.689 0.09120 .
## SBMNormal 5.631e-01 3.704e-01 1.520 0.12839
## SBMAgree 2.020e+00 4.291e-01 4.707 2.51e-06 ***
## SBSNormal -3.276e-01 3.342e-01 -0.980 0.32694
## SBSAgree 7.508e-01 4.858e-01 1.546 0.12222
## TI -4.722e-04 1.261e-02 -0.037 0.97014
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 412.66 on 472 degrees of freedom
## AIC: 500.66
##
## Number of Fisher Scoring iterations: 15
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6) --> 12 variables
#mg1 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
## Way 2: Use function lmr in package "rms" - library(rms) -ml = lrm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI)
# Find Parsimonious model - Tìm mô hình tối ưu - AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI --> 21 variables
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-5)
xvars <- MotorS[, c(34,35,41, 44, 3, 4, 10, 29, 31, 9, 2, 1, 19, 20, 21, 25, 11, 16, 17, 18, 7)]
y <- MotorS$MSI
bma.search <- bic.glm (xvars, y, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search)
##
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial", strict = F, OR = 20)
##
##
## 15 models were selected
## Best 5 models (cumulative posterior probability = 0.6784 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -1.752737 0.41813 -1.583e+00 -1.873e+00
## AG 0.0
## .25-60 0.000000 0.00000 . .
## .>60 0.000000 0.00000 . .
## OC 0.0
## .Officers/Workers 0.000000 0.00000 . .
## .Housewife/Unemployed 0.000000 0.00000 . .
## .Free labor 0.000000 0.00000 . .
## .Others 0.000000 0.00000 . .
## MO 0.0
## .Yes 0.000000 0.00000 . .
## NM 0.0
## .1 0.000000 0.00000 . .
## .>=2 0.000000 0.00000 . .
## TP 0.0
## .Picking Children 0.000000 0.00000 . .
## .Entertainment/Food 0.000000 0.00000 . .
## .Others 0.000000 0.00000 . .
## FR 1.9
## .twice 0.002312 0.15820 . .
## .>=3 times 0.027007 0.22183 . .
## TS 14.0
## .Yes -0.071426 0.19746 . .
## SP 0.0
## .Yes 0.000000 0.00000 . .
## .Don't know 0.000000 0.00000 . .
## WI 94.9
## .At Bus Stops 0.591693 0.32161 6.198e-01 6.499e-01
## .Internet/Facebook 1.484951 0.47839 1.620e+00 1.489e+00
## .From Friends 1.379271 0.50125 1.425e+00 1.444e+00
## .Others 0.871717 0.50114 9.161e-01 7.519e-01
## LS 8.0
## .Yes -0.047928 0.18227 . .
## BC 65.9
## .Yes -0.439201 0.36929 -6.867e-01 -6.296e-01
## CA 62.0
## .Yes -0.421534 0.38334 -6.961e-01 .
## BST 0.0
## .Normal 0.000000 0.00000 . .
## .Agree 0.000000 0.00000 . .
## BSC 0.0
## .Normal 0.000000 0.00000 . .
## .Agree 0.000000 0.00000 . .
## BSA 3.1
## .Normal -0.008968 0.09369 . .
## .Agree 0.015071 0.10975 . .
## BHE 0.0
## .Normal 0.000000 0.00000 . .
## .Agree 0.000000 0.00000 . .
## MR 0.0
## .Comfortable 0.000000 0.00000 . .
## .Low price 0.000000 0.00000 . .
## .Accessibility 0.000000 0.00000 . .
## .Reliability 0.000000 0.00000 . .
## .others 0.000000 0.00000 . .
## BUB 6.4
## .Yes 0.029706 0.13088 . .
## SBM 100.0
## .Normal 0.019322 0.27131 2.715e-03 5.202e-02
## .Agree 1.773231 0.30597 1.833e+00 1.681e+00
## SBS 0.0
## .Normal 0.000000 0.00000 . .
## .Agree 0.000000 0.00000 . .
## TI 0.0 0.000000 0.00000 . .
##
## nVar 4 3
## BIC -2.688e+03 -2.685e+03
## post prob 0.343 0.118
## model 3 model 4 model 5
## Intercept -1.924e+00 -1.411e+00 -2.162e+00
## AG
## .25-60 . . .
## .>60 . . .
## OC
## .Officers/Workers . . .
## .Housewife/Unemployed . . .
## .Free labor . . .
## .Others . . .
## MO
## .Yes . . .
## NM
## .1 . . .
## .>=2 . . .
## TP
## .Picking Children . . .
## .Entertainment/Food . . .
## .Others . . .
## FR
## .twice . . .
## .>=3 times . . .
## TS
## .Yes . . .
## SP
## .Yes . . .
## .Don't know . . .
## WI
## .At Bus Stops 6.032e-01 6.783e-01 6.352e-01
## .Internet/Facebook 1.566e+00 1.546e+00 1.471e+00
## .From Friends 1.490e+00 1.544e+00 1.514e+00
## .Others 1.029e+00 1.006e+00 8.948e-01
## LS
## .Yes . -6.503e-01 .
## BC
## .Yes . . .
## CA
## .Yes -6.314e-01 -7.242e-01 .
## BST
## .Normal . . .
## .Agree . . .
## BSC
## .Normal . . .
## .Agree . . .
## BSA
## .Normal . . .
## .Agree . . .
## BHE
## .Normal . . .
## .Agree . . .
## MR
## .Comfortable . . .
## .Low price . . .
## .Accessibility . . .
## .Reliability . . .
## .others . . .
## BUB
## .Yes . . .
## SBM
## .Normal -1.765e-02 -1.136e-02 1.838e-02
## .Agree 1.809e+00 1.834e+00 1.668e+00
## SBS
## .Normal . . .
## .Agree . . .
## TI . . .
##
## nVar 3 4 2
## BIC -2.685e+03 -2.684e+03 -2.684e+03
## post prob 0.094 0.063 0.061
# Remove ns variables (AG, OC, MO, TP, FR, TS, LS, CA, MR) and change DI (7) by TI (8) --> 12 variables
xvars1 <- MotorS[, c(44, 29, 31, 2, 19, 20, 21, 25, 16, 17, 18, 7)]
y1 <- MotorS$MSI
bma.search1 <- bic.glm (xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search1)
##
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial", strict = F, OR = 20)
##
##
## 6 models were selected
## Best 5 models (cumulative posterior probability = 0.9741 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -2.01437 0.31491 -1.873e+00 -2.162e+00
## NM 0.0
## .1 0.00000 0.00000 . .
## .>=2 0.00000 0.00000 . .
## SP 0.0
## .Yes 0.00000 0.00000 . .
## .Don't know 0.00000 0.00000 . .
## WI 100.0
## .At Bus Stops 0.63184 0.29730 6.499e-01 6.352e-01
## .Internet/Facebook 1.46875 0.33345 1.489e+00 1.471e+00
## .From Friends 1.44774 0.39615 1.444e+00 1.514e+00
## .Others 0.78514 0.45320 7.519e-01 8.948e-01
## BC 67.2
## .Yes -0.42544 0.35331 -6.296e-01 .
## BST 2.6
## .Normal -0.02005 0.13299 . .
## .Agree -0.01151 0.08613 . .
## BSC 0.0
## .Normal 0.00000 0.00000 . .
## .Agree 0.00000 0.00000 . .
## BSA 3.4
## .Normal -0.01030 0.09915 . .
## .Agree 0.01503 0.10872 . .
## BHE 0.0
## .Normal 0.00000 0.00000 . .
## .Agree 0.00000 0.00000 . .
## BUB 19.3
## .Yes 0.09150 0.21839 . .
## SBM 100.0
## .Normal 0.03610 0.27425 5.202e-02 1.838e-02
## .Agree 1.66220 0.29412 1.681e+00 1.668e+00
## SBS 0.0
## .Normal 0.00000 0.00000 . .
## .Agree 0.00000 0.00000 . .
## TI 0.0 0.00000 0.00000 . .
##
## nVar 3 2
## BIC -2.685e+03 -2.684e+03
## post prob 0.493 0.255
## model 3 model 4 model 5
## Intercept -2.106e+00 -2.390e+00 -2.075e+00
## NM
## .1 . . .
## .>=2 . . .
## SP
## .Yes . . .
## .Don't know . . .
## WI
## .At Bus Stops 5.817e-01 5.561e-01 6.564e-01
## .Internet/Facebook 1.405e+00 1.368e+00 1.587e+00
## .From Friends 1.365e+00 1.416e+00 1.467e+00
## .Others 6.725e-01 8.001e-01 8.648e-01
## BC
## .Yes -6.178e-01 . -7.251e-01
## BST
## .Normal . . .
## .Agree . . .
## BSC
## .Normal . . .
## .Agree . . .
## BSA
## .Normal . . -3.066e-01
## .Agree . . 4.472e-01
## BHE
## .Normal . . .
## .Agree . . .
## BUB
## .Yes 4.664e-01 4.888e-01 .
## SBM
## .Normal -1.303e-02 -5.210e-02 6.190e-02
## .Agree 1.604e+00 1.580e+00 1.568e+00
## SBS
## .Normal . . .
## .Agree . . .
## TI . . .
##
## nVar 4 3 4
## BIC -2.683e+03 -2.682e+03 -2.680e+03
## post prob 0.119 0.073 0.034
#imageplot.bma(bma.search1)
## Model 1 from bma.search1 : Remove ns variables (AG, OC, MO, TP, FR, TS, LS, CA, MR) and change DI (7) by TI (8) --> 12 variables
mg1 = glm(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, family = binomial, data = MotorS)
mg1
##
## Call: glm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE +
## BUB + SBM + SBS + TI, family = binomial, data = MotorS)
##
## Coefficients:
## (Intercept) NM1 NM>=2
## -16.38213 14.26799 14.31772
## SPYes SPDon't know WIAt Bus Stops
## -0.48827 -0.54986 0.72698
## WIInternet/Facebook WIFrom Friends WIOthers
## 1.73430 1.35463 0.95725
## BCYes BSTNormal BSTAgree
## -0.62908 -0.68942 -0.64951
## BSCNormal BSCAgree BSANormal
## -0.29375 -0.31145 0.20265
## BSAAgree BHENormal BHEAgree
## 0.96808 -0.21547 0.15164
## BUBYes SBMNormal SBMAgree
## 0.49882 0.40746 1.48605
## SBSNormal SBSAgree TI
## -0.20278 1.01296 0.00233
##
## Degrees of Freedom: 515 Total (i.e. Null); 492 Residual
## Null Deviance: 571.4
## Residual Deviance: 458.2 AIC: 506.2
summary(mg1)
##
## Call:
## glm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE +
## BUB + SBM + SBS + TI, family = binomial, data = MotorS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.80649 -0.69419 -0.43485 -0.00113 2.63043
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -16.38213 552.17363 -0.030 0.97633
## NM1 14.26799 552.17330 0.026 0.97939
## NM>=2 14.31772 552.17321 0.026 0.97931
## SPYes -0.48827 0.51806 -0.942 0.34594
## SPDon't know -0.54986 0.68980 -0.797 0.42538
## WIAt Bus Stops 0.72698 0.36374 1.999 0.04565 *
## WIInternet/Facebook 1.73429 0.40686 4.263 2.02e-05 ***
## WIFrom Friends 1.35463 0.45332 2.988 0.00281 **
## WIOthers 0.95725 0.50130 1.910 0.05619 .
## BCYes -0.62908 0.24950 -2.521 0.01169 *
## BSTNormal -0.68942 0.35610 -1.936 0.05286 .
## BSTAgree -0.64951 0.34536 -1.881 0.06002 .
## BSCNormal -0.29375 0.48745 -0.603 0.54676
## BSCAgree -0.31145 0.43655 -0.713 0.47558
## BSANormal 0.20265 0.53556 0.378 0.70515
## BSAAgree 0.96808 0.52099 1.858 0.06315 .
## BHENormal -0.21547 0.30297 -0.711 0.47698
## BHEAgree 0.15164 0.29906 0.507 0.61212
## BUBYes 0.49882 0.27314 1.826 0.06781 .
## SBMNormal 0.40746 0.33913 1.201 0.22956
## SBMAgree 1.48605 0.36085 4.118 3.82e-05 ***
## SBSNormal -0.20278 0.30871 -0.657 0.51128
## SBSAgree 1.01296 0.43072 2.352 0.01868 *
## TI 0.00233 0.01126 0.207 0.83612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 458.17 on 492 degrees of freedom
## AIC: 506.17
##
## Number of Fisher Scoring iterations: 14
## CHON: Model 2 --> So với 21 biến ở mg bỏ ns: và có cor gồm: OC, MO, NM, FR, SP, BST, BSC, BHE, MR, BSB --> Thử và chọn theo AIC min
mg2 = glm(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial, data = MotorS)
mg2
##
## Call: glm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial,
## data = MotorS)
##
## Coefficients:
## (Intercept) AG25-60 AG>60
## -0.75079 -0.07254 2.30511
## TSYes WIAt Bus Stops WIInternet/Facebook
## -0.46179 0.75331 1.67211
## WIFrom Friends WIOthers LSYes
## 1.54306 1.11330 -0.60908
## BCYes CAYes SBMNormal
## -0.70517 -0.83271 0.04398
## SBMAgree TPPicking Children TPEntertainment/Food
## 2.02537 -0.58872 -0.79116
## TPOthers
## -0.71397
##
## Degrees of Freedom: 515 Total (i.e. Null); 500 Residual
## Null Deviance: 571.4
## Residual Deviance: 457.1 AIC: 489.1
summary(mg2)
##
## Call:
## glm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial,
## data = MotorS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1109 -0.6696 -0.4643 -0.1752 2.6984
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.75079 0.40307 -1.863 0.062508 .
## AG25-60 -0.07254 0.25689 -0.282 0.777664
## AG>60 2.30511 0.83491 2.761 0.005764 **
## TSYes -0.46179 0.24422 -1.891 0.058643 .
## WIAt Bus Stops 0.75331 0.31163 2.417 0.015637 *
## WIInternet/Facebook 1.67211 0.35823 4.668 3.04e-06 ***
## WIFrom Friends 1.54306 0.41987 3.675 0.000238 ***
## WIOthers 1.11330 0.49058 2.269 0.023246 *
## LSYes -0.60908 0.28641 -2.127 0.033451 *
## BCYes -0.70517 0.24843 -2.838 0.004533 **
## CAYes -0.83271 0.25749 -3.234 0.001221 **
## SBMNormal 0.04398 0.27512 0.160 0.872991
## SBMAgree 2.02537 0.32084 6.313 2.74e-10 ***
## TPPicking Children -0.58872 0.41530 -1.418 0.156319
## TPEntertainment/Food -0.79116 0.35106 -2.254 0.024221 *
## TPOthers -0.71397 0.99890 -0.715 0.474759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 457.08 on 500 degrees of freedom
## AIC: 489.08
##
## Number of Fisher Scoring iterations: 5
## Calculate OR (odd ratio)
exp(mg2$coefficients)
## (Intercept) AG25-60 AG>60
## 0.4719945 0.9300307 10.0252841
## TSYes WIAt Bus Stops WIInternet/Facebook
## 0.6301538 2.1240088 5.3234125
## WIFrom Friends WIOthers LSYes
## 4.6788976 3.0443939 0.5438493
## BCYes CAYes SBMNormal
## 0.4940226 0.4348711 1.0449626
## SBMAgree TPPicking Children TPEntertainment/Food
## 7.5789143 0.5550382 0.4533186
## TPOthers
## 0.4896951
exp(coef(mg2))
## (Intercept) AG25-60 AG>60
## 0.4719945 0.9300307 10.0252841
## TSYes WIAt Bus Stops WIInternet/Facebook
## 0.6301538 2.1240088 5.3234125
## WIFrom Friends WIOthers LSYes
## 4.6788976 3.0443939 0.5438493
## BCYes CAYes SBMNormal
## 0.4940226 0.4348711 1.0449626
## SBMAgree TPPicking Children TPEntertainment/Food
## 7.5789143 0.5550382 0.4533186
## TPOthers
## 0.4896951
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
##
## alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
##
## alpha
logistic.display(mg2)
##
## Logistic regression predicting MSI : Yes vs No
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## AG: ref.=<=24
## 25-60 0.71 (0.47,1.06) 0.93 (0.56,1.54) 0.778
## >60 2.1 (0.55,8.1) 10.03 (1.95,51.5) 0.006
##
## TS: Yes vs No 0.68 (0.45,1.02) 0.63 (0.39,1.02) 0.059
##
## WI: ref.=No
## At Bus Stops 2.57 (1.5,4.42) 2.12 (1.15,3.91) 0.016
## Internet/Facebook 4.83 (2.62,8.91) 5.32 (2.64,10.74) < 0.001
## From Friends 5.2 (2.51,10.77) 4.68 (2.05,10.65) < 0.001
## Others 2.77 (1.2,6.38) 3.04 (1.16,7.96) 0.023
##
## LS: Yes vs No 0.65 (0.4,1.03) 0.54 (0.31,0.95) 0.033
##
## BC: Yes vs No 0.53 (0.35,0.8) 0.49 (0.3,0.8) 0.005
##
## CA: Yes vs No 0.79 (0.53,1.19) 0.43 (0.26,0.72) 0.001
##
## SBM: ref.=Don't agree
## Normal 1.18 (0.71,1.93) 1.04 (0.61,1.79) 0.873
## Agree 6.15 (3.6,10.53) 7.58 (4.04,14.21) < 0.001
##
## TP: ref.=Work/Study
## Picking Children 0.57 (0.29,1.1) 0.56 (0.25,1.25) 0.156
## Entertainment/Food 0.6 (0.33,1.08) 0.45 (0.23,0.9) 0.024
## Others 0.77 (0.16,3.76) 0.49 (0.07,3.47) 0.475
##
## P(LR-test)
## AG: ref.=<=24 0.029
## 25-60
## >60
##
## TS: Yes vs No 0.057
##
## WI: ref.=No < 0.001
## At Bus Stops
## Internet/Facebook
## From Friends
## Others
##
## LS: Yes vs No 0.035
##
## BC: Yes vs No 0.004
##
## CA: Yes vs No < 0.001
##
## SBM: ref.=Don't agree < 0.001
## Normal
## Agree
##
## TP: ref.=Work/Study 0.072
## Picking Children
## Entertainment/Food
## Others
##
## Log-likelihood = -228.5402
## No. of observations = 516
## AIC value = 489.0804
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms: mg2-8 variables, mg1-19 variables, mg-23 variables
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
##
## dotplot
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
## Registered S3 method overwritten by 'rms':
## method from
## print.lrtest epiDisplay
##
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
##
## lrtest
## The following objects are masked from 'package:car':
##
## Predict, vif
mg2.c2 = lrm(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS)
mg2.c2
## Logistic Regression Model
##
## lrm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 516 LR chi2 114.29 R2 0.297 C 0.792
## No 391 d.f. 15 g 1.393 Dxy 0.584
## Yes 125 Pr(> chi2) <0.0001 gr 4.025 gamma 0.586
## max |deriv| 2e-11 gp 0.215 tau-a 0.215
## Brier 0.140
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.7508 0.4031 -1.86 0.0625
## AG=25-60 -0.0725 0.2569 -0.28 0.7777
## AG=>60 2.3051 0.8349 2.76 0.0058
## TS=Yes -0.4618 0.2442 -1.89 0.0586
## WI=At Bus Stops 0.7533 0.3116 2.42 0.0156
## WI=Internet/Facebook 1.6721 0.3582 4.67 <0.0001
## WI=From Friends 1.5431 0.4199 3.68 0.0002
## WI=Others 1.1133 0.4906 2.27 0.0232
## LS=Yes -0.6091 0.2864 -2.13 0.0335
## BC=Yes -0.7052 0.2484 -2.84 0.0045
## CA=Yes -0.8327 0.2575 -3.23 0.0012
## SBM=Normal 0.0440 0.2751 0.16 0.8730
## SBM=Agree 2.0254 0.3208 6.31 <0.0001
## TP=Picking Children -0.5887 0.4153 -1.42 0.1563
## TP=Entertainment/Food -0.7912 0.3511 -2.25 0.0242
## TP=Others -0.7140 0.9989 -0.71 0.4748
##
mg1.c2 = lrm(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, data = MotorS)
mg1.c2
## Logistic Regression Model
##
## lrm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE +
## BUB + SBM + SBS + TI, data = MotorS)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 516 LR chi2 113.20 R2 0.294 C 0.797
## No 391 d.f. 23 g 1.523 Dxy 0.594
## Yes 125 Pr(> chi2) <0.0001 gr 4.588 gamma 0.594
## max |deriv| 0.03 gp 0.216 tau-a 0.218
## Brier 0.145
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -9.3693 27.3434 -0.34 0.7319
## NM=1 7.2551 27.3366 0.27 0.7907
## NM=>=2 7.3049 27.3348 0.27 0.7893
## SP=Yes -0.4883 0.5181 -0.94 0.3459
## SP=Don't know -0.5499 0.6898 -0.80 0.4254
## WI=At Bus Stops 0.7270 0.3637 2.00 0.0456
## WI=Internet/Facebook 1.7343 0.4069 4.26 <0.0001
## WI=From Friends 1.3546 0.4533 2.99 0.0028
## WI=Others 0.9572 0.5013 1.91 0.0562
## BC=Yes -0.6291 0.2495 -2.52 0.0117
## BST=Normal -0.6894 0.3561 -1.94 0.0529
## BST=Agree -0.6495 0.3454 -1.88 0.0600
## BSC=Normal -0.2937 0.4874 -0.60 0.5468
## BSC=Agree -0.3114 0.4366 -0.71 0.4756
## BSA=Normal 0.2026 0.5356 0.38 0.7051
## BSA=Agree 0.9681 0.5210 1.86 0.0631
## BHE=Normal -0.2155 0.3030 -0.71 0.4770
## BHE=Agree 0.1516 0.2991 0.51 0.6121
## BUB=Yes 0.4988 0.2731 1.83 0.0678
## SBM=Normal 0.4075 0.3391 1.20 0.2296
## SBM=Agree 1.4860 0.3609 4.12 <0.0001
## SBS=Normal -0.2028 0.3087 -0.66 0.5113
## SBS=Agree 1.0130 0.4307 2.35 0.0187
## TI 0.0023 0.0113 0.21 0.8361
##
mg.c2 = lrm(MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI, data = MotorS)
mg.c2
## Logistic Regression Model
##
## lrm(formula = MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI +
## LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS +
## TI, data = MotorS)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 516 LR chi2 158.71 R2 0.395 C 0.854
## No 391 d.f. 43 g 1.935 Dxy 0.708
## Yes 125 Pr(> chi2) <0.0001 gr 6.921 gamma 0.708
## max |deriv| 0.03 gp 0.250 tau-a 0.260
## Brier 0.126
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -9.5085 24.5401 -0.39 0.6984
## AG=25-60 -0.1375 0.4484 -0.31 0.7590
## AG=>60 1.9446 1.0642 1.83 0.0677
## OC=Officers/Workers -0.3472 0.5110 -0.68 0.4968
## OC=Housewife/Unemployed 0.2321 0.7494 0.31 0.7568
## OC=Free labor -0.3092 0.4877 -0.63 0.5262
## OC=Others 0.7343 0.7214 1.02 0.3087
## MO=Yes -0.1427 0.4965 -0.29 0.7738
## NM=1 7.9915 24.5100 0.33 0.7444
## NM=>=2 8.0031 24.5068 0.33 0.7440
## TP=Picking Children -0.7910 0.4675 -1.69 0.0907
## TP=Entertainment/Food -0.6188 0.3911 -1.58 0.1136
## TP=Others -0.9471 1.1978 -0.79 0.4291
## FR=twice 0.4573 1.1971 0.38 0.7025
## FR=>=3 times 1.1522 0.8573 1.34 0.1790
## TS=Yes -0.5712 0.2693 -2.12 0.0340
## SP=Yes -0.3686 0.5883 -0.63 0.5310
## SP=Don't know -0.4860 0.7534 -0.65 0.5189
## WI=At Bus Stops 0.7663 0.4079 1.88 0.0603
## WI=Internet/Facebook 1.8519 0.4601 4.02 <0.0001
## WI=From Friends 1.4981 0.5181 2.89 0.0038
## WI=Others 1.2293 0.5712 2.15 0.0314
## LS=Yes -0.6671 0.3272 -2.04 0.0415
## BC=Yes -0.5902 0.2824 -2.09 0.0366
## CA=Yes -0.5145 0.2936 -1.75 0.0797
## BST=Normal -0.5892 0.3912 -1.51 0.1320
## BST=Agree -0.4991 0.3929 -1.27 0.2041
## BSC=Normal -0.5346 0.5332 -1.00 0.3160
## BSC=Agree -0.5667 0.4867 -1.16 0.2443
## BSA=Normal 0.2374 0.5706 0.42 0.6774
## BSA=Agree 0.9654 0.5654 1.71 0.0878
## BHE=Normal -0.3817 0.3393 -1.12 0.2606
## BHE=Agree 0.0509 0.3333 0.15 0.8785
## MR=Comfortable -0.4249 0.6778 -0.63 0.5307
## MR=Low price -0.2663 0.9399 -0.28 0.7769
## MR=Accessibility 0.3595 0.6947 0.52 0.6048
## MR=Reliability -1.0487 0.8501 -1.23 0.2174
## MR=others -1.3017 1.6040 -0.81 0.4170
## BUB=Yes 0.5175 0.3064 1.69 0.0912
## SBM=Normal 0.5631 0.3704 1.52 0.1284
## SBM=Agree 2.0200 0.4291 4.71 <0.0001
## SBS=Normal -0.3276 0.3342 -0.98 0.3269
## SBS=Agree 0.7508 0.4858 1.55 0.1222
## TI -0.0005 0.0126 -0.04 0.9701
##
4. Test models
# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình): mg2 tốt nhất
deviance(mg)
## [1] 412.662
deviance(mg1)
## [1] 458.174
deviance(mg2)
## [1] 457.0804
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê - đều có ý nghĩa tk
library(rms)
lrtest(mg2, mg)
##
## Model 1: MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP
## Model 2: MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC +
## CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI
##
## L.R. Chisq d.f. P
## 44.41840539 28.00000000 0.02524525
lrtest(mg,mg2)
##
## Model 1: MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC +
## CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI
## Model 2: MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP
##
## L.R. Chisq d.f. P
## 44.41840539 28.00000000 0.02524525
lrtest(mg2, mg1)
##
## Model 1: MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP
## Model 2: MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM +
## SBS + TI
##
## L.R. Chisq d.f. P
## 1.0935863 8.0000000 0.9975851
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt - mg2 tốt nhất
AIC(mg)
## [1] 500.662
AIC(mg1)
## [1] 506.174
AIC(mg2)
## [1] 489.0804
## Calculate MacFadden's R2 voi packages "pscl" - mg tốt nhất
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -228.5402071 -285.6872179 114.2940216 0.2000335 0.1986841 0.2967406
pR2(mg1)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -229.0870002 -285.6872179 113.2004353 0.1981195 0.1969840 0.2942015
pR2(mg)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -206.3310044 -285.6872179 158.7124270 0.2777731 0.2647776 0.3954531
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(MotorS$MSI) # Qs là không chuyển đổi nhưng phân loại thành chuyển đổi thì gán giá trị là 0 và ngược lại
## Yes
## No 0
## Yes 1
dim(MotorS)
## [1] 516 45
glm.probs <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên
## 2 3 4 5 8 9 10
## 0.07399812 0.29844325 0.14510373 0.10549728 0.39616094 0.28094218 0.29249884
## 13 15 16
## 0.21219585 0.51293946 0.64246947
glm.pred <- rep("No", 516)
glm.pred[glm.probs >= 0.5] = "Yes"
table(glm.pred, MotorS$MSI)
##
## glm.pred No Yes
## No 374 74
## Yes 17 51
mean(glm.pred == MotorS$MSI) # Tỷ lệ dự báo chính xác của mô hình 80% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-82,36 = 17,64%
## [1] 0.8236434
round(prop.table(table(MotorS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi xe buýt 75.78% < 82,36% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
##
## No Yes
## 75.78 24.22
## Model mg
contrasts(MotorS$MSI)
## Yes
## No 0
## Yes 1
glm.probs.mg1 <- predict(mg1, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg1[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên
## 2 3 4 5 8 9 10
## 0.07230818 0.43608906 0.10147891 0.06051944 0.33375020 0.22061850 0.54331774
## 13 15 16
## 0.14091064 0.60090204 0.52938246
glm.pred.mg1 <- rep("No", 516)
glm.pred.mg1[glm.probs.mg1 >= 0.5] = "Yes"
table(glm.pred.mg1, MotorS$MSI)
##
## glm.pred.mg1 No Yes
## No 368 85
## Yes 23 40
mean(glm.pred.mg1 == MotorS$MSI) # Tỷ lệ dự báo chính xác của mô hình 81% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-79,1 = 20,9%
## [1] 0.7906977
round(prop.table(table(MotorS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi sang xe buýt 73.2% < 81% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
##
## No Yes
## 75.78 24.22
# 4.5. Calculate Sensitivity - Độ nhạy
374/(374+74) # Đạt 83,5% --> Rất lý tưởng: Mô hình 2 xác định tới 81% người không chuyển đổi sang xe buýt
## [1] 0.8348214
# 4.6. Calculate Specificity - Độ đặc hiệu
51/(51+17) # Đạt 75% --> Mô hình 2 có khả năng dự báo 63,5% người chuyển đổi sang xe buýt
## [1] 0.75
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với library(MKmisc) và hàm HLgof.test(fit = fitted(mg2), obs = BSs$MSI) # trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
## The following object is masked from 'package:purrr':
##
## lift
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mg1train = train(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, data = MotorS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg1train)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.80649 -0.69419 -0.43485 -0.00113 2.63043
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -16.38213 552.17363 -0.030 0.97633
## NM1 14.26799 552.17330 0.026 0.97939
## `NM>=2` 14.31772 552.17321 0.026 0.97931
## SPYes -0.48827 0.51806 -0.942 0.34594
## `SPDon't know` -0.54986 0.68980 -0.797 0.42538
## `WIAt Bus Stops` 0.72698 0.36374 1.999 0.04565 *
## `WIInternet/Facebook` 1.73429 0.40686 4.263 2.02e-05 ***
## `WIFrom Friends` 1.35463 0.45332 2.988 0.00281 **
## WIOthers 0.95725 0.50130 1.910 0.05619 .
## BCYes -0.62908 0.24950 -2.521 0.01169 *
## BSTNormal -0.68942 0.35610 -1.936 0.05286 .
## BSTAgree -0.64951 0.34536 -1.881 0.06002 .
## BSCNormal -0.29375 0.48745 -0.603 0.54676
## BSCAgree -0.31145 0.43655 -0.713 0.47558
## BSANormal 0.20265 0.53556 0.378 0.70515
## BSAAgree 0.96808 0.52099 1.858 0.06315 .
## BHENormal -0.21547 0.30297 -0.711 0.47698
## BHEAgree 0.15164 0.29906 0.507 0.61212
## BUBYes 0.49882 0.27314 1.826 0.06781 .
## SBMNormal 0.40746 0.33913 1.201 0.22956
## SBMAgree 1.48605 0.36085 4.118 3.82e-05 ***
## SBSNormal -0.20278 0.30871 -0.657 0.51128
## SBSAgree 1.01296 0.43072 2.352 0.01868 *
## TI 0.00233 0.01126 0.207 0.83612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 458.17 on 492 degrees of freedom
## AIC: 506.17
##
## Number of Fisher Scoring iterations: 14
mg2train = train(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg2train)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1109 -0.6696 -0.4643 -0.1752 2.6984
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.75079 0.40307 -1.863 0.062508 .
## `AG25-60` -0.07254 0.25689 -0.282 0.777664
## `AG>60` 2.30511 0.83491 2.761 0.005764 **
## TSYes -0.46179 0.24422 -1.891 0.058643 .
## `WIAt Bus Stops` 0.75331 0.31163 2.417 0.015637 *
## `WIInternet/Facebook` 1.67211 0.35823 4.668 3.04e-06 ***
## `WIFrom Friends` 1.54306 0.41987 3.675 0.000238 ***
## WIOthers 1.11330 0.49058 2.269 0.023246 *
## LSYes -0.60908 0.28641 -2.127 0.033451 *
## BCYes -0.70517 0.24843 -2.838 0.004533 **
## CAYes -0.83271 0.25749 -3.234 0.001221 **
## SBMNormal 0.04398 0.27512 0.160 0.872991
## SBMAgree 2.02537 0.32084 6.313 2.74e-10 ***
## `TPPicking Children` -0.58872 0.41530 -1.418 0.156319
## `TPEntertainment/Food` -0.79116 0.35106 -2.254 0.024221 *
## TPOthers -0.71397 0.99890 -0.715 0.474759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 457.08 on 500 degrees of freedom
## AIC: 489.08
##
## Number of Fisher Scoring iterations: 5
## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg1 <- predict(mg1train, newdata = MotorS)
confusionMatrix(data = predmg1, MotorS$MSI)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 368 85
## Yes 23 40
##
## Accuracy : 0.7907
## 95% CI : (0.753, 0.825)
## No Information Rate : 0.7578
## P-Value [Acc > NIR] : 0.04332
##
## Kappa : 0.3142
##
## Mcnemar's Test P-Value : 4.365e-09
##
## Sensitivity : 0.9412
## Specificity : 0.3200
## Pos Pred Value : 0.8124
## Neg Pred Value : 0.6349
## Prevalence : 0.7578
## Detection Rate : 0.7132
## Detection Prevalence : 0.8779
## Balanced Accuracy : 0.6306
##
## 'Positive' Class : No
##
predmg2 <- predict(mg2train, newdata = MotorS)
confusionMatrix(data = predmg2, MotorS$MSI)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 374 74
## Yes 17 51
##
## Accuracy : 0.8236
## 95% CI : (0.788, 0.8556)
## No Information Rate : 0.7578
## P-Value [Acc > NIR] : 0.0001897
##
## Kappa : 0.4314
##
## Mcnemar's Test P-Value : 4.348e-09
##
## Sensitivity : 0.9565
## Specificity : 0.4080
## Pos Pred Value : 0.8348
## Neg Pred Value : 0.7500
## Prevalence : 0.7578
## Detection Rate : 0.7248
## Detection Prevalence : 0.8682
## Balanced Accuracy : 0.6823
##
## 'Positive' Class : No
##
## Lấy dataframe với thông tin Accuracy của mô hình
a = mg1train$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg1" # Thêm biến mô hình
a
## Accuracy Kappa Mohinh
## 1 0.8269231 0.46575342 Logitmg1
## 2 0.7692308 0.22580645 Logitmg1
## 3 0.7500000 0.27777778 Logitmg1
## 4 0.8431373 0.47286822 Logitmg1
## 5 0.7058824 0.04494382 Logitmg1
## 6 0.7115385 0.06250000 Logitmg1
## 7 0.8269231 0.40000000 Logitmg1
## 8 0.7450980 0.04329004 Logitmg1
## 9 0.7647059 0.15000000 Logitmg1
## 10 0.6923077 0.17948718 Logitmg1
## 11 0.8461538 0.51515152 Logitmg1
## 12 0.7647059 0.20930233 Logitmg1
## 13 0.7307692 0.03448276 Logitmg1
## 14 0.6538462 -0.10377358 Logitmg1
## 15 0.8627451 0.52208835 Logitmg1
## 16 0.8039216 0.45512821 Logitmg1
## 17 0.7500000 0.18750000 Logitmg1
## 18 0.6923077 0.03030303 Logitmg1
## 19 0.8076923 0.35483871 Logitmg1
## 20 0.7647059 0.26086957 Logitmg1
## 21 0.8039216 0.34108527 Logitmg1
## 22 0.7307692 0.09677419 Logitmg1
## 23 0.8235294 0.49504950 Logitmg1
## 24 0.7450980 0.17228464 Logitmg1
## 25 0.7115385 0.00000000 Logitmg1
## 26 0.7692308 0.22580645 Logitmg1
## 27 0.7735849 0.22815534 Logitmg1
## 28 0.7843137 0.34385965 Logitmg1
## 29 0.7500000 0.23529412 Logitmg1
## 30 0.7254902 0.13768116 Logitmg1
## 31 0.7254902 0.19047619 Logitmg1
## 32 0.7058824 0.04494382 Logitmg1
## 33 0.7358491 0.15489749 Logitmg1
## 34 0.7058824 0.04494382 Logitmg1
## 35 0.8461538 0.48387097 Logitmg1
## 36 0.7115385 0.00000000 Logitmg1
## 37 0.7692308 0.31428571 Logitmg1
## 38 0.7843137 0.24899598 Logitmg1
## 39 0.8431373 0.47286822 Logitmg1
## 40 0.7500000 0.23529412 Logitmg1
## 41 0.7692308 0.31428571 Logitmg1
## 42 0.7884615 0.35294118 Logitmg1
## 43 0.7450980 0.04329004 Logitmg1
## 44 0.8846154 0.65714286 Logitmg1
## 45 0.7692308 0.26415094 Logitmg1
## 46 0.7500000 0.13333333 Logitmg1
## 47 0.7692308 0.22580645 Logitmg1
## 48 0.6666667 -0.08239700 Logitmg1
## 49 0.7647059 0.20930233 Logitmg1
## 50 0.7843137 0.29962547 Logitmg1
## 51 0.7058824 0.04494382 Logitmg1
## 52 0.8076923 0.45945946 Logitmg1
## 53 0.6923077 0.08571429 Logitmg1
## 54 0.7692308 0.15217391 Logitmg1
## 55 0.7884615 0.31250000 Logitmg1
## 56 0.8235294 0.33766234 Logitmg1
## 57 0.8269231 0.40000000 Logitmg1
## 58 0.7647059 0.20930233 Logitmg1
## 59 0.7692308 0.22580645 Logitmg1
## 60 0.7647059 0.26086957 Logitmg1
## 61 0.7843137 0.29962547 Logitmg1
## 62 0.7692308 0.27272727 Logitmg1
## 63 0.7647059 0.20930233 Logitmg1
## 64 0.8269231 0.47058824 Logitmg1
## 65 0.7500000 0.22831050 Logitmg1
## 66 0.8076923 0.35483871 Logitmg1
## 67 0.7692308 0.22580645 Logitmg1
## 68 0.7058824 0.10526316 Logitmg1
## 69 0.7647059 0.15000000 Logitmg1
## 70 0.7500000 0.18750000 Logitmg1
## 71 0.8235294 0.38554217 Logitmg1
## 72 0.8235294 0.38554217 Logitmg1
## 73 0.8269231 0.43750000 Logitmg1
## 74 0.7450980 0.17228464 Logitmg1
## 75 0.6470588 -0.04081633 Logitmg1
## 76 0.6730769 -0.13333333 Logitmg1
## 77 0.7692308 0.22580645 Logitmg1
## 78 0.7169811 0.12154696 Logitmg1
## 79 0.7843137 0.34385965 Logitmg1
## 80 0.7692308 0.31428571 Logitmg1
## 81 0.8076923 0.42857143 Logitmg1
## 82 0.7307692 0.15151515 Logitmg1
## 83 0.7307692 0.24324324 Logitmg1
## 84 0.7115385 0.00000000 Logitmg1
## 85 0.7307692 0.08080808 Logitmg1
## 86 0.8235294 0.38554217 Logitmg1
## 87 0.8039216 0.29166667 Logitmg1
## 88 0.7843137 0.24899598 Logitmg1
## 89 0.7884615 0.35294118 Logitmg1
## 90 0.6862745 0.01449275 Logitmg1
## 91 0.7307692 0.09677419 Logitmg1
## 92 0.8039216 0.23423423 Logitmg1
## 93 0.7647059 0.30612245 Logitmg1
## 94 0.7884615 0.38888889 Logitmg1
## 95 0.7450980 0.22456140 Logitmg1
## 96 0.7254902 0.13768116 Logitmg1
## 97 0.7547170 0.19036428 Logitmg1
## 98 0.8461538 0.48387097 Logitmg1
## 99 0.8039216 0.29166667 Logitmg1
## 100 0.7307692 0.20000000 Logitmg1
b = mg2train$resample
b = b[, -3]
b$Mohinh = "Logitmg2"
b
## Accuracy Kappa Mohinh
## 1 0.7647059 0.20930233 Logitmg2
## 2 0.7692308 0.35135135 Logitmg2
## 3 0.7884615 0.26666667 Logitmg2
## 4 0.8431373 0.47286822 Logitmg2
## 5 0.9038462 0.72222222 Logitmg2
## 6 0.7500000 0.13333333 Logitmg2
## 7 0.8431373 0.47286822 Logitmg2
## 8 0.7547170 0.13550816 Logitmg2
## 9 0.8627451 0.55430712 Logitmg2
## 10 0.8235294 0.42696629 Logitmg2
## 11 0.8076923 0.39393939 Logitmg2
## 12 0.7647059 0.26086957 Logitmg2
## 13 0.8039216 0.29166667 Logitmg2
## 14 0.8039216 0.29166667 Logitmg2
## 15 0.8076923 0.35483871 Logitmg2
## 16 0.8431373 0.43333333 Logitmg2
## 17 0.8653846 0.61111111 Logitmg2
## 18 0.7500000 0.27467811 Logitmg2
## 19 0.8461538 0.54285714 Logitmg2
## 20 0.7307692 0.15151515 Logitmg2
## 21 0.7692308 0.17241379 Logitmg2
## 22 0.8076923 0.42477876 Logitmg2
## 23 0.7500000 0.13333333 Logitmg2
## 24 0.7884615 0.38888889 Logitmg2
## 25 0.8269231 0.50000000 Logitmg2
## 26 0.8039216 0.38405797 Logitmg2
## 27 0.8431373 0.50724638 Logitmg2
## 28 0.8431373 0.47286822 Logitmg2
## 29 0.7692308 0.11111111 Logitmg2
## 30 0.8235294 0.38554217 Logitmg2
## 31 0.8627451 0.55430712 Logitmg2
## 32 0.7647059 0.15000000 Logitmg2
## 33 0.7843137 0.38283828 Logitmg2
## 34 0.7254902 0.07751938 Logitmg2
## 35 0.8461538 0.51515152 Logitmg2
## 36 0.7884615 0.35294118 Logitmg2
## 37 0.7115385 -0.07142857 Logitmg2
## 38 0.9019608 0.70175439 Logitmg2
## 39 0.7884615 0.21428571 Logitmg2
## 40 0.8490566 0.56997972 Logitmg2
## 41 0.8431373 0.47286822 Logitmg2
## 42 0.8431373 0.50724638 Logitmg2
## 43 0.8431373 0.47286822 Logitmg2
## 44 0.8431373 0.47286822 Logitmg2
## 45 0.7450980 0.22456140 Logitmg2
## 46 0.8076923 0.35483871 Logitmg2
## 47 0.7735849 0.27562642 Logitmg2
## 48 0.7692308 0.27272727 Logitmg2
## 49 0.7692308 0.31428571 Logitmg2
## 50 0.8269231 0.40000000 Logitmg2
## 51 0.7692308 0.31428571 Logitmg2
## 52 0.7647059 0.30612245 Logitmg2
## 53 0.7500000 0.23529412 Logitmg2
## 54 0.8461538 0.48387097 Logitmg2
## 55 0.7843137 0.34385965 Logitmg2
## 56 0.8076923 0.39393939 Logitmg2
## 57 0.7450980 0.17228464 Logitmg2
## 58 0.8269231 0.47058824 Logitmg2
## 59 0.8627451 0.52208835 Logitmg2
## 60 0.8076923 0.29347826 Logitmg2
## 61 0.8076923 0.42477876 Logitmg2
## 62 0.8076923 0.39393939 Logitmg2
## 63 0.7692308 0.17241379 Logitmg2
## 64 0.8431373 0.50724638 Logitmg2
## 65 0.7647059 0.15000000 Logitmg2
## 66 0.9019608 0.70175439 Logitmg2
## 67 0.8076923 0.39393939 Logitmg2
## 68 0.8039216 0.23423423 Logitmg2
## 69 0.7884615 0.38888889 Logitmg2
## 70 0.7692308 0.17241379 Logitmg2
## 71 0.7884615 0.26666667 Logitmg2
## 72 0.8461538 0.54285714 Logitmg2
## 73 0.7843137 0.29962547 Logitmg2
## 74 0.7692308 0.26415094 Logitmg2
## 75 0.9038462 0.70588235 Logitmg2
## 76 0.7647059 0.15000000 Logitmg2
## 77 0.8039216 0.34108527 Logitmg2
## 78 0.8076923 0.39393939 Logitmg2
## 79 0.7500000 0.23529412 Logitmg2
## 80 0.8039216 0.38405797 Logitmg2
## 81 0.7692308 0.27272727 Logitmg2
## 82 0.7647059 0.30612245 Logitmg2
## 83 0.7058824 -0.02409639 Logitmg2
## 84 0.7843137 0.29962547 Logitmg2
## 85 0.7884615 0.31250000 Logitmg2
## 86 0.9230769 0.77142857 Logitmg2
## 87 0.7884615 0.19209040 Logitmg2
## 88 0.8653846 0.58823529 Logitmg2
## 89 0.7884615 0.31250000 Logitmg2
## 90 0.8039216 0.34108527 Logitmg2
## 91 0.7450980 0.17228464 Logitmg2
## 92 0.8235294 0.38554217 Logitmg2
## 93 0.8269231 0.47058824 Logitmg2
## 94 0.7450980 0.11244980 Logitmg2
## 95 0.8113208 0.35679612 Logitmg2
## 96 0.7500000 0.13333333 Logitmg2
## 97 0.8076923 0.39393939 Logitmg2
## 98 0.7307692 0.24324324 Logitmg2
## 99 0.8627451 0.58245614 Logitmg2
## 100 0.7843137 0.29962547 Logitmg2
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
## Accuracy Kappa Mohinh
## 1 0.8269231 0.46575342 Logitmg1
## 2 0.7692308 0.22580645 Logitmg1
## 3 0.7500000 0.27777778 Logitmg1
## 4 0.8431373 0.47286822 Logitmg1
## 5 0.7058824 0.04494382 Logitmg1
## 6 0.7115385 0.06250000 Logitmg1
## 7 0.8269231 0.40000000 Logitmg1
## 8 0.7450980 0.04329004 Logitmg1
## 9 0.7647059 0.15000000 Logitmg1
## 10 0.6923077 0.17948718 Logitmg1
## 11 0.8461538 0.51515152 Logitmg1
## 12 0.7647059 0.20930233 Logitmg1
## 13 0.7307692 0.03448276 Logitmg1
## 14 0.6538462 -0.10377358 Logitmg1
## 15 0.8627451 0.52208835 Logitmg1
## 16 0.8039216 0.45512821 Logitmg1
## 17 0.7500000 0.18750000 Logitmg1
## 18 0.6923077 0.03030303 Logitmg1
## 19 0.8076923 0.35483871 Logitmg1
## 20 0.7647059 0.26086957 Logitmg1
## 21 0.8039216 0.34108527 Logitmg1
## 22 0.7307692 0.09677419 Logitmg1
## 23 0.8235294 0.49504950 Logitmg1
## 24 0.7450980 0.17228464 Logitmg1
## 25 0.7115385 0.00000000 Logitmg1
## 26 0.7692308 0.22580645 Logitmg1
## 27 0.7735849 0.22815534 Logitmg1
## 28 0.7843137 0.34385965 Logitmg1
## 29 0.7500000 0.23529412 Logitmg1
## 30 0.7254902 0.13768116 Logitmg1
## 31 0.7254902 0.19047619 Logitmg1
## 32 0.7058824 0.04494382 Logitmg1
## 33 0.7358491 0.15489749 Logitmg1
## 34 0.7058824 0.04494382 Logitmg1
## 35 0.8461538 0.48387097 Logitmg1
## 36 0.7115385 0.00000000 Logitmg1
## 37 0.7692308 0.31428571 Logitmg1
## 38 0.7843137 0.24899598 Logitmg1
## 39 0.8431373 0.47286822 Logitmg1
## 40 0.7500000 0.23529412 Logitmg1
## 41 0.7692308 0.31428571 Logitmg1
## 42 0.7884615 0.35294118 Logitmg1
## 43 0.7450980 0.04329004 Logitmg1
## 44 0.8846154 0.65714286 Logitmg1
## 45 0.7692308 0.26415094 Logitmg1
## 46 0.7500000 0.13333333 Logitmg1
## 47 0.7692308 0.22580645 Logitmg1
## 48 0.6666667 -0.08239700 Logitmg1
## 49 0.7647059 0.20930233 Logitmg1
## 50 0.7843137 0.29962547 Logitmg1
## 51 0.7058824 0.04494382 Logitmg1
## 52 0.8076923 0.45945946 Logitmg1
## 53 0.6923077 0.08571429 Logitmg1
## 54 0.7692308 0.15217391 Logitmg1
## 55 0.7884615 0.31250000 Logitmg1
## 56 0.8235294 0.33766234 Logitmg1
## 57 0.8269231 0.40000000 Logitmg1
## 58 0.7647059 0.20930233 Logitmg1
## 59 0.7692308 0.22580645 Logitmg1
## 60 0.7647059 0.26086957 Logitmg1
## 61 0.7843137 0.29962547 Logitmg1
## 62 0.7692308 0.27272727 Logitmg1
## 63 0.7647059 0.20930233 Logitmg1
## 64 0.8269231 0.47058824 Logitmg1
## 65 0.7500000 0.22831050 Logitmg1
## 66 0.8076923 0.35483871 Logitmg1
## 67 0.7692308 0.22580645 Logitmg1
## 68 0.7058824 0.10526316 Logitmg1
## 69 0.7647059 0.15000000 Logitmg1
## 70 0.7500000 0.18750000 Logitmg1
## 71 0.8235294 0.38554217 Logitmg1
## 72 0.8235294 0.38554217 Logitmg1
## 73 0.8269231 0.43750000 Logitmg1
## 74 0.7450980 0.17228464 Logitmg1
## 75 0.6470588 -0.04081633 Logitmg1
## 76 0.6730769 -0.13333333 Logitmg1
## 77 0.7692308 0.22580645 Logitmg1
## 78 0.7169811 0.12154696 Logitmg1
## 79 0.7843137 0.34385965 Logitmg1
## 80 0.7692308 0.31428571 Logitmg1
## 81 0.8076923 0.42857143 Logitmg1
## 82 0.7307692 0.15151515 Logitmg1
## 83 0.7307692 0.24324324 Logitmg1
## 84 0.7115385 0.00000000 Logitmg1
## 85 0.7307692 0.08080808 Logitmg1
## 86 0.8235294 0.38554217 Logitmg1
## 87 0.8039216 0.29166667 Logitmg1
## 88 0.7843137 0.24899598 Logitmg1
## 89 0.7884615 0.35294118 Logitmg1
## 90 0.6862745 0.01449275 Logitmg1
## 91 0.7307692 0.09677419 Logitmg1
## 92 0.8039216 0.23423423 Logitmg1
## 93 0.7647059 0.30612245 Logitmg1
## 94 0.7884615 0.38888889 Logitmg1
## 95 0.7450980 0.22456140 Logitmg1
## 96 0.7254902 0.13768116 Logitmg1
## 97 0.7547170 0.19036428 Logitmg1
## 98 0.8461538 0.48387097 Logitmg1
## 99 0.8039216 0.29166667 Logitmg1
## 100 0.7307692 0.20000000 Logitmg1
## 101 0.7647059 0.20930233 Logitmg2
## 102 0.7692308 0.35135135 Logitmg2
## 103 0.7884615 0.26666667 Logitmg2
## 104 0.8431373 0.47286822 Logitmg2
## 105 0.9038462 0.72222222 Logitmg2
## 106 0.7500000 0.13333333 Logitmg2
## 107 0.8431373 0.47286822 Logitmg2
## 108 0.7547170 0.13550816 Logitmg2
## 109 0.8627451 0.55430712 Logitmg2
## 110 0.8235294 0.42696629 Logitmg2
## 111 0.8076923 0.39393939 Logitmg2
## 112 0.7647059 0.26086957 Logitmg2
## 113 0.8039216 0.29166667 Logitmg2
## 114 0.8039216 0.29166667 Logitmg2
## 115 0.8076923 0.35483871 Logitmg2
## 116 0.8431373 0.43333333 Logitmg2
## 117 0.8653846 0.61111111 Logitmg2
## 118 0.7500000 0.27467811 Logitmg2
## 119 0.8461538 0.54285714 Logitmg2
## 120 0.7307692 0.15151515 Logitmg2
## 121 0.7692308 0.17241379 Logitmg2
## 122 0.8076923 0.42477876 Logitmg2
## 123 0.7500000 0.13333333 Logitmg2
## 124 0.7884615 0.38888889 Logitmg2
## 125 0.8269231 0.50000000 Logitmg2
## 126 0.8039216 0.38405797 Logitmg2
## 127 0.8431373 0.50724638 Logitmg2
## 128 0.8431373 0.47286822 Logitmg2
## 129 0.7692308 0.11111111 Logitmg2
## 130 0.8235294 0.38554217 Logitmg2
## 131 0.8627451 0.55430712 Logitmg2
## 132 0.7647059 0.15000000 Logitmg2
## 133 0.7843137 0.38283828 Logitmg2
## 134 0.7254902 0.07751938 Logitmg2
## 135 0.8461538 0.51515152 Logitmg2
## 136 0.7884615 0.35294118 Logitmg2
## 137 0.7115385 -0.07142857 Logitmg2
## 138 0.9019608 0.70175439 Logitmg2
## 139 0.7884615 0.21428571 Logitmg2
## 140 0.8490566 0.56997972 Logitmg2
## 141 0.8431373 0.47286822 Logitmg2
## 142 0.8431373 0.50724638 Logitmg2
## 143 0.8431373 0.47286822 Logitmg2
## 144 0.8431373 0.47286822 Logitmg2
## 145 0.7450980 0.22456140 Logitmg2
## 146 0.8076923 0.35483871 Logitmg2
## 147 0.7735849 0.27562642 Logitmg2
## 148 0.7692308 0.27272727 Logitmg2
## 149 0.7692308 0.31428571 Logitmg2
## 150 0.8269231 0.40000000 Logitmg2
## 151 0.7692308 0.31428571 Logitmg2
## 152 0.7647059 0.30612245 Logitmg2
## 153 0.7500000 0.23529412 Logitmg2
## 154 0.8461538 0.48387097 Logitmg2
## 155 0.7843137 0.34385965 Logitmg2
## 156 0.8076923 0.39393939 Logitmg2
## 157 0.7450980 0.17228464 Logitmg2
## 158 0.8269231 0.47058824 Logitmg2
## 159 0.8627451 0.52208835 Logitmg2
## 160 0.8076923 0.29347826 Logitmg2
## 161 0.8076923 0.42477876 Logitmg2
## 162 0.8076923 0.39393939 Logitmg2
## 163 0.7692308 0.17241379 Logitmg2
## 164 0.8431373 0.50724638 Logitmg2
## 165 0.7647059 0.15000000 Logitmg2
## 166 0.9019608 0.70175439 Logitmg2
## 167 0.8076923 0.39393939 Logitmg2
## 168 0.8039216 0.23423423 Logitmg2
## 169 0.7884615 0.38888889 Logitmg2
## 170 0.7692308 0.17241379 Logitmg2
## 171 0.7884615 0.26666667 Logitmg2
## 172 0.8461538 0.54285714 Logitmg2
## 173 0.7843137 0.29962547 Logitmg2
## 174 0.7692308 0.26415094 Logitmg2
## 175 0.9038462 0.70588235 Logitmg2
## 176 0.7647059 0.15000000 Logitmg2
## 177 0.8039216 0.34108527 Logitmg2
## 178 0.8076923 0.39393939 Logitmg2
## 179 0.7500000 0.23529412 Logitmg2
## 180 0.8039216 0.38405797 Logitmg2
## 181 0.7692308 0.27272727 Logitmg2
## 182 0.7647059 0.30612245 Logitmg2
## 183 0.7058824 -0.02409639 Logitmg2
## 184 0.7843137 0.29962547 Logitmg2
## 185 0.7884615 0.31250000 Logitmg2
## 186 0.9230769 0.77142857 Logitmg2
## 187 0.7884615 0.19209040 Logitmg2
## 188 0.8653846 0.58823529 Logitmg2
## 189 0.7884615 0.31250000 Logitmg2
## 190 0.8039216 0.34108527 Logitmg2
## 191 0.7450980 0.17228464 Logitmg2
## 192 0.8235294 0.38554217 Logitmg2
## 193 0.8269231 0.47058824 Logitmg2
## 194 0.7450980 0.11244980 Logitmg2
## 195 0.8113208 0.35679612 Logitmg2
## 196 0.7500000 0.13333333 Logitmg2
## 197 0.8076923 0.39393939 Logitmg2
## 198 0.7307692 0.24324324 Logitmg2
## 199 0.8627451 0.58245614 Logitmg2
## 200 0.7843137 0.29962547 Logitmg2
## Phân tích hình ảnh
# install.packages("ggplot2")
library(ggplot2)
library(ggthemes)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.7578, color = "blue") + theme_wsj() # Giá trị 0.7578 = 75.78% là tỷ lệ nhóm chiếm ưu thế (không chuyển đổi xe buýt)
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 75.78% (đường màu xanh)
h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2
grid.arrange(h1, h2, ncol = 2, nrow = 1)
## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
##
## Welch Two Sample t-test
##
## data: resamplemod$Accuracy by resamplemod$Mohinh
## t = -5.6188, df = 197.15, p-value = 6.492e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04976722 -0.02390861
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2
## 0.7647548 0.8015927
summary(a)
## Accuracy Kappa Mohinh
## Min. :0.6471 Min. :-0.1333 Length:100
## 1st Qu.:0.7308 1st Qu.: 0.1377 Class :character
## Median :0.7647 Median : 0.2270 Mode :character
## Mean :0.7648 Mean : 0.2363
## 3rd Qu.:0.8039 3rd Qu.: 0.3439
## Max. :0.8846 Max. : 0.6571
summary(b)
## Accuracy Kappa Mohinh
## Min. :0.7059 Min. :-0.07143 Length:100
## 1st Qu.:0.7692 1st Qu.: 0.24126 Class :character
## Median :0.8039 Median : 0.35215 Mode :character
## Mean :0.8016 Mean : 0.35229
## 3rd Qu.:0.8431 3rd Qu.: 0.47287
## Max. :0.9231 Max. : 0.77143
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
##
## Welch Two Sample t-test
##
## data: resamplemod$Kappa by resamplemod$Mohinh
## t = -5.1443, df = 197.59, p-value = 6.449e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.16046674 -0.07153101
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2
## 0.2362892 0.3522881
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
##
## ci
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Mô hình mg1
mgtrain1 <- train(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, method = "glm", family = "binomial", data = MotorS)
summary(mgtrain1)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.80649 -0.69419 -0.43485 -0.00113 2.63043
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -16.38213 552.17363 -0.030 0.97633
## NM1 14.26799 552.17330 0.026 0.97939
## `NM>=2` 14.31772 552.17321 0.026 0.97931
## SPYes -0.48827 0.51806 -0.942 0.34594
## `SPDon't know` -0.54986 0.68980 -0.797 0.42538
## `WIAt Bus Stops` 0.72698 0.36374 1.999 0.04565 *
## `WIInternet/Facebook` 1.73429 0.40686 4.263 2.02e-05 ***
## `WIFrom Friends` 1.35463 0.45332 2.988 0.00281 **
## WIOthers 0.95725 0.50130 1.910 0.05619 .
## BCYes -0.62908 0.24950 -2.521 0.01169 *
## BSTNormal -0.68942 0.35610 -1.936 0.05286 .
## BSTAgree -0.64951 0.34536 -1.881 0.06002 .
## BSCNormal -0.29375 0.48745 -0.603 0.54676
## BSCAgree -0.31145 0.43655 -0.713 0.47558
## BSANormal 0.20265 0.53556 0.378 0.70515
## BSAAgree 0.96808 0.52099 1.858 0.06315 .
## BHENormal -0.21547 0.30297 -0.711 0.47698
## BHEAgree 0.15164 0.29906 0.507 0.61212
## BUBYes 0.49882 0.27314 1.826 0.06781 .
## SBMNormal 0.40746 0.33913 1.201 0.22956
## SBMAgree 1.48605 0.36085 4.118 3.82e-05 ***
## SBSNormal -0.20278 0.30871 -0.657 0.51128
## SBSAgree 1.01296 0.43072 2.352 0.01868 *
## TI 0.00233 0.01126 0.207 0.83612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 458.17 on 492 degrees of freedom
## AIC: 506.17
##
## Number of Fisher Scoring iterations: 14
pred1mg <- predict(mgtrain1, newdata = MotorS, type = "prob")
rmg <- roc(MotorS$MSI, pred1mg$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg$auc
## Area under the curve: 0.7969
plot(rmg, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)
plot(smooth(rmg), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)
## Có thể dùng các lệnh đơn giản
auc(rmg)
## Area under the curve: 0.7969
plot.roc(rmg)
ci(rmg)
## 95% CI: 0.754-0.8397 (DeLong)
## Mô hình mg2
mg2train1 <- train(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS, method = "glm", family = "binomial")
summary(mg2train1)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1109 -0.6696 -0.4643 -0.1752 2.6984
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.75079 0.40307 -1.863 0.062508 .
## `AG25-60` -0.07254 0.25689 -0.282 0.777664
## `AG>60` 2.30511 0.83491 2.761 0.005764 **
## TSYes -0.46179 0.24422 -1.891 0.058643 .
## `WIAt Bus Stops` 0.75331 0.31163 2.417 0.015637 *
## `WIInternet/Facebook` 1.67211 0.35823 4.668 3.04e-06 ***
## `WIFrom Friends` 1.54306 0.41987 3.675 0.000238 ***
## WIOthers 1.11330 0.49058 2.269 0.023246 *
## LSYes -0.60908 0.28641 -2.127 0.033451 *
## BCYes -0.70517 0.24843 -2.838 0.004533 **
## CAYes -0.83271 0.25749 -3.234 0.001221 **
## SBMNormal 0.04398 0.27512 0.160 0.872991
## SBMAgree 2.02537 0.32084 6.313 2.74e-10 ***
## `TPPicking Children` -0.58872 0.41530 -1.418 0.156319
## `TPEntertainment/Food` -0.79116 0.35106 -2.254 0.024221 *
## TPOthers -0.71397 0.99890 -0.715 0.474759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 457.08 on 500 degrees of freedom
## AIC: 489.08
##
## Number of Fisher Scoring iterations: 5
pred1mg2 <- predict(mg2train1, newdata = MotorS, type = "prob")
rmg2 <- roc(MotorS$MSI, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.7923
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)
plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)
## Tính hệ số Gini = 2AUC-1
Ginimg <- 2*(rmg$auc)-1
Ginimg
## [1] 0.5937187
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.5845524
5. Importance of variables in models
library(caret)
varImp(mg2)
## Overall
## AG25-60 0.2823645
## AG>60 2.7609178
## TSYes 1.8908608
## WIAt Bus Stops 2.4172876
## WIInternet/Facebook 4.6677670
## WIFrom Friends 3.6750679
## WIOthers 2.2693698
## LSYes 2.1266243
## BCYes 2.8384880
## CAYes 3.2339820
## SBMNormal 0.1598598
## SBMAgree 6.3127968
## TPPicking Children 1.4175620
## TPEntertainment/Food 2.2536040
## TPOthers 0.7147577
# plot(varImp(mg2))
6. Probility of bus use - Prohibit model
# Prohibit Models
library(aod)
##
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
##
## rats
## Model mg1
prohibitmg1 <- glm(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, family = binomial(link = "probit"), data = MotorS)
summary(prohibitmg1)
##
## Call:
## glm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE +
## BUB + SBM + SBS + TI, family = binomial(link = "probit"),
## data = MotorS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.78116 -0.69947 -0.42198 -0.00098 2.70426
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.234267 140.267761 -0.044 0.96455
## NM1 4.958399 140.267361 0.035 0.97180
## NM>=2 4.946783 140.267221 0.035 0.97187
## SPYes -0.221128 0.289658 -0.763 0.44522
## SPDon't know -0.281875 0.368785 -0.764 0.44467
## WIAt Bus Stops 0.370945 0.203212 1.825 0.06794 .
## WIInternet/Facebook 0.975965 0.231298 4.220 2.45e-05 ***
## WIFrom Friends 0.779809 0.261582 2.981 0.00287 **
## WIOthers 0.526548 0.286183 1.840 0.06578 .
## BCYes -0.349442 0.142179 -2.458 0.01398 *
## BSTNormal -0.409165 0.201514 -2.030 0.04231 *
## BSTAgree -0.381980 0.197425 -1.935 0.05301 .
## BSCNormal -0.159231 0.278115 -0.573 0.56696
## BSCAgree -0.167714 0.251752 -0.666 0.50529
## BSANormal 0.140126 0.305581 0.459 0.64655
## BSAAgree 0.562618 0.299267 1.880 0.06011 .
## BHENormal -0.118151 0.172999 -0.683 0.49463
## BHEAgree 0.088132 0.173235 0.509 0.61093
## BUBYes 0.295002 0.154523 1.909 0.05625 .
## SBMNormal 0.251063 0.195509 1.284 0.19909
## SBMAgree 0.891703 0.213962 4.168 3.08e-05 ***
## SBSNormal -0.115522 0.178942 -0.646 0.51855
## SBSAgree 0.613392 0.257007 2.387 0.01700 *
## TI 0.001392 0.006498 0.214 0.83034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 457.07 on 492 degrees of freedom
## AIC: 505.07
##
## Number of Fisher Scoring iterations: 14
## Model mg2
prohibitmg2 <- glm(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial(link = "probit"), data = MotorS)
summary(prohibitmg2)
##
## Call:
## glm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial(link = "probit"),
## data = MotorS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1034 -0.6752 -0.4654 -0.1388 2.7459
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.44907 0.23012 -1.951 0.051000 .
## AG25-60 -0.04813 0.14639 -0.329 0.742307
## AG>60 1.27195 0.49303 2.580 0.009884 **
## TSYes -0.25612 0.13812 -1.854 0.063686 .
## WIAt Bus Stops 0.40190 0.17255 2.329 0.019850 *
## WIInternet/Facebook 0.92887 0.20409 4.551 5.33e-06 ***
## WIFrom Friends 0.87198 0.24272 3.593 0.000328 ***
## WIOthers 0.60169 0.27713 2.171 0.029922 *
## LSYes -0.38435 0.16348 -2.351 0.018722 *
## BCYes -0.38520 0.13998 -2.752 0.005928 **
## CAYes -0.43460 0.14417 -3.014 0.002574 **
## SBMNormal 0.04853 0.15448 0.314 0.753382
## SBMAgree 1.18416 0.18382 6.442 1.18e-10 ***
## TPPicking Children -0.31201 0.22912 -1.362 0.173260
## TPEntertainment/Food -0.42127 0.19550 -2.155 0.031172 *
## TPOthers -0.43526 0.56172 -0.775 0.438409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 571.37 on 515 degrees of freedom
## Residual deviance: 458.21 on 500 degrees of freedom
## AIC: 490.21
##
## Number of Fisher Scoring iterations: 5
# Đánh giá tác động tổng thể Purpose (Total effect of Temporary Stops) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với Purpose là từ 4)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 4) # Giá trị 3.4 ứng với p-value > 0.05 --> KL: tác động tổng thể về TS (Số điểm dừng tạm thời) của người sử dụng lên khả năng chuyển đổi xe buýt là không có ý nghĩa thống kê
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 3.4, df = 1, P(> X2) = 0.064
# Đánh giá tác động tổng thể WI (Total effect of WI) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với WI là từ 5:8)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 5:8)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 26.3, df = 4, P(> X2) = 2.7e-05
# Đánh giá tác động tổng thể SBM (Total effect of SBM) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SBM là từ 12:13)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 12:13)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 45.2, df = 2, P(> X2) = 1.5e-10
# Đánh giá tác động tổng thể TP (Total effect of TP) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SP là từ 14:16)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 14:16)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 6.1, df = 3, P(> X2) = 0.11
# Đánh giá tác động tổng thể AG (Total effect of AG) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với OC là từ 2:3)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 2:3)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 7.3, df = 2, P(> X2) = 0.026
7. Perception of bus in two groups people: Shift to bus and non ship to bus 7.1. Tạo dữ liệu biểu đồ likert tương ứng với 2 nhóm : Chuyển đổi BP_MS và không chuyển đổi BP_nMS
# Create data for Likert graph by BS (remove ID and TM =7-Bus)
head(MS)
## ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1 1 0 1 6 3 3 1 2 10 1 1 0 4 1 0 1 15 1 3 3 3 4 4
## 2 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5
## 3 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4
## 4 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4
## 5 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4
## 6 6 0 1 4 1 4 1 20 30 1 1 0 5 3 0 2 50 0 3 2 4 4 4
## BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1 4 4 3 4 4 4 4 1 1 1 0 0 5 4 2 2 0 0 0 0 0 1 2
## 2 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3
## 3 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3
## 4 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3
## 5 2 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2
## 6 3 3 3 2 4 4 4 2 0 0 0 1 4 3 4 2 1 1 1 1 1 1 2
## NR
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
str(MS)
## 'data.frame': 810 obs. of 47 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CA : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TM : int 6 3 3 3 3 4 4 3 3 3 ...
## $ TP : int 3 1 1 1 1 1 1 1 1 1 ...
## $ FR : int 3 4 4 4 4 4 4 4 4 4 ...
## $ DT : int 1 1 3 1 3 1 1 1 2 1 ...
## $ DI : num 2 8 5 5 8 20 15 10 12 10 ...
## $ TI : num 10 15 15 10 15 30 20 25 30 25 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : int 0 1 1 1 0 0 0 0 1 1 ...
## $ MR : int 4 2 4 2 2 5 5 2 2 2 ...
## $ WE : int 1 3 1 1 3 3 3 3 1 3 ...
## $ WK : int 0 0 0 0 0 0 0 1 0 0 ...
## $ NBR: int 1 1 2 4 2 2 4 2 1 4 ...
## $ CO : num 15 5 3 10 5 50 40 5 7 7 ...
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: int 3 2 2 2 2 3 2 3 3 3 ...
## $ SBS: int 3 2 2 2 2 2 2 2 3 2 ...
## $ BST: int 3 2 2 3 3 4 4 5 4 2 ...
## $ BSC: int 4 4 4 4 4 4 4 5 4 5 ...
## $ BSA: int 4 5 4 4 4 4 4 5 4 5 ...
## $ BRE: int 4 2 3 4 2 3 2 3 3 3 ...
## $ BCO: int 4 2 3 4 2 3 2 3 3 2 ...
## $ BIN: int 3 4 4 4 3 3 3 3 3 4 ...
## $ BHE: int 4 2 2 3 2 2 2 3 3 2 ...
## $ BEN: int 4 5 4 3 4 4 5 5 4 5 ...
## $ BRC: int 4 5 4 3 4 4 5 5 4 5 ...
## $ BWE: int 4 3 4 5 4 4 5 5 4 4 ...
## $ SP : int 1 2 1 1 2 2 2 1 1 1 ...
## $ BI : int 1 0 1 1 0 0 0 1 1 1 ...
## $ WI : int 1 0 2 1 0 0 0 2 3 2 ...
## $ MSI: int 0 0 0 0 0 0 0 1 0 0 ...
## $ GD : int 0 0 1 1 0 1 1 0 0 1 ...
## $ AG : int 5 3 3 3 4 4 5 4 3 4 ...
## $ OC : int 4 6 2 2 6 3 3 7 7 3 ...
## $ IN : int 2 3 4 3 3 4 4 2 3 3 ...
## $ NC : int 2 1 0 0 1 2 2 0 1 0 ...
## $ MC : int 0 1 1 1 1 1 1 1 1 1 ...
## $ CC : int 0 0 0 0 0 1 1 0 0 0 ...
## $ BO : int 0 1 0 1 0 1 0 0 1 0 ...
## $ MO : int 0 1 1 1 1 1 1 1 1 1 ...
## $ RO : int 0 0 0 0 0 1 1 0 0 0 ...
## $ NB : int 1 1 0 1 0 1 0 0 1 0 ...
## $ NM : int 2 3 3 3 2 2 2 2 3 3 ...
## $ NR : int 1 0 0 0 0 1 1 0 0 0 ...
dim(MS)
## [1] 810 47
BS_likert = subset(MS,TM == 3)
head(BS_likert)
## ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 2 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5
## 3 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4
## 4 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4
## 5 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4
## 8 8 0 1 3 1 4 1 10 25 1 1 0 2 3 1 2 5 1 3 2 5 5 5
## 9 9 0 1 3 1 4 2 12 30 1 1 1 2 1 0 1 7 1 3 3 4 4 4
## BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 2 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3
## 3 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3
## 4 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3
## 5 2 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2
## 8 3 3 3 3 5 5 5 1 1 2 1 0 4 7 2 0 1 0 0 1 0 0 2
## 9 3 3 3 3 4 4 4 1 1 3 0 0 3 7 3 1 1 0 1 1 0 1 3
## NR
## 2 0
## 3 0
## 4 0
## 5 0
## 8 0
## 9 0
str(BS_likert)
## 'data.frame': 516 obs. of 47 variables:
## $ ID : int 2 3 4 5 8 9 10 13 15 16 ...
## $ CA : int 0 0 0 0 0 0 0 0 1 1 ...
## $ BC : int 1 1 1 1 1 1 1 1 0 0 ...
## $ TM : int 3 3 3 3 3 3 3 3 3 3 ...
## $ TP : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FR : int 4 4 4 4 4 4 4 4 4 4 ...
## $ DT : int 1 3 1 3 1 2 1 1 1 1 ...
## $ DI : num 8 5 5 8 10 12 10 3 2 4 ...
## $ TI : num 15 15 10 15 25 30 25 5 5 10 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : int 1 1 1 0 0 1 1 0 1 0 ...
## $ MR : int 2 4 2 2 2 2 2 2 4 3 ...
## $ WE : int 3 1 1 3 3 1 3 1 3 1 ...
## $ WK : int 0 0 0 0 1 0 0 0 0 0 ...
## $ NBR: int 1 2 4 2 2 1 4 4 4 2 ...
## $ CO : num 5 3 10 5 5 7 7 3 3 10 ...
## $ BUB: int 0 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 2 2 2 2 3 3 3 1 4 4 ...
## $ SBS: int 2 2 2 2 2 3 2 1 3 2 ...
## $ BST: int 2 2 3 3 5 4 2 3 2 3 ...
## $ BSC: int 4 4 4 4 5 4 5 3 4 3 ...
## $ BSA: int 5 4 4 4 5 4 5 4 4 4 ...
## $ BRE: int 2 3 4 2 3 3 3 1 4 4 ...
## $ BCO: int 2 3 4 2 3 3 2 4 4 2 ...
## $ BIN: int 4 4 4 3 3 3 4 3 4 4 ...
## $ BHE: int 2 2 3 2 3 3 2 4 3 2 ...
## $ BEN: int 5 4 3 4 5 4 5 4 4 4 ...
## $ BRC: int 5 4 3 4 5 4 5 4 5 4 ...
## $ BWE: int 3 4 5 4 5 4 4 4 5 4 ...
## $ SP : int 2 1 1 2 1 1 1 1 1 1 ...
## $ BI : int 0 1 1 0 1 1 1 1 1 1 ...
## $ WI : int 0 2 1 0 2 3 2 1 1 1 ...
## $ MSI: int 0 0 0 0 1 0 0 0 1 1 ...
## $ GD : int 0 1 1 0 0 0 1 1 1 1 ...
## $ AG : int 3 3 3 4 4 3 4 2 4 3 ...
## $ OC : int 6 2 2 6 7 7 3 1 7 7 ...
## $ IN : int 3 4 3 3 2 3 3 2 2 1 ...
## $ NC : int 1 0 0 1 0 1 0 0 1 0 ...
## $ MC : int 1 1 1 1 1 1 1 0 1 1 ...
## $ CC : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BO : int 1 0 1 0 0 1 0 1 0 0 ...
## $ MO : int 1 1 1 1 1 1 1 0 1 1 ...
## $ RO : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NB : int 1 0 1 0 0 1 0 1 2 0 ...
## $ NM : int 3 3 3 2 2 3 3 3 2 1 ...
## $ NR : int 0 0 0 0 0 0 0 0 0 0 ...
dim(BS_likert)
## [1] 516 47
## Create dat_BST
dat_BST <- BS_likert[ , c(18,19,20,21,34)]
head(dat_BST)
## BUB SBM SBS BST MSI
## 2 0 2 2 2 0
## 3 1 2 2 2 0
## 4 1 2 2 3 0
## 5 1 2 2 3 0
## 8 1 3 2 5 1
## 9 1 3 3 4 0
dim(dat_BST)
## [1] 516 5
size <- 516
dat_BST$BPE <- rep("BST", size = size) # Tạo biến mới BPE
names(dat_BST)[names(dat_BST) == "BST"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BST)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 2 0 BST
## 3 1 2 2 2 0 BST
## 4 1 2 2 3 0 BST
## 5 1 2 2 3 0 BST
## 8 1 3 2 5 1 BST
## 9 1 3 3 4 0 BST
## Create dat_BSC
dat_BSC <- BS_likert[ , c(18,19,20,22,34)]
head(dat_BSC)
## BUB SBM SBS BSC MSI
## 2 0 2 2 4 0
## 3 1 2 2 4 0
## 4 1 2 2 4 0
## 5 1 2 2 4 0
## 8 1 3 2 5 1
## 9 1 3 3 4 0
dim(dat_BSC)
## [1] 516 5
str(dat_BSC)
## 'data.frame': 516 obs. of 5 variables:
## $ BUB: int 0 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 2 2 2 2 3 3 3 1 4 4 ...
## $ SBS: int 2 2 2 2 2 3 2 1 3 2 ...
## $ BSC: int 4 4 4 4 5 4 5 3 4 3 ...
## $ MSI: int 0 0 0 0 1 0 0 0 1 1 ...
dat_BSC$BPE <- rep("BSC", size = size) # Tạo biến mới BPE
names(dat_BSC)[names(dat_BSC) == "BSC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSC)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 4 0 BSC
## 3 1 2 2 4 0 BSC
## 4 1 2 2 4 0 BSC
## 5 1 2 2 4 0 BSC
## 8 1 3 2 5 1 BSC
## 9 1 3 3 4 0 BSC
## Create dat_BSA
dat_BSA <- BS_likert[ , c(18,19,20,23,34)]
head(dat_BSA)
## BUB SBM SBS BSA MSI
## 2 0 2 2 5 0
## 3 1 2 2 4 0
## 4 1 2 2 4 0
## 5 1 2 2 4 0
## 8 1 3 2 5 1
## 9 1 3 3 4 0
dim(dat_BSA)
## [1] 516 5
dat_BSA$BPE <- rep("BSA", size = size) # Tạo biến mới BPE
names(dat_BSA)[names(dat_BSA) == "BSA"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSA)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 5 0 BSA
## 3 1 2 2 4 0 BSA
## 4 1 2 2 4 0 BSA
## 5 1 2 2 4 0 BSA
## 8 1 3 2 5 1 BSA
## 9 1 3 3 4 0 BSA
## Create dat_BRE
dat_BRE <- BS_likert[ , c(18,19,20,24,34)]
head(dat_BRE)
## BUB SBM SBS BRE MSI
## 2 0 2 2 2 0
## 3 1 2 2 3 0
## 4 1 2 2 4 0
## 5 1 2 2 2 0
## 8 1 3 2 3 1
## 9 1 3 3 3 0
dim(dat_BRE)
## [1] 516 5
dat_BRE$BPE <- rep("BRE", size = size) # Tạo biến mới BPE
names(dat_BRE)[names(dat_BRE) == "BRE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRE)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 2 0 BRE
## 3 1 2 2 3 0 BRE
## 4 1 2 2 4 0 BRE
## 5 1 2 2 2 0 BRE
## 8 1 3 2 3 1 BRE
## 9 1 3 3 3 0 BRE
## Create dat_BCO
dat_BCO <- BS_likert[ , c(18,19,20,25,34)]
head(dat_BCO)
## BUB SBM SBS BCO MSI
## 2 0 2 2 2 0
## 3 1 2 2 3 0
## 4 1 2 2 4 0
## 5 1 2 2 2 0
## 8 1 3 2 3 1
## 9 1 3 3 3 0
dim(dat_BCO)
## [1] 516 5
dat_BCO$BPE <- rep("BCO", size = size) # Tạo biến mới BPE
names(dat_BCO)[names(dat_BCO) == "BCO"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BCO)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 2 0 BCO
## 3 1 2 2 3 0 BCO
## 4 1 2 2 4 0 BCO
## 5 1 2 2 2 0 BCO
## 8 1 3 2 3 1 BCO
## 9 1 3 3 3 0 BCO
## Create dat_BIN
dat_BIN <- BS_likert[ , c(18,19,20,26,34)]
head(dat_BIN)
## BUB SBM SBS BIN MSI
## 2 0 2 2 4 0
## 3 1 2 2 4 0
## 4 1 2 2 4 0
## 5 1 2 2 3 0
## 8 1 3 2 3 1
## 9 1 3 3 3 0
dim(dat_BIN)
## [1] 516 5
dat_BIN$BPE <- rep("BIN", size = size) # Tạo biến mới BPE
names(dat_BIN)[names(dat_BIN) == "BIN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BIN)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 4 0 BIN
## 3 1 2 2 4 0 BIN
## 4 1 2 2 4 0 BIN
## 5 1 2 2 3 0 BIN
## 8 1 3 2 3 1 BIN
## 9 1 3 3 3 0 BIN
## Create dat_BHE
dat_BHE <- BS_likert[ , c(18,19,20,27,34)]
head(dat_BHE)
## BUB SBM SBS BHE MSI
## 2 0 2 2 2 0
## 3 1 2 2 2 0
## 4 1 2 2 3 0
## 5 1 2 2 2 0
## 8 1 3 2 3 1
## 9 1 3 3 3 0
dim(dat_BHE)
## [1] 516 5
dat_BHE$BPE <- rep("BHE", size = size) # Tạo biến mới BPE
names(dat_BHE)[names(dat_BHE) == "BHE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BHE)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 2 0 BHE
## 3 1 2 2 2 0 BHE
## 4 1 2 2 3 0 BHE
## 5 1 2 2 2 0 BHE
## 8 1 3 2 3 1 BHE
## 9 1 3 3 3 0 BHE
## Create dat_BEN
dat_BEN <- BS_likert[ , c(18,19,20,28,34)]
head(dat_BEN)
## BUB SBM SBS BEN MSI
## 2 0 2 2 5 0
## 3 1 2 2 4 0
## 4 1 2 2 3 0
## 5 1 2 2 4 0
## 8 1 3 2 5 1
## 9 1 3 3 4 0
dim(dat_BEN)
## [1] 516 5
dat_BEN$BPE <- rep("BEN", size = size) # Tạo biến mới BPE
names(dat_BEN)[names(dat_BEN) == "BEN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BEN)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 5 0 BEN
## 3 1 2 2 4 0 BEN
## 4 1 2 2 3 0 BEN
## 5 1 2 2 4 0 BEN
## 8 1 3 2 5 1 BEN
## 9 1 3 3 4 0 BEN
## Create dat_BRC
dat_BRC <- BS_likert[ , c(18,19,20,29,34)]
head(dat_BRC)
## BUB SBM SBS BRC MSI
## 2 0 2 2 5 0
## 3 1 2 2 4 0
## 4 1 2 2 3 0
## 5 1 2 2 4 0
## 8 1 3 2 5 1
## 9 1 3 3 4 0
dim(dat_BRC)
## [1] 516 5
dat_BRC$BPE <- rep("BRC", size = size) # Tạo biến mới BPE
names(dat_BRC)[names(dat_BRC) == "BRC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRC)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 5 0 BRC
## 3 1 2 2 4 0 BRC
## 4 1 2 2 3 0 BRC
## 5 1 2 2 4 0 BRC
## 8 1 3 2 5 1 BRC
## 9 1 3 3 4 0 BRC
## Create dat_BWE
dat_BWE <- BS_likert[ , c(18,19,20,30,34)]
head(dat_BWE)
## BUB SBM SBS BWE MSI
## 2 0 2 2 3 0
## 3 1 2 2 4 0
## 4 1 2 2 5 0
## 5 1 2 2 4 0
## 8 1 3 2 5 1
## 9 1 3 3 4 0
dim(dat_BWE)
## [1] 516 5
dat_BWE$BPE <- rep("BWE", size = size) # Tạo biến mới BPE
names(dat_BWE)[names(dat_BWE) == "BWE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BWE)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 3 0 BWE
## 3 1 2 2 4 0 BWE
## 4 1 2 2 5 0 BWE
## 5 1 2 2 4 0 BWE
## 8 1 3 2 5 1 BWE
## 9 1 3 3 4 0 BWE
## Tạo dữ liệu likert từ 10 dữ liệu: dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE
B.Per <- rbind(dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE)
head(B.Per)
## BUB SBM SBS Res MSI BPE
## 2 0 2 2 2 0 BST
## 3 1 2 2 2 0 BST
## 4 1 2 2 3 0 BST
## 5 1 2 2 3 0 BST
## 8 1 3 2 5 1 BST
## 9 1 3 3 4 0 BST
str(B.Per)
## 'data.frame': 5160 obs. of 6 variables:
## $ BUB: int 0 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 2 2 2 2 3 3 3 1 4 4 ...
## $ SBS: int 2 2 2 2 2 3 2 1 3 2 ...
## $ Res: int 2 2 3 3 5 4 2 3 2 3 ...
## $ MSI: int 0 0 0 0 1 0 0 0 1 1 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(B.Per)
## [1] 5160 6
# Cách 2: Tạo dữ liệu thủ công trên excel với các cột BPE, Res, BUB, MSI, SBM, SBS: t_likert = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\8. MS-Bieu do Likert.csv" ; BP_likert = read.csv(t_likert, header = T, sep = ";")
# Data Bus perception of people shift to bus
BP_MS = subset(B.Per,MSI == 1)
str(BP_MS)
## 'data.frame': 1250 obs. of 6 variables:
## $ BUB: int 1 1 1 1 1 0 1 1 1 1 ...
## $ SBM: int 3 4 4 5 4 4 4 4 4 4 ...
## $ SBS: int 2 3 2 4 4 2 2 3 2 2 ...
## $ Res: int 5 2 3 1 4 2 4 4 4 2 ...
## $ MSI: int 1 1 1 1 1 1 1 1 1 1 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(BP_MS)
## [1] 1250 6
BP_MS = BP_MS[ , c(4, 6)]
dim(BP_MS)
## [1] 1250 2
head(BP_MS)
## Res BPE
## 8 5 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 21 4 BST
## 29 2 BST
# Data Bus perception of people shift to bus (MSI = 1) and used to use bus before (BUB = 1)
BP_MS_BU = subset(B.Per,BUB == 1 & MSI == 1)
str(BP_MS_BU)
## 'data.frame': 950 obs. of 6 variables:
## $ BUB: int 1 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 3 4 4 5 4 4 4 4 4 4 ...
## $ SBS: int 2 3 2 4 4 2 3 2 2 3 ...
## $ Res: int 5 2 3 1 4 4 4 4 2 4 ...
## $ MSI: int 1 1 1 1 1 1 1 1 1 1 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(BP_MS_BU)
## [1] 950 6
BP_MS_BU = BP_MS_BU[ , c(4, 6)]
dim(BP_MS_BU)
## [1] 950 2
head(BP_MS_BU)
## Res BPE
## 8 5 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 21 4 BST
## 30 4 BST
# Data Bus perception of people don't shift to bus
BP_nMS = subset(B.Per,MSI == 0)
str(BP_nMS)
## 'data.frame': 3910 obs. of 6 variables:
## $ BUB: int 0 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 2 2 2 2 3 3 1 4 2 2 ...
## $ SBS: int 2 2 2 2 3 2 1 2 2 2 ...
## $ Res: int 2 2 3 3 4 2 3 3 2 2 ...
## $ MSI: int 0 0 0 0 0 0 0 0 0 0 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(BP_nMS)
## [1] 3910 6
BP_nMS = BP_nMS[ , c(4, 6)]
dim(BP_nMS)
## [1] 3910 2
head(BP_nMS)
## Res BPE
## 2 2 BST
## 3 2 BST
## 4 3 BST
## 5 3 BST
## 9 4 BST
## 10 2 BST
7.2. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi - BP_MS
# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS)
## Res BPE
## 8 5 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 21 4 BST
## 29 2 BST
attach(BP_MS)
BP_MS = within(BP_MS, {
Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
})
str(BP_MS)
## 'data.frame': 1250 obs. of 2 variables:
## $ Res: Factor w/ 5 levels "Very Disagree",..: 5 2 3 1 4 2 4 4 4 2 ...
## $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS)
## Res BPE
## Very Disagree: 85 BST :125
## Disagree :190 BSC :125
## Normal :263 BSA :125
## Agree :582 BRE :125
## Very Agree :130 BCO :125
## BIN :125
## (Other):500
t = compareGroups(Res ~ BPE, data = BP_MS)
createTable(t)
##
## --------Summary descriptives table by 'Res'---------
##
## ___________________________________________________________________________
## Very Disagree Disagree Normal Agree Very Agree p.overall
## N=85 N=190 N=263 N=582 N=130
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## BPE: <0.001
## BST 10 (11.8%) 29 (15.3%) 32 (12.2%) 48 (8.25%) 6 (4.62%)
## BSC 6 (7.06%) 5 (2.63%) 21 (7.98%) 67 (11.5%) 26 (20.0%)
## BSA 9 (10.6%) 36 (18.9%) 37 (14.1%) 37 (6.36%) 6 (4.62%)
## BRE 6 (7.06%) 31 (16.3%) 46 (17.5%) 38 (6.53%) 4 (3.08%)
## BCO 9 (10.6%) 5 (2.63%) 17 (6.46%) 67 (11.5%) 27 (20.8%)
## BIN 7 (8.24%) 13 (6.84%) 33 (12.5%) 62 (10.7%) 10 (7.69%)
## BHE 7 (8.24%) 5 (2.63%) 20 (7.60%) 81 (13.9%) 12 (9.23%)
## BEN 9 (10.6%) 11 (5.79%) 21 (7.98%) 75 (12.9%) 9 (6.92%)
## BRC 15 (17.6%) 51 (26.8%) 23 (8.75%) 34 (5.84%) 2 (1.54%)
## BWE 7 (8.24%) 4 (2.11%) 13 (4.94%) 73 (12.5%) 28 (21.5%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS %>%
group_by(BPE, Res) %>%
count() %>%
ungroup() %>%
group_by(BPE) %>%
mutate(percent = 100*n / sum(n)) %>%
mutate(percent = round(percent, 2)) %>%
mutate(bar_text = paste0(percent, "%")) %>%
ungroup() -> df_for_ploting
df_for_ploting %>%
filter(Res == Res[5]) %>%
arrange(percent) %>%
pull(BPE) -> order_x
order_x
## [1] BRC BRE BST BSA BEN BIN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot:
my_colors <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font <- "Roboto Condensed"
df_for_ploting %>%
mutate(BPE = factor(BPE, levels = order_x), Res = factor(Res, levels = Res[5:1])) -> df_odered
df_odered
## # A tibble: 50 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 10 8 8%
## 2 BST Disagree 29 23.2 23.2%
## 3 BST Normal 32 25.6 25.6%
## 4 BST Agree 48 38.4 38.4%
## 5 BST Very Agree 6 4.8 4.8%
## 6 BSC Very Disagree 6 4.8 4.8%
## 7 BSC Disagree 5 4 4%
## 8 BSC Normal 21 16.8 16.8%
## 9 BSC Agree 67 53.6 53.6%
## 10 BSC Very Agree 26 20.8 20.8%
## # ... with 40 more rows
library(extrafont)
## Registering fonts with R
theme_set(theme_minimal())
gg <- df_odered %>%
ggplot(aes(x = BPE, y = percent, fill = Res)) +
geom_col(width = 0.8) +
coord_flip() +
scale_fill_manual(values = my_colors[5:1], name = "") +
theme(legend.position = "top") +
theme(text = element_text(family = my_font)) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) +
theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) +
theme(axis.text = element_text(color = "grey20", size = 10.2)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
theme(legend.key.height = unit(0.15, "mm")) +
labs(x = NULL, y = NULL,
title = "Bus perception of motor users to shift to bus",
subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# For displaying percent of "Very Disagree":
df_odered %>%
filter(Res == "Very Disagree") %>%
filter(percent >= 3) -> df_for_text1
df_for_text1
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 10 8 8%
## 2 BSC Very Disagree 6 4.8 4.8%
## 3 BSA Very Disagree 9 7.2 7.2%
## 4 BRE Very Disagree 6 4.8 4.8%
## 5 BCO Very Disagree 9 7.2 7.2%
## 6 BIN Very Disagree 7 5.6 5.6%
## 7 BHE Very Disagree 7 5.6 5.6%
## 8 BEN Very Disagree 9 7.2 7.2%
## 9 BRC Very Disagree 15 12 12%
## 10 BWE Very Disagree 7 5.6 5.6%
# For displaying percent of "Disagree":
df_odered %>%
filter(Res == "Disagree") %>%
filter(percent >= 3) -> df_for_text2
df_for_text2
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Disagree 29 23.2 23.2%
## 2 BSC Disagree 5 4 4%
## 3 BSA Disagree 36 28.8 28.8%
## 4 BRE Disagree 31 24.8 24.8%
## 5 BCO Disagree 5 4 4%
## 6 BIN Disagree 13 10.4 10.4%
## 7 BHE Disagree 5 4 4%
## 8 BEN Disagree 11 8.8 8.8%
## 9 BRC Disagree 51 40.8 40.8%
## 10 BWE Disagree 4 3.2 3.2%
# For displaying percent of "Normal":
df_odered %>%
filter(Res == "Normal") %>%
filter(percent >= 3) -> df_for_text3
df_for_text3
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Normal 32 25.6 25.6%
## 2 BSC Normal 21 16.8 16.8%
## 3 BSA Normal 37 29.6 29.6%
## 4 BRE Normal 46 36.8 36.8%
## 5 BCO Normal 17 13.6 13.6%
## 6 BIN Normal 33 26.4 26.4%
## 7 BHE Normal 20 16 16%
## 8 BEN Normal 21 16.8 16.8%
## 9 BRC Normal 23 18.4 18.4%
## 10 BWE Normal 13 10.4 10.4%
# For displaying percent of "Agree":
df_odered %>%
filter(Res == "Agree") %>%
filter(percent >= 3) -> df_for_text4
df_for_text4
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Agree 48 38.4 38.4%
## 2 BSC Agree 67 53.6 53.6%
## 3 BSA Agree 37 29.6 29.6%
## 4 BRE Agree 38 30.4 30.4%
## 5 BCO Agree 67 53.6 53.6%
## 6 BIN Agree 62 49.6 49.6%
## 7 BHE Agree 81 64.8 64.8%
## 8 BEN Agree 75 60 60%
## 9 BRC Agree 34 27.2 27.2%
## 10 BWE Agree 73 58.4 58.4%
# For displaying percent of "Very Agree":
df_odered %>%
filter(Res == "Very Agree") %>%
filter(percent >= 3) -> df_for_text5
df_for_text5
## # A tibble: 9 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 6 4.8 4.8%
## 2 BSC Very Agree 26 20.8 20.8%
## 3 BSA Very Agree 6 4.8 4.8%
## 4 BRE Very Agree 4 3.2 3.2%
## 5 BCO Very Agree 27 21.6 21.6%
## 6 BIN Very Agree 10 8 8%
## 7 BHE Very Agree 12 9.6 9.6%
## 8 BEN Very Agree 9 7.2 7.2%
## 9 BWE Very Agree 28 22.4 22.4%
# For displaying percent of "Very Agree":
df_odered %>%
filter(Res == "Very Agree") -> df_for_text
df_for_text
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 6 4.8 4.8%
## 2 BSC Very Agree 26 20.8 20.8%
## 3 BSA Very Agree 6 4.8 4.8%
## 4 BRE Very Agree 4 3.2 3.2%
## 5 BCO Very Agree 27 21.6 21.6%
## 6 BIN Very Agree 10 8 8%
## 7 BHE Very Agree 12 9.6 9.6%
## 8 BEN Very Agree 9 7.2 7.2%
## 9 BRC Very Agree 2 1.6 1.6%
## 10 BWE Very Agree 28 22.4 22.4%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg +
geom_text(data = df_for_text1 %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) +
geom_text(data = df_for_text1 %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) +
geom_text(data = df_for_text, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
7.3. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi và đã từng sử dụng xe buýt trước đó - BP_MS_BU
# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS_BU)
## Res BPE
## 8 5 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 21 4 BST
## 30 4 BST
attach(BP_MS_BU)
## The following objects are masked from BP_MS:
##
## BPE, Res
BP_MS_BU = within(BP_MS_BU, {
Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
})
str(BP_MS_BU)
## 'data.frame': 950 obs. of 2 variables:
## $ Res: Factor w/ 5 levels "Very Disagree",..: 5 2 3 1 4 4 4 4 2 4 ...
## $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS_BU)
## Res BPE
## Very Disagree: 70 BST : 95
## Disagree :131 BSC : 95
## Normal :213 BSA : 95
## Agree :429 BRE : 95
## Very Agree :107 BCO : 95
## BIN : 95
## (Other):380
t_BU = compareGroups(Res ~ BPE, data = BP_MS_BU)
createTable(t_BU)
##
## --------Summary descriptives table by 'Res'---------
##
## ___________________________________________________________________________
## Very Disagree Disagree Normal Agree Very Agree p.overall
## N=70 N=131 N=213 N=429 N=107
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## BPE: <0.001
## BST 7 (10.0%) 21 (16.0%) 28 (13.1%) 33 (7.69%) 6 (5.61%)
## BSC 6 (8.57%) 2 (1.53%) 18 (8.45%) 51 (11.9%) 18 (16.8%)
## BSA 7 (10.0%) 29 (22.1%) 28 (13.1%) 26 (6.06%) 5 (4.67%)
## BRE 5 (7.14%) 19 (14.5%) 37 (17.4%) 30 (6.99%) 4 (3.74%)
## BCO 7 (10.0%) 3 (2.29%) 14 (6.57%) 51 (11.9%) 20 (18.7%)
## BIN 5 (7.14%) 11 (8.40%) 26 (12.2%) 43 (10.0%) 10 (9.35%)
## BHE 7 (10.0%) 3 (2.29%) 16 (7.51%) 58 (13.5%) 11 (10.3%)
## BEN 9 (12.9%) 6 (4.58%) 18 (8.45%) 54 (12.6%) 8 (7.48%)
## BRC 11 (15.7%) 35 (26.7%) 20 (9.39%) 27 (6.29%) 2 (1.87%)
## BWE 6 (8.57%) 2 (1.53%) 8 (3.76%) 56 (13.1%) 23 (21.5%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS_BU %>%
group_by(BPE, Res) %>%
count() %>%
ungroup() %>%
group_by(BPE) %>%
mutate(percent = 100*n / sum(n)) %>%
mutate(percent = round(percent, 2)) %>%
mutate(bar_text = paste0(percent, "%")) %>%
ungroup() -> df_for_ploting_BU
df_for_ploting_BU %>%
filter(Res == Res[5]) %>%
arrange(percent) %>%
pull(BPE) -> order_x_BU
order_x_BU
## [1] BRC BRE BSA BST BEN BIN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot:
my_colors_BU <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_BU <- "Roboto Condensed"
df_for_ploting_BU %>%
mutate(BPE = factor(BPE, levels = order_x_BU), Res = factor(Res, levels = Res[5:1])) -> df_odered_BU
df_odered_BU
## # A tibble: 50 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 7 7.37 7.37%
## 2 BST Disagree 21 22.1 22.11%
## 3 BST Normal 28 29.5 29.47%
## 4 BST Agree 33 34.7 34.74%
## 5 BST Very Agree 6 6.32 6.32%
## 6 BSC Very Disagree 6 6.32 6.32%
## 7 BSC Disagree 2 2.11 2.11%
## 8 BSC Normal 18 19.0 18.95%
## 9 BSC Agree 51 53.7 53.68%
## 10 BSC Very Agree 18 19.0 18.95%
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_BU <- df_odered_BU %>%
ggplot(aes(x = BPE, y = percent, fill = Res)) +
geom_col(width = 0.8) +
coord_flip() +
scale_fill_manual(values = my_colors[5:1], name = "") +
theme(legend.position = "top") +
theme(text = element_text(family = my_font)) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) +
theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) +
theme(axis.text = element_text(color = "grey20", size = 10.2)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
theme(legend.key.height = unit(0.15, "mm")) +
labs(x = NULL, y = NULL,
title = "Bus perception of motor users shifting to bus and being used to use bus before",
subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_BU
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# For displaying percent of "Very Disagree":
df_odered_BU %>%
filter(Res == "Very Disagree") %>%
filter(percent >= 3) -> df_for_text1_BU
df_for_text1_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 7 7.37 7.37%
## 2 BSC Very Disagree 6 6.32 6.32%
## 3 BSA Very Disagree 7 7.37 7.37%
## 4 BRE Very Disagree 5 5.26 5.26%
## 5 BCO Very Disagree 7 7.37 7.37%
## 6 BIN Very Disagree 5 5.26 5.26%
## 7 BHE Very Disagree 7 7.37 7.37%
## 8 BEN Very Disagree 9 9.47 9.47%
## 9 BRC Very Disagree 11 11.6 11.58%
## 10 BWE Very Disagree 6 6.32 6.32%
# For displaying percent of "Disagree":
df_odered_BU %>%
filter(Res == "Disagree") %>%
filter(percent >= 3) -> df_for_text2_BU
df_for_text2_BU
## # A tibble: 8 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Disagree 21 22.1 22.11%
## 2 BSA Disagree 29 30.5 30.53%
## 3 BRE Disagree 19 20 20%
## 4 BCO Disagree 3 3.16 3.16%
## 5 BIN Disagree 11 11.6 11.58%
## 6 BHE Disagree 3 3.16 3.16%
## 7 BEN Disagree 6 6.32 6.32%
## 8 BRC Disagree 35 36.8 36.84%
# For displaying percent of "Normal":
df_odered_BU %>%
filter(Res == "Normal") %>%
filter(percent >= 3) -> df_for_text3_BU
df_for_text3_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Normal 28 29.5 29.47%
## 2 BSC Normal 18 19.0 18.95%
## 3 BSA Normal 28 29.5 29.47%
## 4 BRE Normal 37 39.0 38.95%
## 5 BCO Normal 14 14.7 14.74%
## 6 BIN Normal 26 27.4 27.37%
## 7 BHE Normal 16 16.8 16.84%
## 8 BEN Normal 18 19.0 18.95%
## 9 BRC Normal 20 21.0 21.05%
## 10 BWE Normal 8 8.42 8.42%
# For displaying percent of "Agree":
df_odered_BU %>%
filter(Res == "Agree") %>%
filter(percent >= 3) -> df_for_text4_BU
df_for_text4_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Agree 33 34.7 34.74%
## 2 BSC Agree 51 53.7 53.68%
## 3 BSA Agree 26 27.4 27.37%
## 4 BRE Agree 30 31.6 31.58%
## 5 BCO Agree 51 53.7 53.68%
## 6 BIN Agree 43 45.3 45.26%
## 7 BHE Agree 58 61.0 61.05%
## 8 BEN Agree 54 56.8 56.84%
## 9 BRC Agree 27 28.4 28.42%
## 10 BWE Agree 56 59.0 58.95%
# For displaying percent of "Very Agree":
df_odered_BU %>%
filter(Res == "Very Agree") %>%
filter(percent >= 3) -> df_for_text5_BU
df_for_text5_BU
## # A tibble: 9 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 6 6.32 6.32%
## 2 BSC Very Agree 18 19.0 18.95%
## 3 BSA Very Agree 5 5.26 5.26%
## 4 BRE Very Agree 4 4.21 4.21%
## 5 BCO Very Agree 20 21.0 21.05%
## 6 BIN Very Agree 10 10.5 10.53%
## 7 BHE Very Agree 11 11.6 11.58%
## 8 BEN Very Agree 8 8.42 8.42%
## 9 BWE Very Agree 23 24.2 24.21%
# For displaying percent of "Very Agree":
df_odered_BU %>%
filter(Res == "Very Agree") -> df_for_text_BU
df_for_text_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 6 6.32 6.32%
## 2 BSC Very Agree 18 19.0 18.95%
## 3 BSA Very Agree 5 5.26 5.26%
## 4 BRE Very Agree 4 4.21 4.21%
## 5 BCO Very Agree 20 21.0 21.05%
## 6 BIN Very Agree 10 10.5 10.53%
## 7 BHE Very Agree 11 11.6 11.58%
## 8 BEN Very Agree 8 8.42 8.42%
## 9 BRC Very Agree 2 2.11 2.11%
## 10 BWE Very Agree 23 24.2 24.21%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg_BU +
geom_text(data = df_for_text1_BU %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) +
geom_text(data = df_for_text1_BU %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) +
geom_text(data = df_for_text_BU, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_BU)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
7.4. Vẽ biểu đồ likert cho nhóm đối tượng không chuyển đổi - BP_nMS
# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_nMS)
## Res BPE
## 2 2 BST
## 3 2 BST
## 4 3 BST
## 5 3 BST
## 9 4 BST
## 10 2 BST
attach(BP_nMS)
## The following objects are masked from BP_MS_BU:
##
## BPE, Res
## The following objects are masked from BP_MS:
##
## BPE, Res
BP_nMS = within(BP_nMS, {
Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
})
str(BP_nMS)
## 'data.frame': 3910 obs. of 2 variables:
## $ Res: Factor w/ 5 levels "Very Disagree",..: 2 2 3 3 4 2 3 3 2 2 ...
## $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_nMS)
## Res BPE
## Very Disagree: 137 BST : 391
## Disagree : 744 BSC : 391
## Normal :1060 BSA : 391
## Agree :1673 BRE : 391
## Very Agree : 296 BCO : 391
## BIN : 391
## (Other):1564
t_nMS = compareGroups(Res ~ BPE, data = BP_nMS)
createTable(t_nMS)
##
## --------Summary descriptives table by 'Res'---------
##
## ______________________________________________________________________________
## Very Disagree Disagree Normal Agree Very Agree p.overall
## N=137 N=744 N=1060 N=1673 N=296
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## BPE: <0.001
## BST 10 (7.30%) 109 (14.7%) 117 (11.0%) 139 (8.31%) 16 (5.41%)
## BSC 9 (6.57%) 28 (3.76%) 87 (8.21%) 221 (13.2%) 46 (15.5%)
## BSA 10 (7.30%) 172 (23.1%) 123 (11.6%) 69 (4.12%) 17 (5.74%)
## BRE 18 (13.1%) 86 (11.6%) 167 (15.8%) 108 (6.46%) 12 (4.05%)
## BCO 10 (7.30%) 33 (4.44%) 70 (6.60%) 223 (13.3%) 55 (18.6%)
## BIN 6 (4.38%) 75 (10.1%) 117 (11.0%) 173 (10.3%) 20 (6.76%)
## BHE 15 (10.9%) 25 (3.36%) 113 (10.7%) 209 (12.5%) 29 (9.80%)
## BEN 18 (13.1%) 43 (5.78%) 108 (10.2%) 197 (11.8%) 25 (8.45%)
## BRC 33 (24.1%) 149 (20.0%) 121 (11.4%) 81 (4.84%) 7 (2.36%)
## BWE 8 (5.84%) 24 (3.23%) 37 (3.49%) 253 (15.1%) 69 (23.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_nMS %>%
group_by(BPE, Res) %>%
count() %>%
ungroup() %>%
group_by(BPE) %>%
mutate(percent = 100*n / sum(n)) %>%
mutate(percent = round(percent, 2)) %>%
mutate(bar_text = paste0(percent, "%")) %>%
ungroup() -> df_for_ploting_nMS
df_for_ploting_nMS %>%
filter(Res == Res[5]) %>%
arrange(percent) %>%
pull(BPE) -> order_x_nMS
order_x_nMS
## [1] BRC BRE BST BSA BIN BEN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot:
my_colors_nMS <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_nMS <- "Roboto Condensed"
df_for_ploting_nMS %>%
mutate(BPE = factor(BPE, levels = order_x_nMS), Res = factor(Res, levels = Res[5:1])) -> df_odered_nMS
df_odered_nMS
## # A tibble: 50 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 10 2.56 2.56%
## 2 BST Disagree 109 27.9 27.88%
## 3 BST Normal 117 29.9 29.92%
## 4 BST Agree 139 35.6 35.55%
## 5 BST Very Agree 16 4.09 4.09%
## 6 BSC Very Disagree 9 2.3 2.3%
## 7 BSC Disagree 28 7.16 7.16%
## 8 BSC Normal 87 22.2 22.25%
## 9 BSC Agree 221 56.5 56.52%
## 10 BSC Very Agree 46 11.8 11.76%
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_nMS <- df_odered_nMS %>%
ggplot(aes(x = BPE, y = percent, fill = Res)) +
geom_col(width = 0.8) +
coord_flip() +
scale_fill_manual(values = my_colors[5:1], name = "") +
theme(legend.position = "top") +
theme(text = element_text(family = my_font)) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) +
theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) +
theme(axis.text = element_text(color = "grey20", size = 10.2)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
theme(legend.key.height = unit(0.15, "mm")) +
labs(x = NULL, y = NULL,
title = "Bus perception of motor users not shifting to bus",
subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_nMS
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# For displaying percent of "Very Disagree":
df_odered_nMS %>%
filter(Res == "Very Disagree") -> df_for_text1_nMS
df_for_text1_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 10 2.56 2.56%
## 2 BSC Very Disagree 9 2.3 2.3%
## 3 BSA Very Disagree 10 2.56 2.56%
## 4 BRE Very Disagree 18 4.6 4.6%
## 5 BCO Very Disagree 10 2.56 2.56%
## 6 BIN Very Disagree 6 1.53 1.53%
## 7 BHE Very Disagree 15 3.84 3.84%
## 8 BEN Very Disagree 18 4.6 4.6%
## 9 BRC Very Disagree 33 8.44 8.44%
## 10 BWE Very Disagree 8 2.05 2.05%
# For displaying percent of "Disagree":
df_odered_nMS %>%
filter(Res == "Disagree") %>%
filter(percent >= 3) -> df_for_text2_nMS
df_for_text2_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Disagree 109 27.9 27.88%
## 2 BSC Disagree 28 7.16 7.16%
## 3 BSA Disagree 172 44.0 43.99%
## 4 BRE Disagree 86 22.0 21.99%
## 5 BCO Disagree 33 8.44 8.44%
## 6 BIN Disagree 75 19.2 19.18%
## 7 BHE Disagree 25 6.39 6.39%
## 8 BEN Disagree 43 11 11%
## 9 BRC Disagree 149 38.1 38.11%
## 10 BWE Disagree 24 6.14 6.14%
# For displaying percent of "Normal":
df_odered_nMS %>%
filter(Res == "Normal") %>%
filter(percent >= 3) -> df_for_text3_nMS
df_for_text3_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Normal 117 29.9 29.92%
## 2 BSC Normal 87 22.2 22.25%
## 3 BSA Normal 123 31.5 31.46%
## 4 BRE Normal 167 42.7 42.71%
## 5 BCO Normal 70 17.9 17.9%
## 6 BIN Normal 117 29.9 29.92%
## 7 BHE Normal 113 28.9 28.9%
## 8 BEN Normal 108 27.6 27.62%
## 9 BRC Normal 121 31.0 30.95%
## 10 BWE Normal 37 9.46 9.46%
# For displaying percent of "Agree":
df_odered_nMS %>%
filter(Res == "Agree") %>%
filter(percent >= 3) -> df_for_text4_nMS
df_for_text4_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Agree 139 35.6 35.55%
## 2 BSC Agree 221 56.5 56.52%
## 3 BSA Agree 69 17.6 17.65%
## 4 BRE Agree 108 27.6 27.62%
## 5 BCO Agree 223 57.0 57.03%
## 6 BIN Agree 173 44.2 44.25%
## 7 BHE Agree 209 53.4 53.45%
## 8 BEN Agree 197 50.4 50.38%
## 9 BRC Agree 81 20.7 20.72%
## 10 BWE Agree 253 64.7 64.71%
# For displaying percent of "Very Agree":
df_odered_nMS %>%
filter(Res == "Very Agree") %>%
filter(percent >= 3) -> df_for_text5_nMS
df_for_text5_nMS
## # A tibble: 9 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 16 4.09 4.09%
## 2 BSC Very Agree 46 11.8 11.76%
## 3 BSA Very Agree 17 4.35 4.35%
## 4 BRE Very Agree 12 3.07 3.07%
## 5 BCO Very Agree 55 14.1 14.07%
## 6 BIN Very Agree 20 5.12 5.12%
## 7 BHE Very Agree 29 7.42 7.42%
## 8 BEN Very Agree 25 6.39 6.39%
## 9 BWE Very Agree 69 17.6 17.65%
# For displaying percent of "Very Agree":
df_odered_nMS %>%
filter(Res == "Very Agree") -> df_for_text_nMS
df_for_text_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 16 4.09 4.09%
## 2 BSC Very Agree 46 11.8 11.76%
## 3 BSA Very Agree 17 4.35 4.35%
## 4 BRE Very Agree 12 3.07 3.07%
## 5 BCO Very Agree 55 14.1 14.07%
## 6 BIN Very Agree 20 5.12 5.12%
## 7 BHE Very Agree 29 7.42 7.42%
## 8 BEN Very Agree 25 6.39 6.39%
## 9 BRC Very Agree 7 1.79 1.79%
## 10 BWE Very Agree 69 17.6 17.65%
# Ad text layers:
gg_nMS +
geom_text(data = df_for_text1_nMS %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_nMS) +
geom_text(data = df_for_text_nMS, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_nMS)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database