1. Read and Create Data
t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\7. Data MSI - 810.csv"
MS = read.csv(t, header = T, sep = ";")
head(MS)
## ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1 1 0 1 6 3 3 1 2 10 1 1 0 4 1 0 1 15 1 3 3 3 4 4
## 2 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5
## 3 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4
## 4 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4
## 5 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4
## 6 6 0 1 4 1 4 1 20 30 1 1 0 5 3 0 2 50 0 3 2 4 4 4
## BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1 4 4 3 4 4 4 4 1 1 1 0 0 5 4 2 2 0 0 0 0 0 1 2
## 2 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3
## 3 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3
## 4 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3
## 5 2 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2
## 6 3 3 3 2 4 4 4 2 0 0 0 1 4 3 4 2 1 1 1 1 1 1 2
## NR
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
str(MS$MSI)
## int [1:810] 0 0 0 0 0 0 0 1 0 0 ...
# View data (number of rows and colume)
dim(MS)
## [1] 810 47
# Data non bus
BS = subset(MS,TM < 7)
BS = BS[, 2:47]
head(BS)
## CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE
## 1 0 1 6 3 3 1 2 10 1 1 0 4 1 0 1 15 1 3 3 3 4 4 4
## 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5 2
## 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4 3
## 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4 4
## 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4 2
## 6 0 1 4 1 4 1 20 30 1 1 0 5 3 0 2 50 0 3 2 4 4 4 3
## BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 1 4 3 4 4 4 4 1 1 1 0 0 5 4 2 2 0 0 0 0 0 1 2 1
## 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3 0
## 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3 0
## 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3 0
## 5 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2 0
## 6 3 3 2 4 4 4 2 0 0 0 1 4 3 4 2 1 1 1 1 1 1 2 1
dim(BS)
## [1] 724 46
# Sumerize Data
summary(BS)
## CA BC TM TP
## Min. :0.0000 Min. :0.0000 Min. :1.00 Min. :1.00
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3.00 1st Qu.:1.00
## Median :0.0000 Median :1.0000 Median :3.00 Median :1.00
## Mean :0.4544 Mean :0.5207 Mean :3.13 Mean :1.57
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:3.00 3rd Qu.:2.00
## Max. :1.0000 Max. :1.0000 Max. :6.00 Max. :4.00
## FR DT DI TI
## Min. :1.000 Min. :1.000 Min. : 0.015 Min. : 0.00
## 1st Qu.:4.000 1st Qu.:1.000 1st Qu.: 2.000 1st Qu.: 10.00
## Median :4.000 Median :1.000 Median : 5.000 Median : 15.00
## Mean :3.622 Mean :1.863 Mean : 6.159 Mean : 16.84
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:10.000 3rd Qu.: 20.00
## Max. :4.000 Max. :4.000 Max. :35.000 Max. :180.00
## SC LS TS MR
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :1.000
## 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:2.000
## Median :1.0000 Median :1.0000 Median :1.0000 Median :2.000
## Mean :0.8674 Mean :0.7873 Mean :0.7044 Mean :2.798
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000
## Max. :1.0000 Max. :1.0000 Max. :3.0000 Max. :6.000
## WE WK NBR CO
## Min. :1.000 Min. :0.0000 Min. :1.000 Min. : 0.0
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.: 4.0
## Median :1.000 Median :0.0000 Median :2.000 Median : 10.0
## Mean :1.664 Mean :0.2445 Mean :2.916 Mean : 13.4
## 3rd Qu.:3.000 3rd Qu.:0.0000 3rd Qu.:4.000 3rd Qu.: 15.0
## Max. :3.000 Max. :1.0000 Max. :6.000 Max. :300.0
## BUB SBM SBS BST
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :1.0000 Median :2.000 Median :2.000 Median :3.000
## Mean :0.6119 Mean :2.628 Mean :2.474 Mean :2.711
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :1.0000 Max. :5.000 Max. :5.000 Max. :5.000
## BSC BSA BRE BCO BIN
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.00
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.00 1st Qu.:2.000 1st Qu.:2.00
## Median :4.000 Median :4.000 Median :4.00 Median :3.000 Median :3.00
## Mean :3.474 Mean :3.583 Mean :3.33 Mean :3.088 Mean :3.03
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:4.00
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.00
## BHE BEN BRC BWE SP
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:1.000
## Median :3.000 Median :4.00 Median :4.000 Median :4.000 Median :1.000
## Mean :2.829 Mean :3.67 Mean :3.713 Mean :3.862 Mean :1.023
## 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:1.000
## Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000 Max. :2.000
## BI WI MSI GD
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.0000 Median :1.000 Median :0.000 Median :1.0000
## Mean :0.5622 Mean :1.133 Mean :0.268 Mean :0.5843
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :6.000 Max. :1.000 Max. :1.0000
## AG OC IN NC
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.0000
## Median :4.000 Median :3.000 Median :2.000 Median :1.0000
## Mean :3.544 Mean :4.225 Mean :2.267 Mean :0.7845
## 3rd Qu.:4.000 3rd Qu.:7.000 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :6.000 Max. :8.000 Max. :4.000 Max. :3.0000
## MC CC BO MO
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :1.000
## Mean :0.8798 Mean :0.2044 Mean :0.4019 Mean :0.826
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## RO NB NM NR
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:2.000 1st Qu.:0.0000
## Median :0.0000 Median :1.000 Median :2.000 Median :0.0000
## Mean :0.1575 Mean :0.703 Mean :2.189 Mean :0.2735
## 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:3.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :4.000 Max. :4.000 Max. :3.0000
# Đổi biến TM (1-Walk/Bike, 2-Motor, 3-car,4-)
BS$TM[BS$TM == 2] <- 1
BS$TM[BS$TM == 3] <- 2
BS$TM[BS$TM == 4] <- 3
BS$TM[BS$TM == 5] <- 4
BS$TM[BS$TM == 6] <- 4
head(BS$TM)
## [1] 4 2 2 2 2 3
# Đổi biến AG (1-<=24,2-24-60,3->60)
BS$AG[BS$AG == 2] <- 1
BS$AG[BS$AG == 3] <- 1
BS$AG[BS$AG == 4] <- 2
BS$AG[BS$AG == 5] <- 2
BS$AG[BS$AG == 6] <- 3
head(BS$AG)
## [1] 2 1 1 1 2 2
# Đổi biến OC (1-HS/SV,2-CNVC,3-Nội trợ,4-LĐ tự do,5-Khác)
BS$OC[BS$OC == 2] <- 1
BS$OC[BS$OC == 3] <- 2
BS$OC[BS$OC == 6] <- 2
BS$OC[BS$OC == 4] <- 3
BS$OC[BS$OC == 5] <- 3
BS$OC[BS$OC == 7] <- 4
BS$OC[BS$OC == 8] <- 5
head(BS$OC)
## [1] 3 2 1 1 2 2
# Đổi biến FR (1-1l/t,2-2l/t,3->=3l/t)
BS$FR[BS$FR == 4] <- 3
head(BS$FR)
## [1] 3 3 3 3 3 3
# Đổi biến DT (1-CĐ/6-8h/16-18h,0-Ngoài giờ CĐ)
BS$DT[BS$DT == 3] <- 1
BS$DT[BS$DT == 2] <- 0
BS$DT[BS$DT == 4] <- 0
head(BS$DT)
## [1] 1 1 1 1 1 1
# Đổi biến TS (1-có, 2-không)
BS$TS[BS$TS == 2] <- 1
BS$TS[BS$TS == 3] <- 1
head(BS$TS)
## [1] 0 1 1 1 0 0
# Đổi biến WE (1-có mưa, 2-không)
BS$WE[BS$WE == 1] <- 0
BS$WE[BS$WE == 2] <- 1
BS$WE[BS$WE == 3] <- 0
head(BS$WE)
## [1] 0 0 0 0 0 0
# Đổi biến SBM (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$SBM[BS$SBM == 2] <- 1
BS$SBM[BS$SBM == 3] <- 2
BS$SBM[BS$SBM == 4] <- 3
BS$SBM[BS$SBM == 5] <- 3
head(BS$SBM)
## [1] 2 1 1 1 1 2
# Đổi biến SBS (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$SBS[BS$SBS == 2] <- 1
BS$SBS[BS$SBS == 3] <- 2
BS$SBS[BS$SBS == 4] <- 3
BS$SBS[BS$SBS == 5] <- 3
head(BS$SBS)
## [1] 2 1 1 1 1 1
# Đổi biến BST (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BST[BS$BST == 2] <- 1
BS$BST[BS$BST == 3] <- 2
BS$BST[BS$BST == 4] <- 3
BS$BST[BS$BST == 5] <- 3
head(BS$BST)
## [1] 2 1 1 2 2 3
# Đổi biến BSC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BSC[BS$BSC == 2] <- 1
BS$BSC[BS$BSC == 3] <- 2
BS$BSC[BS$BSC == 4] <- 3
BS$BSC[BS$BSC == 5] <- 3
head(BS$BSC)
## [1] 3 3 3 3 3 3
# Đổi biến BSA (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BSA[BS$BSA == 2] <- 1
BS$BSA[BS$BSA == 3] <- 2
BS$BSA[BS$BSA == 4] <- 3
BS$BSA[BS$BSA == 5] <- 3
head(BS$BSA)
## [1] 3 3 3 3 3 3
# Đổi biến BRE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BRE[BS$BRE == 2] <- 1
BS$BRE[BS$BRE == 3] <- 2
BS$BRE[BS$BRE == 4] <- 3
BS$BRE[BS$BRE == 5] <- 3
head(BS$BRE)
## [1] 3 1 2 3 1 2
# Đổi biến BCO (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BCO[BS$BCO == 2] <- 1
BS$BCO[BS$BCO == 3] <- 2
BS$BCO[BS$BCO == 4] <- 3
BS$BCO[BS$BCO == 5] <- 3
head(BS$BCO)
## [1] 3 1 2 3 1 2
# Đổi biến BIN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BIN[BS$BIN == 2] <- 1
BS$BIN[BS$BIN == 3] <- 2
BS$BIN[BS$BIN == 4] <- 3
BS$BIN[BS$BIN == 5] <- 3
head(BS$BIN)
## [1] 2 3 3 3 2 2
# Đổi biến BHE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BHE[BS$BHE == 2] <- 1
BS$BHE[BS$BHE == 3] <- 2
BS$BHE[BS$BHE == 4] <- 3
BS$BHE[BS$BHE == 5] <- 3
head(BS$BHE)
## [1] 3 1 1 2 1 1
# Đổi biến BEN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BEN[BS$BEN == 2] <- 1
BS$BEN[BS$BEN == 3] <- 2
BS$BEN[BS$BEN == 4] <- 3
BS$BEN[BS$BEN == 5] <- 3
head(BS$BEN)
## [1] 3 3 3 2 3 3
# Đổi biến BRC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BRC[BS$BRC == 2] <- 1
BS$BRC[BS$BRC == 3] <- 2
BS$BRC[BS$BRC == 4] <- 3
BS$BRC[BS$BRC == 5] <- 3
head(BS$BRC)
## [1] 3 3 3 2 3 3
# Đổi biến BWE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BWE[BS$BWE == 2] <- 1
BS$BWE[BS$BWE == 3] <- 2
BS$BWE[BS$BWE == 4] <- 3
BS$BWE[BS$BWE == 5] <- 3
head(BS$BWE)
## [1] 3 2 3 3 3 3
# Đổi biến NB (0-0xe,1-1xe,2->=2xe)
BS$NB[BS$NB == 3] <- 2
BS$NB[BS$NB == 4] <- 2
head(BS$NB)
## [1] 1 1 0 1 0 1
# Đổi biến NM (0-0xe,1-1xe,2-2xe,3->=3xe)
BS$NM[BS$NM == 4] <- 3
head(BS$NM)
## [1] 2 3 3 3 2 2
# Đổi biến NR (0-0xe,1-Có xe)
BS$NR[BS$NR == 2] <- 1
BS$NR[BS$NR == 3] <- 1
head(BS$NR)
## [1] 1 0 0 0 0 1
# Đổi biến NC (0-No,1-Yes)
BS$NC[BS$NC == 2] <- 1
BS$NC[BS$NC == 3] <- 1
head(BS$NC)
## [1] 1 1 0 0 1 1
# Đổi biến WI (0-No,1-Yes)
BS$WI[BS$WI >= 5] <- 4
BS$WI[BS$WI >= 6] <- 4
head(BS$WI)
## [1] 1 0 2 1 0 0
head(BS$BUB)
## [1] 1 0 1 1 1 0
head(BS)
## CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE
## 1 0 1 4 3 3 1 2 10 1 1 0 4 0 0 1 15 1 2 2 2 3 3 3
## 2 0 1 2 1 3 1 8 15 1 1 1 2 0 0 1 5 0 1 1 1 3 3 1
## 3 0 1 2 1 3 1 5 15 1 1 1 4 0 0 2 3 1 1 1 1 3 3 2
## 4 0 1 2 1 3 1 5 10 1 1 1 2 0 0 4 10 1 1 1 2 3 3 3
## 5 0 1 2 1 3 1 8 15 1 1 0 2 0 0 2 5 1 1 1 2 3 3 1
## 6 0 1 3 1 3 1 20 30 1 1 0 5 0 0 2 50 0 2 1 3 3 3 2
## BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 1 3 2 3 3 3 3 1 1 1 0 0 2 3 2 1 0 0 0 0 0 1 2 1
## 2 1 3 1 3 3 2 2 0 0 0 0 1 2 3 1 1 0 1 1 0 1 3 0
## 3 2 3 1 3 3 3 1 1 2 0 1 1 1 4 0 1 0 0 1 0 0 3 0
## 4 3 3 2 2 2 3 1 1 1 0 1 1 1 3 0 1 0 1 1 0 1 3 0
## 5 1 2 1 3 3 3 2 0 0 0 0 2 2 3 1 1 0 0 1 0 0 2 0
## 6 2 2 1 3 3 3 2 0 0 0 1 2 2 4 1 1 1 1 1 1 1 2 1
dim(BS)
## [1] 724 46
str(BS)
## 'data.frame': 724 obs. of 46 variables:
## $ CA : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TM : num 4 2 2 2 2 3 3 2 2 2 ...
## $ TP : int 3 1 1 1 1 1 1 1 1 1 ...
## $ FR : num 3 3 3 3 3 3 3 3 3 3 ...
## $ DT : num 1 1 1 1 1 1 1 1 0 1 ...
## $ DI : num 2 8 5 5 8 20 15 10 12 10 ...
## $ TI : num 10 15 15 10 15 30 20 25 30 25 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : num 0 1 1 1 0 0 0 0 1 1 ...
## $ MR : int 4 2 4 2 2 5 5 2 2 2 ...
## $ WE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ WK : int 0 0 0 0 0 0 0 1 0 0 ...
## $ NBR: int 1 1 2 4 2 2 4 2 1 4 ...
## $ CO : num 15 5 3 10 5 50 40 5 7 7 ...
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: num 2 1 1 1 1 2 1 2 2 2 ...
## $ SBS: num 2 1 1 1 1 1 1 1 2 1 ...
## $ BST: num 2 1 1 2 2 3 3 3 3 1 ...
## $ BSC: num 3 3 3 3 3 3 3 3 3 3 ...
## $ BSA: num 3 3 3 3 3 3 3 3 3 3 ...
## $ BRE: num 3 1 2 3 1 2 1 2 2 2 ...
## $ BCO: num 3 1 2 3 1 2 1 2 2 1 ...
## $ BIN: num 2 3 3 3 2 2 2 2 2 3 ...
## $ BHE: num 3 1 1 2 1 1 1 2 2 1 ...
## $ BEN: num 3 3 3 2 3 3 3 3 3 3 ...
## $ BRC: num 3 3 3 2 3 3 3 3 3 3 ...
## $ BWE: num 3 2 3 3 3 3 3 3 3 3 ...
## $ SP : int 1 2 1 1 2 2 2 1 1 1 ...
## $ BI : int 1 0 1 1 0 0 0 1 1 1 ...
## $ WI : num 1 0 2 1 0 0 0 2 3 2 ...
## $ MSI: int 0 0 0 0 0 0 0 1 0 0 ...
## $ GD : int 0 0 1 1 0 1 1 0 0 1 ...
## $ AG : num 2 1 1 1 2 2 2 2 1 2 ...
## $ OC : num 3 2 1 1 2 2 2 4 4 2 ...
## $ IN : int 2 3 4 3 3 4 4 2 3 3 ...
## $ NC : num 1 1 0 0 1 1 1 0 1 0 ...
## $ MC : int 0 1 1 1 1 1 1 1 1 1 ...
## $ CC : int 0 0 0 0 0 1 1 0 0 0 ...
## $ BO : int 0 1 0 1 0 1 0 0 1 0 ...
## $ MO : int 0 1 1 1 1 1 1 1 1 1 ...
## $ RO : int 0 0 0 0 0 1 1 0 0 0 ...
## $ NB : num 1 1 0 1 0 1 0 0 1 0 ...
## $ NM : num 2 3 3 3 2 2 2 2 3 3 ...
## $ NR : num 1 0 0 0 0 1 1 0 0 0 ...
# Reformat variables in factor variables - Đổi định dạng biến
attach(BS)
BS = within(BS, {
CA = factor(CA, labels = c("No", "Yes"))
WE = factor(WE, labels = c("No", "Yes"))
WK = factor(WK, labels = c("No", "Yes"))
TM = factor(TM, labels = c("Walk/Bicycle", "Motorbike", "Car", "Hichhiking/Taxi"))
TP = factor(TP, labels = c("Work/Study", "Picking Children", "Entertainment/Food", "Others"))
FR = factor(FR, labels = c("Once", "twice", ">=3 times"))
DT = factor(DT, labels = c("Normal", "Rush hour"))
TS = factor(TS, labels = c("No", "Yes"))
SC = factor(SC, labels = c("No", "Yes"))
LS = factor(LS, labels = c("No", "Yes"))
BC = factor(BC,labels = c("No", "Yes"))
SP = factor(SP, labels = c("No", "Yes", "Don't know"))
BI = factor(BI, labels = c("No", "Yes"))
WI = factor(WI, labels = c("No", "At Bus Stops", "Internet/Facebook", "From Friends", "Others"))
MSI = factor(MSI, labels = c("No", "Yes"))
BST = factor(BST, labels = c("Don't agree", "Normal", "Agree"))
BSC = factor(BSC, labels = c("Don't agree", "Normal", "Agree"))
BSA = factor(BSA, labels = c("Don't agree", "Normal", "Agree"))
BRE = factor(BRE, labels = c("Don't agree", "Normal", "Agree"))
BCO = factor(BCO, labels = c("Don't agree", "Normal", "Agree"))
BIN = factor(BIN, labels = c("Don't agree", "Normal", "Agree"))
BHE = factor(BHE, labels = c("Don't agree", "Normal", "Agree"))
BEN = factor(BEN, labels = c("Don't agree", "Normal", "Agree"))
BRC = factor(BRC, labels = c("Don't agree", "Normal", "Agree"))
BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
SBM = factor(SBM, labels = c("Don't agree", "Normal", "Agree"))
SBS = factor(SBS, labels = c("Don't agree", "Normal", "Agree"))
BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
MR = factor(MR, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
NBR = factor(NBR, labels = c("No Route", "Inconvenience", "Unsafety", "Long waiting time", "Unreliability", "others"))
BUB = factor(BUB, labels = c("No", "Yes"))
GD = factor(GD, labels = c("Female", "Male"))
AG = factor(AG, labels = c("<=24", "25-60", ">60"))
OC = factor(OC, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
IN = factor(IN, labels = c("<8 millions", "(8-15) millions", "(15-25) millions", ">25 millions"))
NC = factor(NC, labels = c("No", "Yes"))
MC = factor(MC, labels = c("No", "Yes"))
CC = factor(CC, labels = c("No", "Yes"))
BO = factor(BO, labels = c("No", "Yes"))
MO = factor(MO, labels = c("No", "Yes"))
RO = factor(RO, labels = c("No", "Yes"))
NB = factor(NB, labels = c("None", "1", ">=2"))
NM = factor(NM, labels = c("None", "1", "2", ">=3"))
NR = factor(NR, labels = c("No", "Yes"))
DI = as.numeric(DI)
CO = as.numeric(CO)
TI = as.numeric(TI)
} )
head(BS)
## CA BC TM TP FR DT DI TI SC LS
## 1 No Yes Hichhiking/Taxi Entertainment/Food >=3 times Rush hour 2 10 Yes Yes
## 2 No Yes Motorbike Work/Study >=3 times Rush hour 8 15 Yes Yes
## 3 No Yes Motorbike Work/Study >=3 times Rush hour 5 15 Yes Yes
## 4 No Yes Motorbike Work/Study >=3 times Rush hour 5 10 Yes Yes
## 5 No Yes Motorbike Work/Study >=3 times Rush hour 8 15 Yes Yes
## 6 No Yes Car Work/Study >=3 times Rush hour 20 30 Yes Yes
## TS MR WE WK NBR CO BUB SBM SBS
## 1 No Accessibility No No No Route 15 Yes Normal Normal
## 2 Yes Comfortable No No No Route 5 No Don't agree Don't agree
## 3 Yes Accessibility No No Inconvenience 3 Yes Don't agree Don't agree
## 4 Yes Comfortable No No Long waiting time 10 Yes Don't agree Don't agree
## 5 No Comfortable No No Inconvenience 5 Yes Don't agree Don't agree
## 6 No Reliability No No Inconvenience 50 No Normal Don't agree
## BST BSC BSA BRE BCO BIN BHE BEN
## 1 Normal Agree Agree Agree Agree Normal Agree Agree
## 2 Don't agree Agree Agree Don't agree Don't agree Agree Don't agree Agree
## 3 Don't agree Agree Agree Normal Normal Agree Don't agree Agree
## 4 Normal Agree Agree Agree Agree Agree Normal Normal
## 5 Normal Agree Agree Don't agree Don't agree Normal Don't agree Agree
## 6 Agree Agree Agree Normal Normal Normal Don't agree Agree
## BRC BWE SP BI WI MSI GD AG
## 1 Agree Agree Yes Yes At Bus Stops No Female 25-60
## 2 Agree Normal Don't know No No No Female <=24
## 3 Agree Agree Yes Yes Internet/Facebook No Male <=24
## 4 Normal Agree Yes Yes At Bus Stops No Male <=24
## 5 Agree Agree Don't know No No No Female 25-60
## 6 Agree Agree Don't know No No No Male 25-60
## OC IN NC MC CC BO MO RO NB NM NR
## 1 Housewife/Unemployed (8-15) millions Yes No No No No No 1 2 Yes
## 2 Officers/Workers (15-25) millions Yes Yes No Yes Yes No 1 >=3 No
## 3 Pupils/Students >25 millions No Yes No No Yes No None >=3 No
## 4 Pupils/Students (15-25) millions No Yes No Yes Yes No 1 >=3 No
## 5 Officers/Workers (15-25) millions Yes Yes No No Yes No None 2 No
## 6 Officers/Workers >25 millions Yes Yes Yes Yes Yes Yes 1 2 Yes
str(BS)
## 'data.frame': 724 obs. of 46 variables:
## $ CA : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ BC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ TM : Factor w/ 4 levels "Walk/Bicycle",..: 4 2 2 2 2 3 3 2 2 2 ...
## $ TP : Factor w/ 4 levels "Work/Study","Picking Children",..: 3 1 1 1 1 1 1 1 1 1 ...
## $ FR : Factor w/ 3 levels "Once","twice",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ DT : Factor w/ 2 levels "Normal","Rush hour": 2 2 2 2 2 2 2 2 1 2 ...
## $ DI : num 2 8 5 5 8 20 15 10 12 10 ...
## $ TI : num 10 15 15 10 15 30 20 25 30 25 ...
## $ SC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ LS : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ TS : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 1 2 2 ...
## $ MR : Factor w/ 6 levels "Safety","Comfortable",..: 4 2 4 2 2 5 5 2 2 2 ...
## $ WE : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ WK : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
## $ NBR: Factor w/ 6 levels "No Route","Inconvenience",..: 1 1 2 4 2 2 4 2 1 4 ...
## $ CO : num 15 5 3 10 5 50 40 5 7 7 ...
## $ BUB: Factor w/ 2 levels "No","Yes": 2 1 2 2 2 1 1 2 2 2 ...
## $ SBM: Factor w/ 3 levels "Don't agree",..: 2 1 1 1 1 2 1 2 2 2 ...
## $ SBS: Factor w/ 3 levels "Don't agree",..: 2 1 1 1 1 1 1 1 2 1 ...
## $ BST: Factor w/ 3 levels "Don't agree",..: 2 1 1 2 2 3 3 3 3 1 ...
## $ BSC: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ BSA: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ BRE: Factor w/ 3 levels "Don't agree",..: 3 1 2 3 1 2 1 2 2 2 ...
## $ BCO: Factor w/ 3 levels "Don't agree",..: 3 1 2 3 1 2 1 2 2 1 ...
## $ BIN: Factor w/ 3 levels "Don't agree",..: 2 3 3 3 2 2 2 2 2 3 ...
## $ BHE: Factor w/ 3 levels "Don't agree",..: 3 1 1 2 1 1 1 2 2 1 ...
## $ BEN: Factor w/ 3 levels "Don't agree",..: 3 3 3 2 3 3 3 3 3 3 ...
## $ BRC: Factor w/ 3 levels "Don't agree",..: 3 3 3 2 3 3 3 3 3 3 ...
## $ BWE: Factor w/ 3 levels "Don't agree",..: 3 2 3 3 3 3 3 3 3 3 ...
## $ SP : Factor w/ 3 levels "No","Yes","Don't know": 2 3 2 2 3 3 3 2 2 2 ...
## $ BI : Factor w/ 2 levels "No","Yes": 2 1 2 2 1 1 1 2 2 2 ...
## $ WI : Factor w/ 5 levels "No","At Bus Stops",..: 2 1 3 2 1 1 1 3 4 3 ...
## $ MSI: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
## $ GD : Factor w/ 2 levels "Female","Male": 1 1 2 2 1 2 2 1 1 2 ...
## $ AG : Factor w/ 3 levels "<=24","25-60",..: 2 1 1 1 2 2 2 2 1 2 ...
## $ OC : Factor w/ 5 levels "Pupils/Students",..: 3 2 1 1 2 2 2 4 4 2 ...
## $ IN : Factor w/ 4 levels "<8 millions",..: 2 3 4 3 3 4 4 2 3 3 ...
## $ NC : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 2 1 2 1 ...
## $ MC : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ CC : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
## $ BO : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 1 2 1 ...
## $ MO : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ RO : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
## $ NB : Factor w/ 3 levels "None","1",">=2": 2 2 1 2 1 2 1 1 2 1 ...
## $ NM : Factor w/ 4 levels "None","1","2",..: 3 4 4 4 3 3 3 3 4 4 ...
## $ NR : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 2 2 1 1 1 ...
2. Descriptive analyse
# Descriptive analyse with package "table 1" - sort by MSI
## install.packages("table1", dependencies = T)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1 (~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TM + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI| MSI , data = BS)
| No (N=530) |
Yes (N=194) |
Overall (N=724) |
|
|---|---|---|---|
| GD | |||
| Female | 231 (43.6%) | 70 (36.1%) | 301 (41.6%) |
| Male | 299 (56.4%) | 124 (63.9%) | 423 (58.4%) |
| AG | |||
| <=24 | 216 (40.8%) | 106 (54.6%) | 322 (44.5%) |
| 25-60 | 302 (57.0%) | 82 (42.3%) | 384 (53.0%) |
| >60 | 12 (2.3%) | 6 (3.1%) | 18 (2.5%) |
| OC | |||
| Pupils/Students | 166 (31.3%) | 93 (47.9%) | 259 (35.8%) |
| Officers/Workers | 164 (30.9%) | 40 (20.6%) | 204 (28.2%) |
| Housewife/Unemployed | 34 (6.4%) | 6 (3.1%) | 40 (5.5%) |
| Free labor | 135 (25.5%) | 35 (18.0%) | 170 (23.5%) |
| Others | 31 (5.8%) | 20 (10.3%) | 51 (7.0%) |
| IN | |||
| <8 millions | 119 (22.5%) | 36 (18.6%) | 155 (21.4%) |
| (8-15) millions | 214 (40.4%) | 90 (46.4%) | 304 (42.0%) |
| (15-25) millions | 134 (25.3%) | 48 (24.7%) | 182 (25.1%) |
| >25 millions | 63 (11.9%) | 20 (10.3%) | 83 (11.5%) |
| NC | |||
| No | 222 (41.9%) | 81 (41.8%) | 303 (41.9%) |
| Yes | 308 (58.1%) | 113 (58.2%) | 421 (58.1%) |
| MC | |||
| No | 50 (9.4%) | 37 (19.1%) | 87 (12.0%) |
| Yes | 480 (90.6%) | 157 (80.9%) | 637 (88.0%) |
| CC | |||
| No | 413 (77.9%) | 163 (84.0%) | 576 (79.6%) |
| Yes | 117 (22.1%) | 31 (16.0%) | 148 (20.4%) |
| BO | |||
| No | 314 (59.2%) | 119 (61.3%) | 433 (59.8%) |
| Yes | 216 (40.8%) | 75 (38.7%) | 291 (40.2%) |
| MO | |||
| No | 78 (14.7%) | 48 (24.7%) | 126 (17.4%) |
| Yes | 452 (85.3%) | 146 (75.3%) | 598 (82.6%) |
| RO | |||
| No | 444 (83.8%) | 166 (85.6%) | 610 (84.3%) |
| Yes | 86 (16.2%) | 28 (14.4%) | 114 (15.7%) |
| NB | |||
| None | 242 (45.7%) | 82 (42.3%) | 324 (44.8%) |
| 1 | 212 (40.0%) | 91 (46.9%) | 303 (41.9%) |
| >=2 | 76 (14.3%) | 21 (10.8%) | 97 (13.4%) |
| NM | |||
| None | 17 (3.2%) | 3 (1.5%) | 20 (2.8%) |
| 1 | 80 (15.1%) | 24 (12.4%) | 104 (14.4%) |
| 2 | 279 (52.6%) | 90 (46.4%) | 369 (51.0%) |
| >=3 | 154 (29.1%) | 77 (39.7%) | 231 (31.9%) |
| NR | |||
| No | 399 (75.3%) | 147 (75.8%) | 546 (75.4%) |
| Yes | 131 (24.7%) | 47 (24.2%) | 178 (24.6%) |
| TM | |||
| Walk/Bicycle | 45 (8.5%) | 30 (15.5%) | 75 (10.4%) |
| Motorbike | 391 (73.8%) | 125 (64.4%) | 516 (71.3%) |
| Car | 64 (12.1%) | 18 (9.3%) | 82 (11.3%) |
| Hichhiking/Taxi | 30 (5.7%) | 21 (10.8%) | 51 (7.0%) |
| TP | |||
| Work/Study | 337 (63.6%) | 146 (75.3%) | 483 (66.7%) |
| Picking Children | 72 (13.6%) | 13 (6.7%) | 85 (11.7%) |
| Entertainment/Food | 108 (20.4%) | 32 (16.5%) | 140 (19.3%) |
| Others | 13 (2.5%) | 3 (1.5%) | 16 (2.2%) |
| FR | |||
| Once | 35 (6.6%) | 9 (4.6%) | 44 (6.1%) |
| twice | 37 (7.0%) | 9 (4.6%) | 46 (6.4%) |
| >=3 times | 458 (86.4%) | 176 (90.7%) | 634 (87.6%) |
| DT | |||
| Normal | 124 (23.4%) | 41 (21.1%) | 165 (22.8%) |
| Rush hour | 406 (76.6%) | 153 (78.9%) | 559 (77.2%) |
| TS | |||
| No | 252 (47.5%) | 107 (55.2%) | 359 (49.6%) |
| Yes | 278 (52.5%) | 87 (44.8%) | 365 (50.4%) |
| SP | |||
| No | 52 (9.8%) | 12 (6.2%) | 64 (8.8%) |
| Yes | 404 (76.2%) | 175 (90.2%) | 579 (80.0%) |
| Don't know | 74 (14.0%) | 7 (3.6%) | 81 (11.2%) |
| BI | |||
| No | 267 (50.4%) | 50 (25.8%) | 317 (43.8%) |
| Yes | 263 (49.6%) | 144 (74.2%) | 407 (56.2%) |
| WI | |||
| No | 267 (50.4%) | 50 (25.8%) | 317 (43.8%) |
| At Bus Stops | 139 (26.2%) | 66 (34.0%) | 205 (28.3%) |
| Internet/Facebook | 55 (10.4%) | 35 (18.0%) | 90 (12.4%) |
| From Friends | 34 (6.4%) | 28 (14.4%) | 62 (8.6%) |
| Others | 35 (6.6%) | 15 (7.7%) | 50 (6.9%) |
| LS | |||
| No | 106 (20.0%) | 48 (24.7%) | 154 (21.3%) |
| Yes | 424 (80.0%) | 146 (75.3%) | 570 (78.7%) |
| SC | |||
| No | 78 (14.7%) | 18 (9.3%) | 96 (13.3%) |
| Yes | 452 (85.3%) | 176 (90.7%) | 628 (86.7%) |
| BC | |||
| No | 231 (43.6%) | 116 (59.8%) | 347 (47.9%) |
| Yes | 299 (56.4%) | 78 (40.2%) | 377 (52.1%) |
| WE | |||
| No | 523 (98.7%) | 192 (99.0%) | 715 (98.8%) |
| Yes | 7 (1.3%) | 2 (1.0%) | 9 (1.2%) |
| WK | |||
| No | 397 (74.9%) | 150 (77.3%) | 547 (75.6%) |
| Yes | 133 (25.1%) | 44 (22.7%) | 177 (24.4%) |
| CA | |||
| No | 273 (51.5%) | 122 (62.9%) | 395 (54.6%) |
| Yes | 257 (48.5%) | 72 (37.1%) | 329 (45.4%) |
| BST | |||
| Don't agree | 242 (45.7%) | 92 (47.4%) | 334 (46.1%) |
| Normal | 161 (30.4%) | 53 (27.3%) | 214 (29.6%) |
| Agree | 127 (24.0%) | 49 (25.3%) | 176 (24.3%) |
| BSC | |||
| Don't agree | 85 (16.0%) | 30 (15.5%) | 115 (15.9%) |
| Normal | 136 (25.7%) | 32 (16.5%) | 168 (23.2%) |
| Agree | 309 (58.3%) | 132 (68.0%) | 441 (60.9%) |
| BSA | |||
| Don't agree | 55 (10.4%) | 23 (11.9%) | 78 (10.8%) |
| Normal | 147 (27.7%) | 31 (16.0%) | 178 (24.6%) |
| Agree | 328 (61.9%) | 140 (72.2%) | 468 (64.6%) |
| BRE | |||
| Don't agree | 105 (19.8%) | 41 (21.1%) | 146 (20.2%) |
| Normal | 158 (29.8%) | 57 (29.4%) | 215 (29.7%) |
| Agree | 267 (50.4%) | 96 (49.5%) | 363 (50.1%) |
| BCO | |||
| Don't agree | 158 (29.8%) | 66 (34.0%) | 224 (30.9%) |
| Normal | 166 (31.3%) | 57 (29.4%) | 223 (30.8%) |
| Agree | 206 (38.9%) | 71 (36.6%) | 277 (38.3%) |
| BIN | |||
| Don't agree | 135 (25.5%) | 66 (34.0%) | 201 (27.8%) |
| Normal | 223 (42.1%) | 73 (37.6%) | 296 (40.9%) |
| Agree | 172 (32.5%) | 55 (28.4%) | 227 (31.4%) |
| BHE | |||
| Don't agree | 228 (43.0%) | 77 (39.7%) | 305 (42.1%) |
| Normal | 177 (33.4%) | 57 (29.4%) | 234 (32.3%) |
| Agree | 125 (23.6%) | 60 (30.9%) | 185 (25.6%) |
| BEN | |||
| Don't agree | 60 (11.3%) | 22 (11.3%) | 82 (11.3%) |
| Normal | 119 (22.5%) | 34 (17.5%) | 153 (21.1%) |
| Agree | 351 (66.2%) | 138 (71.1%) | 489 (67.5%) |
| BRC | |||
| Don't agree | 62 (11.7%) | 29 (14.9%) | 91 (12.6%) |
| Normal | 95 (17.9%) | 21 (10.8%) | 116 (16.0%) |
| Agree | 373 (70.4%) | 144 (74.2%) | 517 (71.4%) |
| BWE | |||
| Don't agree | 44 (8.3%) | 24 (12.4%) | 68 (9.4%) |
| Normal | 53 (10.0%) | 25 (12.9%) | 78 (10.8%) |
| Agree | 433 (81.7%) | 145 (74.7%) | 578 (79.8%) |
| MR | |||
| Safety | 70 (13.2%) | 19 (9.8%) | 89 (12.3%) |
| Comfortable | 255 (48.1%) | 79 (40.7%) | 334 (46.1%) |
| Low price | 27 (5.1%) | 15 (7.7%) | 42 (5.8%) |
| Accessibility | 117 (22.1%) | 59 (30.4%) | 176 (24.3%) |
| Reliability | 46 (8.7%) | 13 (6.7%) | 59 (8.1%) |
| others | 15 (2.8%) | 9 (4.6%) | 24 (3.3%) |
| NBR | |||
| No Route | 114 (21.5%) | 47 (24.2%) | 161 (22.2%) |
| Inconvenience | 165 (31.1%) | 57 (29.4%) | 222 (30.7%) |
| Unsafety | 12 (2.3%) | 7 (3.6%) | 19 (2.6%) |
| Long waiting time | 159 (30.0%) | 48 (24.7%) | 207 (28.6%) |
| Unreliability | 50 (9.4%) | 19 (9.8%) | 69 (9.5%) |
| others | 30 (5.7%) | 16 (8.2%) | 46 (6.4%) |
| BUB | |||
| No | 227 (42.8%) | 54 (27.8%) | 281 (38.8%) |
| Yes | 303 (57.2%) | 140 (72.2%) | 443 (61.2%) |
| SBM | |||
| Don't agree | 300 (56.6%) | 70 (36.1%) | 370 (51.1%) |
| Normal | 180 (34.0%) | 50 (25.8%) | 230 (31.8%) |
| Agree | 50 (9.4%) | 74 (38.1%) | 124 (17.1%) |
| SBS | |||
| Don't agree | 329 (62.1%) | 93 (47.9%) | 422 (58.3%) |
| Normal | 176 (33.2%) | 57 (29.4%) | 233 (32.2%) |
| Agree | 25 (4.7%) | 44 (22.7%) | 69 (9.5%) |
| CO | |||
| Mean (SD) | 13.4 (20.4) | 13.5 (22.9) | 13.4 (21.0) |
| Median [Min, Max] | 10.0 [0, 300] | 10.0 [0, 250] | 10.0 [0, 300] |
| DI | |||
| Mean (SD) | 6.21 (5.42) | 6.02 (5.48) | 6.16 (5.44) |
| Median [Min, Max] | 5.00 [0.0150, 35.0] | 4.50 [0.0200, 30.0] | 5.00 [0.0150, 35.0] |
| TI | |||
| Mean (SD) | 16.0 (12.9) | 19.1 (21.2) | 16.8 (15.6) |
| Median [Min, Max] | 15.0 [0, 180] | 15.0 [1.00, 180] | 15.0 [0, 180] |
# Descriptive analyse with package "compareGroups" - sort by MSI (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
library(compareGroups)
t = compareGroups(MSI ~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TM + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI, data = BS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t)
##
## --------Summary descriptives table by 'MSI'---------
##
## __________________________________________________________
## No Yes p.overall
## N=530 N=194
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## GD: 0.084
## Female 231 (43.6%) 70 (36.1%)
## Male 299 (56.4%) 124 (63.9%)
## AG: 0.002
## <=24 216 (40.8%) 106 (54.6%)
## 25-60 302 (57.0%) 82 (42.3%)
## >60 12 (2.26%) 6 (3.09%)
## OC: <0.001
## Pupils/Students 166 (31.3%) 93 (47.9%)
## Officers/Workers 164 (30.9%) 40 (20.6%)
## Housewife/Unemployed 34 (6.42%) 6 (3.09%)
## Free labor 135 (25.5%) 35 (18.0%)
## Others 31 (5.85%) 20 (10.3%)
## IN: 0.465
## <8 millions 119 (22.5%) 36 (18.6%)
## (8-15) millions 214 (40.4%) 90 (46.4%)
## (15-25) millions 134 (25.3%) 48 (24.7%)
## >25 millions 63 (11.9%) 20 (10.3%)
## NC: 1.000
## No 222 (41.9%) 81 (41.8%)
## Yes 308 (58.1%) 113 (58.2%)
## MC: 0.001
## No 50 (9.43%) 37 (19.1%)
## Yes 480 (90.6%) 157 (80.9%)
## CC: 0.090
## No 413 (77.9%) 163 (84.0%)
## Yes 117 (22.1%) 31 (16.0%)
## BO: 0.672
## No 314 (59.2%) 119 (61.3%)
## Yes 216 (40.8%) 75 (38.7%)
## MO: 0.002
## No 78 (14.7%) 48 (24.7%)
## Yes 452 (85.3%) 146 (75.3%)
## RO: 0.637
## No 444 (83.8%) 166 (85.6%)
## Yes 86 (16.2%) 28 (14.4%)
## NB: 0.193
## None 242 (45.7%) 82 (42.3%)
## 1 212 (40.0%) 91 (46.9%)
## >=2 76 (14.3%) 21 (10.8%)
## NM: 0.041
## None 17 (3.21%) 3 (1.55%)
## 1 80 (15.1%) 24 (12.4%)
## 2 279 (52.6%) 90 (46.4%)
## >=3 154 (29.1%) 77 (39.7%)
## NR: 0.970
## No 399 (75.3%) 147 (75.8%)
## Yes 131 (24.7%) 47 (24.2%)
## TM: 0.002
## Walk/Bicycle 45 (8.49%) 30 (15.5%)
## Motorbike 391 (73.8%) 125 (64.4%)
## Car 64 (12.1%) 18 (9.28%)
## Hichhiking/Taxi 30 (5.66%) 21 (10.8%)
## TP: 0.015
## Work/Study 337 (63.6%) 146 (75.3%)
## Picking Children 72 (13.6%) 13 (6.70%)
## Entertainment/Food 108 (20.4%) 32 (16.5%)
## Others 13 (2.45%) 3 (1.55%)
## FR: 0.297
## Once 35 (6.60%) 9 (4.64%)
## twice 37 (6.98%) 9 (4.64%)
## >=3 times 458 (86.4%) 176 (90.7%)
## DT: 0.587
## Normal 124 (23.4%) 41 (21.1%)
## Rush hour 406 (76.6%) 153 (78.9%)
## TS: 0.084
## No 252 (47.5%) 107 (55.2%)
## Yes 278 (52.5%) 87 (44.8%)
## SP: <0.001
## No 52 (9.81%) 12 (6.19%)
## Yes 404 (76.2%) 175 (90.2%)
## Don't know 74 (14.0%) 7 (3.61%)
## BI: <0.001
## No 267 (50.4%) 50 (25.8%)
## Yes 263 (49.6%) 144 (74.2%)
## WI: <0.001
## No 267 (50.4%) 50 (25.8%)
## At Bus Stops 139 (26.2%) 66 (34.0%)
## Internet/Facebook 55 (10.4%) 35 (18.0%)
## From Friends 34 (6.42%) 28 (14.4%)
## Others 35 (6.60%) 15 (7.73%)
## LS: 0.201
## No 106 (20.0%) 48 (24.7%)
## Yes 424 (80.0%) 146 (75.3%)
## SC: 0.074
## No 78 (14.7%) 18 (9.28%)
## Yes 452 (85.3%) 176 (90.7%)
## BC: <0.001
## No 231 (43.6%) 116 (59.8%)
## Yes 299 (56.4%) 78 (40.2%)
## WE: 1.000
## No 523 (98.7%) 192 (99.0%)
## Yes 7 (1.32%) 2 (1.03%)
## WK: 0.568
## No 397 (74.9%) 150 (77.3%)
## Yes 133 (25.1%) 44 (22.7%)
## CA: 0.008
## No 273 (51.5%) 122 (62.9%)
## Yes 257 (48.5%) 72 (37.1%)
## BST: 0.725
## Don't agree 242 (45.7%) 92 (47.4%)
## Normal 161 (30.4%) 53 (27.3%)
## Agree 127 (24.0%) 49 (25.3%)
## BSC: 0.025
## Don't agree 85 (16.0%) 30 (15.5%)
## Normal 136 (25.7%) 32 (16.5%)
## Agree 309 (58.3%) 132 (68.0%)
## BSA: 0.005
## Don't agree 55 (10.4%) 23 (11.9%)
## Normal 147 (27.7%) 31 (16.0%)
## Agree 328 (61.9%) 140 (72.2%)
## BRE: 0.926
## Don't agree 105 (19.8%) 41 (21.1%)
## Normal 158 (29.8%) 57 (29.4%)
## Agree 267 (50.4%) 96 (49.5%)
## BCO: 0.555
## Don't agree 158 (29.8%) 66 (34.0%)
## Normal 166 (31.3%) 57 (29.4%)
## Agree 206 (38.9%) 71 (36.6%)
## BIN: 0.075
## Don't agree 135 (25.5%) 66 (34.0%)
## Normal 223 (42.1%) 73 (37.6%)
## Agree 172 (32.5%) 55 (28.4%)
## BHE: 0.130
## Don't agree 228 (43.0%) 77 (39.7%)
## Normal 177 (33.4%) 57 (29.4%)
## Agree 125 (23.6%) 60 (30.9%)
## BEN: 0.343
## Don't agree 60 (11.3%) 22 (11.3%)
## Normal 119 (22.5%) 34 (17.5%)
## Agree 351 (66.2%) 138 (71.1%)
## BRC: 0.051
## Don't agree 62 (11.7%) 29 (14.9%)
## Normal 95 (17.9%) 21 (10.8%)
## Agree 373 (70.4%) 144 (74.2%)
## BWE: 0.107
## Don't agree 44 (8.30%) 24 (12.4%)
## Normal 53 (10.0%) 25 (12.9%)
## Agree 433 (81.7%) 145 (74.7%)
## MR: 0.054
## Safety 70 (13.2%) 19 (9.79%)
## Comfortable 255 (48.1%) 79 (40.7%)
## Low price 27 (5.09%) 15 (7.73%)
## Accessibility 117 (22.1%) 59 (30.4%)
## Reliability 46 (8.68%) 13 (6.70%)
## others 15 (2.83%) 9 (4.64%)
## NBR: 0.483
## No Route 114 (21.5%) 47 (24.2%)
## Inconvenience 165 (31.1%) 57 (29.4%)
## Unsafety 12 (2.26%) 7 (3.61%)
## Long waiting time 159 (30.0%) 48 (24.7%)
## Unreliability 50 (9.43%) 19 (9.79%)
## others 30 (5.66%) 16 (8.25%)
## BUB: <0.001
## No 227 (42.8%) 54 (27.8%)
## Yes 303 (57.2%) 140 (72.2%)
## SBM: <0.001
## Don't agree 300 (56.6%) 70 (36.1%)
## Normal 180 (34.0%) 50 (25.8%)
## Agree 50 (9.43%) 74 (38.1%)
## SBS: <0.001
## Don't agree 329 (62.1%) 93 (47.9%)
## Normal 176 (33.2%) 57 (29.4%)
## Agree 25 (4.72%) 44 (22.7%)
## CO 13.4 (20.4) 13.5 (22.9) 0.954
## DI 6.21 (5.42) 6.02 (5.48) 0.676
## TI 16.0 (12.9) 19.1 (21.2) 0.063
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (25 variables): GD, IN, NC, CC, BO, RO, NB, NR, FR, DT, TS, LS, SC, WE, WK, BST, BRE, BCO, BIN, BHE, BEN, BWE, NBR, CO, DI
t1 = compareGroups(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + BI + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, data = BS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t1)
##
## --------Summary descriptives table by 'MSI'---------
##
## __________________________________________________________
## No Yes p.overall
## N=530 N=194
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## AG: 0.002
## <=24 216 (40.8%) 106 (54.6%)
## 25-60 302 (57.0%) 82 (42.3%)
## >60 12 (2.26%) 6 (3.09%)
## OC: <0.001
## Pupils/Students 166 (31.3%) 93 (47.9%)
## Officers/Workers 164 (30.9%) 40 (20.6%)
## Housewife/Unemployed 34 (6.42%) 6 (3.09%)
## Free labor 135 (25.5%) 35 (18.0%)
## Others 31 (5.85%) 20 (10.3%)
## MC: 0.001
## No 50 (9.43%) 37 (19.1%)
## Yes 480 (90.6%) 157 (80.9%)
## MO: 0.002
## No 78 (14.7%) 48 (24.7%)
## Yes 452 (85.3%) 146 (75.3%)
## NM: 0.041
## None 17 (3.21%) 3 (1.55%)
## 1 80 (15.1%) 24 (12.4%)
## 2 279 (52.6%) 90 (46.4%)
## >=3 154 (29.1%) 77 (39.7%)
## TM: 0.002
## Walk/Bicycle 45 (8.49%) 30 (15.5%)
## Motorbike 391 (73.8%) 125 (64.4%)
## Car 64 (12.1%) 18 (9.28%)
## Hichhiking/Taxi 30 (5.66%) 21 (10.8%)
## TP: 0.015
## Work/Study 337 (63.6%) 146 (75.3%)
## Picking Children 72 (13.6%) 13 (6.70%)
## Entertainment/Food 108 (20.4%) 32 (16.5%)
## Others 13 (2.45%) 3 (1.55%)
## SP: <0.001
## No 52 (9.81%) 12 (6.19%)
## Yes 404 (76.2%) 175 (90.2%)
## Don't know 74 (14.0%) 7 (3.61%)
## BI: <0.001
## No 267 (50.4%) 50 (25.8%)
## Yes 263 (49.6%) 144 (74.2%)
## WI: <0.001
## No 267 (50.4%) 50 (25.8%)
## At Bus Stops 139 (26.2%) 66 (34.0%)
## Internet/Facebook 55 (10.4%) 35 (18.0%)
## From Friends 34 (6.42%) 28 (14.4%)
## Others 35 (6.60%) 15 (7.73%)
## BC: <0.001
## No 231 (43.6%) 116 (59.8%)
## Yes 299 (56.4%) 78 (40.2%)
## CA: 0.008
## No 273 (51.5%) 122 (62.9%)
## Yes 257 (48.5%) 72 (37.1%)
## BSC: 0.025
## Don't agree 85 (16.0%) 30 (15.5%)
## Normal 136 (25.7%) 32 (16.5%)
## Agree 309 (58.3%) 132 (68.0%)
## BSA: 0.005
## Don't agree 55 (10.4%) 23 (11.9%)
## Normal 147 (27.7%) 31 (16.0%)
## Agree 328 (61.9%) 140 (72.2%)
## BRC: 0.051
## Don't agree 62 (11.7%) 29 (14.9%)
## Normal 95 (17.9%) 21 (10.8%)
## Agree 373 (70.4%) 144 (74.2%)
## MR: 0.054
## Safety 70 (13.2%) 19 (9.79%)
## Comfortable 255 (48.1%) 79 (40.7%)
## Low price 27 (5.09%) 15 (7.73%)
## Accessibility 117 (22.1%) 59 (30.4%)
## Reliability 46 (8.68%) 13 (6.70%)
## others 15 (2.83%) 9 (4.64%)
## BUB: <0.001
## No 227 (42.8%) 54 (27.8%)
## Yes 303 (57.2%) 140 (72.2%)
## SBM: <0.001
## Don't agree 300 (56.6%) 70 (36.1%)
## Normal 180 (34.0%) 50 (25.8%)
## Agree 50 (9.43%) 74 (38.1%)
## SBS: <0.001
## Don't agree 329 (62.1%) 93 (47.9%)
## Normal 176 (33.2%) 57 (29.4%)
## Agree 25 (4.72%) 44 (22.7%)
## TI 16.0 (12.9) 19.1 (21.2) 0.063
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (21/25 variables): IN, BO, RO, NB, NR, FR, DT, LS, SC, WE, WK, BST, BRE, BCO, BIN, BHE, BEN, BWE, NBR, CO, DI; Cor>0.7 (BI); remains 4/25 variables p-value to test (GD, NC, CC, TS)
t.c = compareGroups(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, data = BS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t.c)
##
## --------Summary descriptives table by 'MSI'---------
##
## __________________________________________________________
## No Yes p.overall
## N=530 N=194
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## GD: 0.084
## Female 231 (43.6%) 70 (36.1%)
## Male 299 (56.4%) 124 (63.9%)
## AG: 0.002
## <=24 216 (40.8%) 106 (54.6%)
## 25-60 302 (57.0%) 82 (42.3%)
## >60 12 (2.26%) 6 (3.09%)
## OC: <0.001
## Pupils/Students 166 (31.3%) 93 (47.9%)
## Officers/Workers 164 (30.9%) 40 (20.6%)
## Housewife/Unemployed 34 (6.42%) 6 (3.09%)
## Free labor 135 (25.5%) 35 (18.0%)
## Others 31 (5.85%) 20 (10.3%)
## NC: 1.000
## No 222 (41.9%) 81 (41.8%)
## Yes 308 (58.1%) 113 (58.2%)
## MC: 0.001
## No 50 (9.43%) 37 (19.1%)
## Yes 480 (90.6%) 157 (80.9%)
## CC: 0.090
## No 413 (77.9%) 163 (84.0%)
## Yes 117 (22.1%) 31 (16.0%)
## MO: 0.002
## No 78 (14.7%) 48 (24.7%)
## Yes 452 (85.3%) 146 (75.3%)
## NM: 0.041
## None 17 (3.21%) 3 (1.55%)
## 1 80 (15.1%) 24 (12.4%)
## 2 279 (52.6%) 90 (46.4%)
## >=3 154 (29.1%) 77 (39.7%)
## TM: 0.002
## Walk/Bicycle 45 (8.49%) 30 (15.5%)
## Motorbike 391 (73.8%) 125 (64.4%)
## Car 64 (12.1%) 18 (9.28%)
## Hichhiking/Taxi 30 (5.66%) 21 (10.8%)
## TP: 0.015
## Work/Study 337 (63.6%) 146 (75.3%)
## Picking Children 72 (13.6%) 13 (6.70%)
## Entertainment/Food 108 (20.4%) 32 (16.5%)
## Others 13 (2.45%) 3 (1.55%)
## TS: 0.084
## No 252 (47.5%) 107 (55.2%)
## Yes 278 (52.5%) 87 (44.8%)
## SP: <0.001
## No 52 (9.81%) 12 (6.19%)
## Yes 404 (76.2%) 175 (90.2%)
## Don't know 74 (14.0%) 7 (3.61%)
## WI: <0.001
## No 267 (50.4%) 50 (25.8%)
## At Bus Stops 139 (26.2%) 66 (34.0%)
## Internet/Facebook 55 (10.4%) 35 (18.0%)
## From Friends 34 (6.42%) 28 (14.4%)
## Others 35 (6.60%) 15 (7.73%)
## BC: <0.001
## No 231 (43.6%) 116 (59.8%)
## Yes 299 (56.4%) 78 (40.2%)
## CA: 0.008
## No 273 (51.5%) 122 (62.9%)
## Yes 257 (48.5%) 72 (37.1%)
## BSC: 0.025
## Don't agree 85 (16.0%) 30 (15.5%)
## Normal 136 (25.7%) 32 (16.5%)
## Agree 309 (58.3%) 132 (68.0%)
## BSA: 0.005
## Don't agree 55 (10.4%) 23 (11.9%)
## Normal 147 (27.7%) 31 (16.0%)
## Agree 328 (61.9%) 140 (72.2%)
## BRC: 0.051
## Don't agree 62 (11.7%) 29 (14.9%)
## Normal 95 (17.9%) 21 (10.8%)
## Agree 373 (70.4%) 144 (74.2%)
## MR: 0.054
## Safety 70 (13.2%) 19 (9.79%)
## Comfortable 255 (48.1%) 79 (40.7%)
## Low price 27 (5.09%) 15 (7.73%)
## Accessibility 117 (22.1%) 59 (30.4%)
## Reliability 46 (8.68%) 13 (6.70%)
## others 15 (2.83%) 9 (4.64%)
## BUB: <0.001
## No 227 (42.8%) 54 (27.8%)
## Yes 303 (57.2%) 140 (72.2%)
## SBM: <0.001
## Don't agree 300 (56.6%) 70 (36.1%)
## Normal 180 (34.0%) 50 (25.8%)
## Agree 50 (9.43%) 74 (38.1%)
## SBS: <0.001
## Don't agree 329 (62.1%) 93 (47.9%)
## Normal 176 (33.2%) 57 (29.4%)
## Agree 25 (4.72%) 44 (22.7%)
## TI 16.0 (12.9) 19.1 (21.2) 0.063
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
3. Descriptive analyse by graphs
library(magrittr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.0.4
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
BS %>%
group_by(GD, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = GD)) +
geom_col(position = "fill") +
xlab("Mode shift intention") +
ylab("Gender") +
ggtitle("Probability of Bus Shift ~ Gender") # Nữ ít có khả năng chuyển đổi hơn
BS %>%
group_by(AG, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = AG)) +
geom_col(position = "fill") +
xlab("Mode shift intention") +
ylab("Age") +
ggtitle("Probability of Bus Shift ~ Age") # Nhóm tuổi lao động ít có khả năng chuyển đổi hơn
BS %>%
group_by(OC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = OC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Occupation") +
ggtitle("Proportion of Bus Shift ~ Occupation") # Học sinh sinh viên có khả năng chuyển đổi nhiều hơn
BS %>%
group_by(NC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = NC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Number of children") +
ggtitle("Proportion of Bus Shift ~ Number of children") # Không có ảnh hưởng đến khả năng chuyển đổi PT
BS %>%
group_by(MC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = MC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Motor Certification") +
ggtitle("Proportion of Bus Shift ~ Motor Certification") # Có bằng lái xe máy ít có khả năng chuyển đổi hơn
BS %>%
group_by(CC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = CC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Car Certification") +
ggtitle("Proportion of Bus Shift ~ Car Certification") # Có bằng lái xe ô tô ít có khả năng chuyển đổi hơn
BS %>%
group_by(MO, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = MO)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Motor Owning") +
ggtitle("Proportion of Bus Shift ~ Motor Owning") # Sở hữu xe máy ít có khả năng chuyển đổi PT hơn
BS %>%
group_by(NM, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = NM)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Number of Motor") +
ggtitle("Proportion of Bus Shift ~ Number of Motor") # Số lượng xe máy trong gia đình sở hữu có xu hướng ảnh hưởng tích cực đến khả năng chuyển đổi (Hơi mâu thuẩn ???)
BS %>%
group_by(TM, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = TM)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Travel Mode") +
ggtitle("Proportion of Bus Shift ~ Travel Mode") # Người sử dụng xe máy và ô tô ít có khả năng chuyển đổi sang xe buýt hơn các đối tượng chọn phương tiện khác
BS %>%
group_by(TP, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = TP)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Travel Purpose") +
ggtitle("Proportion of Bus Shift ~ Travel Purpose") # Mục đích chuyến đi công việc có nhiều khả năng chuyển đổi sang xe buýt hơn
BS %>%
group_by(TS, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = TS)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Temporary Stops") +
ggtitle("Proportion of Bus Shift ~ Temporary Stops") # Có điểm dừng tạm thời trong hành trình chuyến đi sẽ ít có khả năng chuyển đổi PT sang buýt hơn
BS %>%
group_by(SP, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = SP)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus Stop Present") +
ggtitle("Proportion of Bus Shift ~ Bus Stop Present") # Gần trạm dừng sẽ buýt có nhiều khả năng chuyển đổi PT sang xe buýt hơn
BS %>%
group_by(BI, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BI)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus Information") +
ggtitle("Proportion of Bus Shift ~ Bus Information") # Biết về thông tin hệ thống buýt có nhiều khả năng chuyển đổi PT sang xe buýt hơn
BS %>%
group_by(WI, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = WI)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Way to have bus information") +
ggtitle("Proportion of Bus Shift ~ Way to have bus information")# Bioongs BI
BS %>%
group_by(BC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus Condition") +
ggtitle("Proportion of Bus Shift ~ Bus Condition") # Điều kiện trạm dừng (có mái che) tỷ lệ nghich với khả năng chuyển đổi (Mâu thuẩn ??? Kiểm tra)
BS %>%
group_by(CA, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = CA)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Center Area") +
ggtitle("Proportion of Bus Shift ~ Center Area") # Chuyến đi ở khu vực trung tâm ít có khả năng chuyển đổi hơn
BS %>%
group_by(BSC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BSC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of cost saving") +
ggtitle("Proportion of Bus Shift ~ Perception of cost saving") # Cảm nhận xe buýt tiết kiệm chi phí đi lại có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
BS %>%
group_by(BSA, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BSA)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of Safety") +
ggtitle("Proportion of Bus Shift ~ Perception of Safety") # Cảm nhận xe buýt an toàn có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
BS %>%
group_by(BRC, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BRC)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Perception of Traffic congestion reduce") +
ggtitle("Proportion of Bus Shift ~ Perception of Traffic congestion reduce") # Cảm nhận xe buýt giảm UTGT có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi
BS %>%
group_by(MR, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = MR)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Mode Choice Reason") +
ggtitle("Proportion of Bus Shift ~ Mode Choice Reason")
BS %>%
group_by(BUB, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = BUB)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Bus use before") +
ggtitle("Proportion of Bus Shift ~ Bus use before") # Trải nghiệm xe buýt có ảnh hưởng tích cực đến khả năng chuyển đổi
BS %>%
group_by(SBM, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = SBM)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Like to shift to bus for main trips") +
ggtitle("Proportion of Bus Shift ~ Like to shift to bus for main trips")
BS %>%
group_by(SBS, MSI) %>%
count() %>%
ggplot(aes(MSI, n, fill = SBS)) +
geom_col(position = "fill") +
xlab("Mode Shift Intention") +
ylab("Like to shift to bus for sub trips") +
ggtitle("Proportion of Bus Shift ~ Like to shift to bus for sub trips")
## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(BS, aes(x=TI, fill = MSI, color = MSI)) +
geom_histogram (position = "dodge") +
xlab("Travel Period") +
ylab("Count") +
ggtitle("Histogram of Travel Time")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(BS, aes(x=DI, fill = MSI, color = MSI)) +
geom_histogram (position = "dodge") +
xlab("Distance") +
ylab("Count") +
ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(BS, aes(x=CO, fill = MSI, color = MSI)) +
geom_histogram (position = "dodge") +
xlab("Cost") +
ylab("Count") +
ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Statistic Analysis by boxplot (continuous variales)
BS %>%
group_by(DI, MSI) %>%
count() %>%
ggplot(aes(x = MSI, y = DI, fill = MSI)) +
geom_boxplot() +
xlab("Mode Shift to Bus") +
ylab("Distance") +
ggtitle("Boxplot of Bus Shift ~ Distance")
BS %>%
group_by(CO, MSI) %>%
count() %>%
ggplot(aes(x = MSI, y = CO, fill = MSI)) +
geom_boxplot() +
xlab("Mode Shift to Bus") +
ylab("Cost") +
ggtitle("Boxplot of Bus Shift ~ Cost")
BS %>%
group_by(TI, MSI) %>%
count() %>%
ggplot(aes(x = MSI, y = TI, fill = MSI)) +
geom_boxplot() +
xlab("Mode Shift to Bus") +
ylab("Travel Period") +
ggtitle("Boxplot of Bus Shift ~ Travel Period")
## Correlation in continuous variables
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
CVMSI = data.frame(BS$MSI, BS$DI, BS$TI, BS$CO)
pairs.panels(CVMSI)
summary(BS)
## CA BC TM TP
## No :395 No :347 Walk/Bicycle : 75 Work/Study :483
## Yes:329 Yes:377 Motorbike :516 Picking Children : 85
## Car : 82 Entertainment/Food:140
## Hichhiking/Taxi: 51 Others : 16
##
##
## FR DT DI TI SC
## Once : 44 Normal :165 Min. : 0.015 Min. : 0.00 No : 96
## twice : 46 Rush hour:559 1st Qu.: 2.000 1st Qu.: 10.00 Yes:628
## >=3 times:634 Median : 5.000 Median : 15.00
## Mean : 6.159 Mean : 16.84
## 3rd Qu.:10.000 3rd Qu.: 20.00
## Max. :35.000 Max. :180.00
## LS TS MR WE WK
## No :154 No :359 Safety : 89 No :715 No :547
## Yes:570 Yes:365 Comfortable :334 Yes: 9 Yes:177
## Low price : 42
## Accessibility:176
## Reliability : 59
## others : 24
## NBR CO BUB SBM
## No Route :161 Min. : 0.0 No :281 Don't agree:370
## Inconvenience :222 1st Qu.: 4.0 Yes:443 Normal :230
## Unsafety : 19 Median : 10.0 Agree :124
## Long waiting time:207 Mean : 13.4
## Unreliability : 69 3rd Qu.: 15.0
## others : 46 Max. :300.0
## SBS BST BSC BSA
## Don't agree:422 Don't agree:334 Don't agree:115 Don't agree: 78
## Normal :233 Normal :214 Normal :168 Normal :178
## Agree : 69 Agree :176 Agree :441 Agree :468
##
##
##
## BRE BCO BIN BHE
## Don't agree:146 Don't agree:224 Don't agree:201 Don't agree:305
## Normal :215 Normal :223 Normal :296 Normal :234
## Agree :363 Agree :277 Agree :227 Agree :185
##
##
##
## BEN BRC BWE SP
## Don't agree: 82 Don't agree: 91 Don't agree: 68 No : 64
## Normal :153 Normal :116 Normal : 78 Yes :579
## Agree :489 Agree :517 Agree :578 Don't know: 81
##
##
##
## BI WI MSI GD AG
## No :317 No :317 No :530 Female:301 <=24 :322
## Yes:407 At Bus Stops :205 Yes:194 Male :423 25-60:384
## Internet/Facebook: 90 >60 : 18
## From Friends : 62
## Others : 50
##
## OC IN NC MC
## Pupils/Students :259 <8 millions :155 No :303 No : 87
## Officers/Workers :204 (8-15) millions :304 Yes:421 Yes:637
## Housewife/Unemployed: 40 (15-25) millions:182
## Free labor :170 >25 millions : 83
## Others : 51
##
## CC BO MO RO NB NM NR
## No :576 No :433 No :126 No :610 None:324 None: 20 No :546
## Yes:148 Yes:291 Yes:598 Yes:114 1 :303 1 :104 Yes:178
## >=2 : 97 2 :369
## >=3 :231
##
##
# Correlations between MSI and variables of Group 1 (Individual Characteristics)
Cor1 = data.frame(BS$MSI, BS$GD, BS$AG, BS$OC, BS$IN, BS$NC, BS$MC, BS$CC, BS$BO, BS$MO, BS$RO, BS$NB, BS$NM, BS$NR)
pairs.panels(Cor1)
Cor1_1 = data.frame(BS$MSI, BS$GD, BS$AG, BS$OC, BS$IN, BS$NC)
pairs.panels(Cor1_1)
Cor1_2 = data.frame(BS$MSI, BS$MC, BS$CC, BS$BO, BS$MO, BS$RO, BS$NB, BS$NM, BS$NR)
pairs.panels(Cor1_2)
# Correlation between MSI and variables of Group 2 (Trip Characteristics)
Cor2 = data.frame(BS$MSI, BS$TM, BS$TP, BS$FR, BS$DT, BS$TS, BS$CO, BS$DI)
pairs.panels(Cor2)
# Correlation between MSI and variables of Group 3 (Infrastructures)
Cor3 = data.frame(BS$MSI, BS$SP, BS$BI, BS$WI, BS$LS, BS$SC, BS$BC, BS$TI)
pairs.panels(Cor3)
# Correlation between MSI and variables of Group 4 (Surrouding Environment) and 5 (Others)
Cor45 = data.frame(BS$MSI, BS$WE, BS$WK, BS$CA, BS$MR, BS$NBR, BS$BUB, BS$SBM, BS$SBS)
pairs.panels(Cor45)
# Correlation between MSI and variables of Group 6 (Perception of Bus)
Cor6 = data.frame(BS$MSI, BS$BST, BS$BSC, BS$BSA, BS$BRE, BS$BCO, BS$BIN, BS$BHE, BS$BEN, BS$BRC, BS$BWE)
pairs.panels(Cor6)
4. Binary Model
# Model with multi-variables (remove variables with p-value>0,05 cor>0.7, in t.c) --> 23 variables
mg = glm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, family = binomial, data = BS)
mg
##
## Call: glm(formula = MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM +
## TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB +
## SBM + SBS + TI, family = binomial, data = BS)
##
## Coefficients:
## (Intercept) GDMale AG25-60
## -0.55298 0.30378 0.40819
## AG>60 OCOfficers/Workers OCHousewife/Unemployed
## 1.48900 -0.64592 -0.44226
## OCFree labor OCOthers NCYes
## -0.98629 -0.01543 0.21170
## MCYes CCYes MOYes
## -0.84386 0.13362 -0.22179
## NM1 NM2 NM>=3
## 0.75807 0.47658 1.05291
## TMMotorbike TMCar TMHichhiking/Taxi
## -0.09731 0.23282 0.36714
## TPPicking Children TPEntertainment/Food TPOthers
## -1.27376 -0.87081 -1.27200
## TSYes SPYes SPDon't know
## -0.27580 -0.19116 -1.17003
## WIAt Bus Stops WIInternet/Facebook WIFrom Friends
## 0.72940 1.41480 0.98769
## WIOthers BCYes CAYes
## 0.82602 -0.79790 -0.78183
## BSCNormal BSCAgree BSANormal
## -0.27922 -0.21726 -0.57199
## BSAAgree BRCNormal BRCAgree
## 0.09555 -0.43554 -0.40906
## MRComfortable MRLow price MRAccessibility
## -0.02688 0.84610 0.76702
## MRReliability MRothers BUBYes
## -0.19950 0.19721 -0.04493
## SBMNormal SBMAgree SBSNormal
## 0.34803 1.96718 -0.02342
## SBSAgree TI
## 0.76157 0.01238
##
## Degrees of Freedom: 723 Total (i.e. Null); 677 Residual
## Null Deviance: 841.6
## Residual Deviance: 612.3 AIC: 706.3
summary(mg)
##
## Call:
## glm(formula = MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM +
## TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB +
## SBM + SBS + TI, family = binomial, data = BS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4126 -0.6578 -0.3798 0.4445 3.0253
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.552975 1.040959 -0.531 0.595268
## GDMale 0.303781 0.233306 1.302 0.192891
## AG25-60 0.408189 0.417731 0.977 0.328491
## AG>60 1.488998 0.819418 1.817 0.069196 .
## OCOfficers/Workers -0.645925 0.443333 -1.457 0.145124
## OCHousewife/Unemployed -0.442259 0.686302 -0.644 0.519311
## OCFree labor -0.986294 0.436473 -2.260 0.023841 *
## OCOthers -0.015426 0.577683 -0.027 0.978697
## NCYes 0.211699 0.232343 0.911 0.362217
## MCYes -0.843858 0.426952 -1.976 0.048101 *
## CCYes 0.133620 0.358990 0.372 0.709736
## MOYes -0.221792 0.377558 -0.587 0.556910
## NM1 0.758073 0.828629 0.915 0.360269
## NM2 0.476576 0.800777 0.595 0.551749
## NM>=3 1.052914 0.801632 1.313 0.189027
## TMMotorbike -0.097308 0.435607 -0.223 0.823235
## TMCar 0.232824 0.587499 0.396 0.691887
## TMHichhiking/Taxi 0.367141 0.500856 0.733 0.463541
## TPPicking Children -1.273759 0.431232 -2.954 0.003139 **
## TPEntertainment/Food -0.870810 0.290887 -2.994 0.002757 **
## TPOthers -1.272000 0.912520 -1.394 0.163335
## TSYes -0.275798 0.213677 -1.291 0.196800
## SPYes -0.191164 0.455921 -0.419 0.675003
## SPDon't know -1.170028 0.598844 -1.954 0.050724 .
## WIAt Bus Stops 0.729396 0.308264 2.366 0.017975 *
## WIInternet/Facebook 1.414802 0.366220 3.863 0.000112 ***
## WIFrom Friends 0.987690 0.400673 2.465 0.013698 *
## WIOthers 0.826023 0.459715 1.797 0.072365 .
## BCYes -0.797902 0.232726 -3.429 0.000607 ***
## CAYes -0.781828 0.235347 -3.322 0.000894 ***
## BSCNormal -0.279221 0.406630 -0.687 0.492289
## BSCAgree -0.217257 0.368406 -0.590 0.555378
## BSANormal -0.571989 0.445914 -1.283 0.199585
## BSAAgree 0.095555 0.413196 0.231 0.817114
## BRCNormal -0.435540 0.431687 -1.009 0.313011
## BRCAgree -0.409063 0.342847 -1.193 0.232817
## MRComfortable -0.026879 0.395853 -0.068 0.945864
## MRLow price 0.846104 0.558206 1.516 0.129581
## MRAccessibility 0.767021 0.438945 1.747 0.080565 .
## MRReliability -0.199500 0.526415 -0.379 0.704704
## MRothers 0.197207 0.671097 0.294 0.768866
## BUBYes -0.044926 0.236129 -0.190 0.849106
## SBMNormal 0.348029 0.290242 1.199 0.230489
## SBMAgree 1.967184 0.335713 5.860 4.64e-09 ***
## SBSNormal -0.023419 0.278773 -0.084 0.933051
## SBSAgree 0.761571 0.387746 1.964 0.049519 *
## TI 0.012377 0.007423 1.667 0.095445 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 612.33 on 677 degrees of freedom
## AIC: 706.33
##
## Number of Fisher Scoring iterations: 5
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6) --> 12 variables
#mg1 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
## Way 2: Use function lmr in package "rms" - library(rms) -ml = lrm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI)
# Find Parsimonious model - Tìm mô hình tối ưu - GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-5)
xvars <- BS[, c(34,35,36,38,39,40,42,45,3,4,11,30,32,2,1,21,22,28,12, 17, 18, 19, 8)]
y <- BS$MSI
bma.search <- bic.glm (xvars, y, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search)
##
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial", strict = F, OR = 20)
##
##
## 7 models were selected
## Best 5 models (cumulative posterior probability = 0.9391 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -0.3451553 0.392538 -2.548e-01 -5.234e-01
## GD 5.2
## .Male 0.0163310 0.083048 . .
## AG 0.0
## .25-60 0.0000000 0.000000 . .
## .>60 0.0000000 0.000000 . .
## OC 0.0
## .Officers/Workers 0.0000000 0.000000 . .
## .Housewife/Unemployed 0.0000000 0.000000 . .
## .Free labor 0.0000000 0.000000 . .
## .Others 0.0000000 0.000000 . .
## NC 0.0
## .Yes 0.0000000 0.000000 . .
## MC 96.7
## .Yes -1.0981072 0.350068 -1.111e+00 -1.190e+00
## CC 0.0
## .Yes 0.0000000 0.000000 . .
## MO 6.2
## .Yes -0.0360873 0.164014 . .
## NM 0.0
## .1 0.0000000 0.000000 . .
## .2 0.0000000 0.000000 . .
## .>=3 0.0000000 0.000000 . .
## TM 0.0
## .Motorbike 0.0000000 0.000000 . .
## .Car 0.0000000 0.000000 . .
## .Hichhiking/Taxi 0.0000000 0.000000 . .
## TP 0.0
## .Picking Children 0.0000000 0.000000 . .
## .Entertainment/Food 0.0000000 0.000000 . .
## .Others 0.0000000 0.000000 . .
## TS 10.6
## .Yes -0.0405234 0.133809 . .
## SP 0.0
## .Yes 0.0000000 0.000000 . .
## .Don't know 0.0000000 0.000000 . .
## WI 100.0
## .At Bus Stops 0.6996884 0.244735 6.713e-01 7.465e-01
## .Internet/Facebook 1.3859142 0.292753 1.351e+00 1.429e+00
## .From Friends 1.1013938 0.330802 1.068e+00 1.156e+00
## .Others 0.6575057 0.390179 6.257e-01 6.780e-01
## BC 100.0
## .Yes -0.8550115 0.199569 -8.642e-01 -8.458e-01
## CA 100.0
## .Yes -0.8789633 0.207461 -8.995e-01 -8.335e-01
## BSC 0.0
## .Normal 0.0000000 0.000000 . .
## .Agree 0.0000000 0.000000 . .
## BSA 3.2
## .Normal -0.0210880 0.132381 . .
## .Agree 0.0003411 0.054078 . .
## BRC 0.0
## .Normal 0.0000000 0.000000 . .
## .Agree 0.0000000 0.000000 . .
## MR 0.0
## .Comfortable 0.0000000 0.000000 . .
## .Low price 0.0000000 0.000000 . .
## .Accessibility 0.0000000 0.000000 . .
## .Reliability 0.0000000 0.000000 . .
## .others 0.0000000 0.000000 . .
## BUB 0.0
## .Yes 0.0000000 0.000000 . .
## SBM 100.0
## .Normal 0.1316721 0.221303 1.283e-01 1.210e-01
## .Agree 1.9435843 0.254496 1.948e+00 1.943e+00
## SBS 0.0
## .Normal 0.0000000 0.000000 . .
## .Agree 0.0000000 0.000000 . .
## TI 35.1 0.0052631 0.007962 . 1.499e-02
##
## nVar 5 6
## BIC -4.009e+03 -4.009e+03
## post prob 0.397 0.351
## model 3 model 4 model 5
## Intercept -8.546e-02 -4.498e-01 -6.185e-01
## GD
## .Male . 3.133e-01 .
## AG
## .25-60 . . .
## .>60 . . .
## OC
## .Officers/Workers . . .
## .Housewife/Unemployed . . .
## .Free labor . . .
## .Others . . .
## NC
## .Yes . . .
## MC
## .Yes -1.135e+00 -1.125e+00 .
## CC
## .Yes . . .
## MO
## .Yes . . -7.785e-01
## NM
## .1 . . .
## .2 . . .
## .>=3 . . .
## TM
## .Motorbike . . .
## .Car . . .
## .Hichhiking/Taxi . . .
## TP
## .Picking Children . . .
## .Entertainment/Food . . .
## .Others . . .
## TS
## .Yes -3.840e-01 . .
## SP
## .Yes . . .
## .Don't know . . .
## WI
## .At Bus Stops 6.916e-01 6.768e-01 6.772e-01
## .Internet/Facebook 1.417e+00 1.343e+00 1.342e+00
## .From Friends 1.059e+00 1.089e+00 1.182e+00
## .Others 7.489e-01 6.735e-01 5.862e-01
## BC
## .Yes -8.485e-01 -8.256e-01 -8.283e-01
## CA
## .Yes -9.045e-01 -9.303e-01 -8.677e-01
## BSC
## .Normal . . .
## .Agree . . .
## BSA
## .Normal . . .
## .Agree . . .
## BRC
## .Normal . . .
## .Agree . . .
## MR
## .Comfortable . . .
## .Low price . . .
## .Accessibility . . .
## .Reliability . . .
## .others . . .
## BUB
## .Yes . . .
## SBM
## .Normal 1.500e-01 1.546e-01 1.288e-01
## .Agree 1.959e+00 1.947e+00 1.899e+00
## SBS
## .Normal . . .
## .Agree . . .
## TI . . .
##
## nVar 6 6 5
## BIC -4.007e+03 -4.005e+03 -4.005e+03
## post prob 0.106 0.052 0.033
# Remove ns variables (GD, NC, CC, TS) and change DI (7) by TI (8) --> 19 variables
xvars1 <- BS[, c(35,36,39,42,45,3,4,30,32,2,1,21,22,28,12, 17, 18, 19, 7)]
y1 <- BS$MSI
bma.search1 <- bic.glm (xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search1)
##
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial", strict = F, OR = 20)
##
##
## 3 models were selected
## Best 3 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -0.2687921 0.36471 -2.548e-01 -6.185e-01
## AG 0.0
## .25-60 0.0000000 0.00000 . .
## .>60 0.0000000 0.00000 . .
## OC 0.0
## .Officers/Workers 0.0000000 0.00000 . .
## .Housewife/Unemployed 0.0000000 0.00000 . .
## .Free labor 0.0000000 0.00000 . .
## .Others 0.0000000 0.00000 . .
## MC 92.8
## .Yes -1.0312792 0.39381 -1.111e+00 .
## MO 7.2
## .Yes -0.0558463 0.21086 . -7.785e-01
## NM 0.0
## .1 0.0000000 0.00000 . .
## .2 0.0000000 0.00000 . .
## .>=3 0.0000000 0.00000 . .
## TM 0.0
## .Motorbike 0.0000000 0.00000 . .
## .Car 0.0000000 0.00000 . .
## .Hichhiking/Taxi 0.0000000 0.00000 . .
## TP 0.0
## .Picking Children 0.0000000 0.00000 . .
## .Entertainment/Food 0.0000000 0.00000 . .
## .Others 0.0000000 0.00000 . .
## SP 0.0
## .Yes 0.0000000 0.00000 . .
## .Don't know 0.0000000 0.00000 . .
## WI 100.0
## .At Bus Stops 0.6702762 0.24048 6.713e-01 6.772e-01
## .Internet/Facebook 1.3523836 0.28913 1.351e+00 1.342e+00
## .From Friends 1.0733555 0.32789 1.068e+00 1.182e+00
## .Others 0.6239739 0.38837 6.257e-01 5.862e-01
## BC 100.0
## .Yes -0.8654120 0.19908 -8.642e-01 -8.283e-01
## CA 100.0
## .Yes -0.8998859 0.20415 -8.995e-01 -8.677e-01
## BSC 0.0
## .Normal 0.0000000 0.00000 . .
## .Agree 0.0000000 0.00000 . .
## BSA 6.8
## .Normal -0.0456443 0.19186 . .
## .Agree 0.0007384 0.07956 . .
## BRC 0.0
## .Normal 0.0000000 0.00000 . .
## .Agree 0.0000000 0.00000 . .
## MR 0.0
## .Comfortable 0.0000000 0.00000 . .
## .Low price 0.0000000 0.00000 . .
## .Accessibility 0.0000000 0.00000 . .
## .Reliability 0.0000000 0.00000 . .
## .others 0.0000000 0.00000 . .
## BUB 0.0
## .Yes 0.0000000 0.00000 . .
## SBM 100.0
## .Normal 0.1332786 0.22109 1.283e-01 1.288e-01
## .Agree 1.9406127 0.25385 1.948e+00 1.899e+00
## SBS 0.0
## .Normal 0.0000000 0.00000 . .
## .Agree 0.0000000 0.00000 . .
## DI 0.0 0.0000000 0.00000 . .
##
## nVar 5 5
## BIC -4.009e+03 -4.005e+03
## post prob 0.860 0.072
## model 3
## Intercept -7.800e-02
## AG
## .25-60 .
## .>60 .
## OC
## .Officers/Workers .
## .Housewife/Unemployed .
## .Free labor .
## .Others .
## MC
## .Yes -1.108e+00
## MO
## .Yes .
## NM
## .1 .
## .2 .
## .>=3 .
## TM
## .Motorbike .
## .Car .
## .Hichhiking/Taxi .
## TP
## .Picking Children .
## .Entertainment/Food .
## .Others .
## SP
## .Yes .
## .Don't know .
## WI
## .At Bus Stops 6.502e-01
## .Internet/Facebook 1.377e+00
## .From Friends 1.026e+00
## .Others 6.418e-01
## BC
## .Yes -9.198e-01
## CA
## .Yes -9.378e-01
## BSC
## .Normal .
## .Agree .
## BSA
## .Normal -6.665e-01
## .Agree 1.078e-02
## BRC
## .Normal .
## .Agree .
## MR
## .Comfortable .
## .Low price .
## .Accessibility .
## .Reliability .
## .others .
## BUB
## .Yes .
## SBM
## .Normal 2.000e-01
## .Agree 1.888e+00
## SBS
## .Normal .
## .Agree .
## DI .
##
## nVar 6
## BIC -4.004e+03
## post prob 0.068
#imageplot.bma(bma.search1)
## Model 1 from bma.search1 : Remove ns variables (GD, NC, CC, TS) and change DI (7) by TI (8) --> 19 variables
mg1 = glm(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial, data = BS)
mg1
##
## Call: glm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI +
## BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial,
## data = BS)
##
## Coefficients:
## (Intercept) AG25-60 AG>60
## -0.460888 0.509909 1.600156
## OCOfficers/Workers OCHousewife/Unemployed OCFree labor
## -0.601602 -0.672492 -1.067818
## OCOthers MCYes MOYes
## 0.145226 -0.803585 -0.142332
## NM1 NM2 NM>=3
## 0.797740 0.584359 1.153470
## TMMotorbike TMCar TMHichhiking/Taxi
## 0.006570 0.810212 0.504619
## TPPicking Children TPEntertainment/Food TPOthers
## -1.452278 -0.855917 -1.357762
## SPYes SPDon't know WIAt Bus Stops
## -0.040310 -1.119875 0.622671
## WIInternet/Facebook WIFrom Friends WIOthers
## 1.294873 0.905878 0.670218
## BCYes CAYes BSCNormal
## -0.888957 -0.915710 -0.227395
## BSCAgree BSANormal BSAAgree
## -0.194065 -0.596242 0.096531
## BRCNormal BRCAgree MRComfortable
## -0.343956 -0.263950 0.052656
## MRLow price MRAccessibility MRReliability
## 0.969812 0.804047 -0.097818
## MRothers BUBYes SBMNormal
## 0.231989 -0.050910 0.252138
## SBMAgree SBSNormal SBSAgree
## 1.963412 0.008038 0.775681
## DI
## -0.032412
##
## Degrees of Freedom: 723 Total (i.e. Null); 681 Residual
## Null Deviance: 841.6
## Residual Deviance: 618.4 AIC: 704.4
summary(mg1)
##
## Call:
## glm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI +
## BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial,
## data = BS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6346 -0.6513 -0.3886 0.4517 3.0879
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.460888 1.014052 -0.455 0.649468
## AG25-60 0.509909 0.408317 1.249 0.211735
## AG>60 1.600156 0.809458 1.977 0.048062 *
## OCOfficers/Workers -0.601602 0.444097 -1.355 0.175525
## OCHousewife/Unemployed -0.672492 0.670571 -1.003 0.315927
## OCFree labor -1.067818 0.435640 -2.451 0.014240 *
## OCOthers 0.145226 0.567793 0.256 0.798126
## MCYes -0.803585 0.424113 -1.895 0.058127 .
## MOYes -0.142332 0.381606 -0.373 0.709162
## NM1 0.797740 0.828571 0.963 0.335653
## NM2 0.584359 0.799689 0.731 0.464942
## NM>=3 1.153470 0.800814 1.440 0.149762
## TMMotorbike 0.006570 0.439137 0.015 0.988063
## TMCar 0.810212 0.563283 1.438 0.150328
## TMHichhiking/Taxi 0.504619 0.497789 1.014 0.310716
## TPPicking Children -1.452278 0.431442 -3.366 0.000762 ***
## TPEntertainment/Food -0.855917 0.284101 -3.013 0.002589 **
## TPOthers -1.357762 0.918309 -1.479 0.139262
## SPYes -0.040310 0.446021 -0.090 0.927988
## SPDon't know -1.119875 0.598265 -1.872 0.061224 .
## WIAt Bus Stops 0.622671 0.304066 2.048 0.040578 *
## WIInternet/Facebook 1.294873 0.357963 3.617 0.000298 ***
## WIFrom Friends 0.905878 0.394216 2.298 0.021566 *
## WIOthers 0.670218 0.463461 1.446 0.148145
## BCYes -0.888957 0.231589 -3.839 0.000124 ***
## CAYes -0.915710 0.236190 -3.877 0.000106 ***
## BSCNormal -0.227395 0.404108 -0.563 0.573633
## BSCAgree -0.194065 0.362292 -0.536 0.592195
## BSANormal -0.596242 0.443520 -1.344 0.178838
## BSAAgree 0.096531 0.408890 0.236 0.813369
## BRCNormal -0.343956 0.431132 -0.798 0.424988
## BRCAgree -0.263950 0.341203 -0.774 0.439175
## MRComfortable 0.052656 0.393148 0.134 0.893455
## MRLow price 0.969812 0.556347 1.743 0.081302 .
## MRAccessibility 0.804047 0.436812 1.841 0.065663 .
## MRReliability -0.097818 0.520557 -0.188 0.850948
## MRothers 0.231989 0.656438 0.353 0.723784
## BUBYes -0.050910 0.235854 -0.216 0.829101
## SBMNormal 0.252138 0.286773 0.879 0.379279
## SBMAgree 1.963412 0.329928 5.951 2.66e-09 ***
## SBSNormal 0.008038 0.273902 0.029 0.976590
## SBSAgree 0.775681 0.384369 2.018 0.043585 *
## DI -0.032412 0.022708 -1.427 0.153481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 618.39 on 681 degrees of freedom
## AIC: 704.39
##
## Number of Fisher Scoring iterations: 5
## CHON: Model 2 --> 5 variables (bma.search1) - MC, WI, BC, CA, SBM + some variables significant and choose by AIC
mg2 = glm(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial, data = BS)
mg2
##
## Call: glm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial,
## data = BS)
##
## Coefficients:
## (Intercept) OCOfficers/Workers OCHousewife/Unemployed
## 0.04216 -0.31020 -0.29222
## OCFree labor OCOthers MCYes
## -0.66363 0.91856 -0.85258
## TPPicking Children TPEntertainment/Food TPOthers
## -1.05006 -0.80345 -1.11305
## SPYes SPDon't know WIAt Bus Stops
## -0.02669 -1.10368 0.60130
## WIInternet/Facebook WIFrom Friends WIOthers
## 1.18500 0.98890 0.60877
## BCYes CAYes SBMNormal
## -0.81650 -0.92689 0.18031
## SBMAgree
## 2.13985
##
## Degrees of Freedom: 723 Total (i.e. Null); 705 Residual
## Null Deviance: 841.6
## Residual Deviance: 656.2 AIC: 694.2
summary(mg2)
##
## Call:
## glm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial,
## data = BS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0246 -0.7121 -0.4431 0.5256 2.7444
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04216 0.48436 0.087 0.930642
## OCOfficers/Workers -0.31020 0.27507 -1.128 0.259441
## OCHousewife/Unemployed -0.29222 0.52541 -0.556 0.578092
## OCFree labor -0.66363 0.28470 -2.331 0.019753 *
## OCOthers 0.91856 0.39809 2.307 0.021032 *
## MCYes -0.85258 0.31772 -2.683 0.007286 **
## TPPicking Children -1.05006 0.38954 -2.696 0.007026 **
## TPEntertainment/Food -0.80345 0.26544 -3.027 0.002471 **
## TPOthers -1.11305 0.81779 -1.361 0.173497
## SPYes -0.02669 0.41726 -0.064 0.948998
## SPDon't know -1.10368 0.55322 -1.995 0.046040 *
## WIAt Bus Stops 0.60130 0.28481 2.111 0.034751 *
## WIInternet/Facebook 1.18500 0.33140 3.576 0.000349 ***
## WIFrom Friends 0.98890 0.36275 2.726 0.006408 **
## WIOthers 0.60877 0.42965 1.417 0.156513
## BCYes -0.81650 0.20905 -3.906 9.39e-05 ***
## CAYes -0.92689 0.21197 -4.373 1.23e-05 ***
## SBMNormal 0.18031 0.22877 0.788 0.430593
## SBMAgree 2.13985 0.27266 7.848 4.22e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 656.17 on 705 degrees of freedom
## AIC: 694.17
##
## Number of Fisher Scoring iterations: 5
## Calculate OR (odd ratio)
exp(mg2$coefficients)
## (Intercept) OCOfficers/Workers OCHousewife/Unemployed
## 1.0430586 0.7332997 0.7466038
## OCFree labor OCOthers MCYes
## 0.5149797 2.5056898 0.4263137
## TPPicking Children TPEntertainment/Food TPOthers
## 0.3499178 0.4477802 0.3285540
## SPYes SPDon't know WIAt Bus Stops
## 0.9736630 0.3316474 1.8244835
## WIInternet/Facebook WIFrom Friends WIOthers
## 3.2706852 2.6882733 1.8381742
## BCYes CAYes SBMNormal
## 0.4419760 0.3957832 1.1975866
## SBMAgree
## 8.4981677
exp(coef(mg2))
## (Intercept) OCOfficers/Workers OCHousewife/Unemployed
## 1.0430586 0.7332997 0.7466038
## OCFree labor OCOthers MCYes
## 0.5149797 2.5056898 0.4263137
## TPPicking Children TPEntertainment/Food TPOthers
## 0.3499178 0.4477802 0.3285540
## SPYes SPDon't know WIAt Bus Stops
## 0.9736630 0.3316474 1.8244835
## WIInternet/Facebook WIFrom Friends WIOthers
## 3.2706852 2.6882733 1.8381742
## BCYes CAYes SBMNormal
## 0.4419760 0.3957832 1.1975866
## SBMAgree
## 8.4981677
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
##
## alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
##
## alpha
logistic.display(mg2)
##
## Logistic regression predicting MSI : Yes vs No
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## OC: ref.=Pupils/Students
## Officers/Workers 0.44 (0.28,0.67) 0.73 (0.43,1.26) 0.259
## Housewife/Unemployed 0.31 (0.13,0.78) 0.75 (0.27,2.09) 0.578
## Free labor 0.46 (0.3,0.73) 0.51 (0.29,0.9) 0.02
## Others 1.15 (0.62,2.13) 2.51 (1.15,5.47) 0.021
##
## MC: Yes vs No 0.44 (0.28,0.7) 0.43 (0.23,0.79) 0.007
##
## TP: ref.=Work/Study
## Picking Children 0.42 (0.22,0.78) 0.35 (0.16,0.75) 0.007
## Entertainment/Food 0.68 (0.44,1.06) 0.45 (0.27,0.75) 0.002
## Others 0.53 (0.15,1.9) 0.33 (0.07,1.63) 0.173
##
## SP: ref.=No
## Yes 1.88 (0.98,3.6) 0.97 (0.43,2.21) 0.949
## Don't know 0.41 (0.15,1.11) 0.33 (0.11,0.98) 0.046
##
## WI: ref.=No
## At Bus Stops 2.54 (1.66,3.86) 1.82 (1.04,3.19) 0.035
## Internet/Facebook 3.4 (2.02,5.72) 3.27 (1.71,6.26) < 0.001
## From Friends 4.4 (2.45,7.89) 2.69 (1.32,5.47) 0.006
## Others 2.29 (1.16,4.5) 1.84 (0.79,4.27) 0.157
##
## BC: Yes vs No 0.52 (0.37,0.73) 0.44 (0.29,0.67) < 0.001
##
## CA: Yes vs No 0.63 (0.45,0.88) 0.4 (0.26,0.6) < 0.001
##
## SBM: ref.=Don't agree
## Normal 1.19 (0.79,1.79) 1.2 (0.76,1.88) 0.431
## Agree 6.34 (4.07,9.88) 8.5 (4.98,14.5) < 0.001
##
## P(LR-test)
## OC: ref.=Pupils/Students 0.002
## Officers/Workers
## Housewife/Unemployed
## Free labor
## Others
##
## MC: Yes vs No 0.007
##
## TP: ref.=Work/Study < 0.001
## Picking Children
## Entertainment/Food
## Others
##
## SP: ref.=No 0.034
## Yes
## Don't know
##
## WI: ref.=No 0.004
## At Bus Stops
## Internet/Facebook
## From Friends
## Others
##
## BC: Yes vs No < 0.001
##
## CA: Yes vs No < 0.001
##
## SBM: ref.=Don't agree < 0.001
## Normal
## Agree
##
## Log-likelihood = -328.0847
## No. of observations = 724
## AIC value = 694.1694
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms: mg2-8 variables, mg1-19 variables, mg-23 variables
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
##
## dotplot
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
## Registered S3 method overwritten by 'rms':
## method from
## print.lrtest epiDisplay
##
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
##
## lrtest
## The following objects are masked from 'package:car':
##
## Predict, vif
mg2.c2 = lrm(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS)
mg2.c2
## Logistic Regression Model
##
## lrm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 724 LR chi2 185.43 R2 0.329 C 0.807
## No 530 d.f. 18 g 1.549 Dxy 0.614
## Yes 194 Pr(> chi2) <0.0001 gr 4.705 gamma 0.615
## max |deriv| 3e-09 gp 0.239 tau-a 0.241
## Brier 0.146
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 0.0422 0.4844 0.09 0.9306
## OC=Officers/Workers -0.3102 0.2751 -1.13 0.2594
## OC=Housewife/Unemployed -0.2922 0.5254 -0.56 0.5781
## OC=Free labor -0.6636 0.2847 -2.33 0.0198
## OC=Others 0.9186 0.3981 2.31 0.0210
## MC=Yes -0.8526 0.3177 -2.68 0.0073
## TP=Picking Children -1.0501 0.3895 -2.70 0.0070
## TP=Entertainment/Food -0.8035 0.2654 -3.03 0.0025
## TP=Others -1.1131 0.8178 -1.36 0.1735
## SP=Yes -0.0267 0.4173 -0.06 0.9490
## SP=Don't know -1.1037 0.5532 -2.00 0.0460
## WI=At Bus Stops 0.6013 0.2848 2.11 0.0348
## WI=Internet/Facebook 1.1850 0.3314 3.58 0.0003
## WI=From Friends 0.9889 0.3627 2.73 0.0064
## WI=Others 0.6088 0.4297 1.42 0.1565
## BC=Yes -0.8165 0.2090 -3.91 <0.0001
## CA=Yes -0.9269 0.2120 -4.37 <0.0001
## SBM=Normal 0.1803 0.2288 0.79 0.4306
## SBM=Agree 2.1399 0.2727 7.85 <0.0001
##
mg1.c2 = lrm(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, data = BS)
mg1.c2
## Logistic Regression Model
##
## lrm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI +
## BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, data = BS)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 724 LR chi2 223.21 R2 0.386 C 0.838
## No 530 d.f. 42 g 1.783 Dxy 0.676
## Yes 194 Pr(> chi2) <0.0001 gr 5.949 gamma 0.676
## max |deriv| 6e-07 gp 0.261 tau-a 0.266
## Brier 0.136
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.4609 1.0141 -0.45 0.6495
## AG=25-60 0.5099 0.4083 1.25 0.2117
## AG=>60 1.6002 0.8095 1.98 0.0481
## OC=Officers/Workers -0.6016 0.4441 -1.35 0.1755
## OC=Housewife/Unemployed -0.6725 0.6706 -1.00 0.3159
## OC=Free labor -1.0678 0.4356 -2.45 0.0142
## OC=Others 0.1452 0.5678 0.26 0.7981
## MC=Yes -0.8036 0.4241 -1.89 0.0581
## MO=Yes -0.1423 0.3816 -0.37 0.7092
## NM=1 0.7977 0.8286 0.96 0.3357
## NM=2 0.5844 0.7997 0.73 0.4649
## NM=>=3 1.1535 0.8008 1.44 0.1498
## TM=Motorbike 0.0066 0.4391 0.01 0.9881
## TM=Car 0.8102 0.5633 1.44 0.1503
## TM=Hichhiking/Taxi 0.5046 0.4978 1.01 0.3107
## TP=Picking Children -1.4523 0.4314 -3.37 0.0008
## TP=Entertainment/Food -0.8559 0.2841 -3.01 0.0026
## TP=Others -1.3578 0.9183 -1.48 0.1393
## SP=Yes -0.0403 0.4460 -0.09 0.9280
## SP=Don't know -1.1199 0.5983 -1.87 0.0612
## WI=At Bus Stops 0.6227 0.3041 2.05 0.0406
## WI=Internet/Facebook 1.2949 0.3580 3.62 0.0003
## WI=From Friends 0.9059 0.3942 2.30 0.0216
## WI=Others 0.6702 0.4635 1.45 0.1481
## BC=Yes -0.8890 0.2316 -3.84 0.0001
## CA=Yes -0.9157 0.2362 -3.88 0.0001
## BSC=Normal -0.2274 0.4041 -0.56 0.5736
## BSC=Agree -0.1941 0.3623 -0.54 0.5922
## BSA=Normal -0.5962 0.4435 -1.34 0.1788
## BSA=Agree 0.0965 0.4089 0.24 0.8134
## BRC=Normal -0.3440 0.4311 -0.80 0.4250
## BRC=Agree -0.2640 0.3412 -0.77 0.4392
## MR=Comfortable 0.0527 0.3932 0.13 0.8935
## MR=Low price 0.9698 0.5564 1.74 0.0813
## MR=Accessibility 0.8040 0.4368 1.84 0.0657
## MR=Reliability -0.0978 0.5206 -0.19 0.8509
## MR=others 0.2320 0.6564 0.35 0.7238
## BUB=Yes -0.0509 0.2359 -0.22 0.8291
## SBM=Normal 0.2521 0.2868 0.88 0.3793
## SBM=Agree 1.9634 0.3299 5.95 <0.0001
## SBS=Normal 0.0080 0.2739 0.03 0.9766
## SBS=Agree 0.7757 0.3844 2.02 0.0436
## DI -0.0324 0.0227 -1.43 0.1535
##
mg.c2 = lrm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, data = BS)
mg.c2
## Logistic Regression Model
##
## lrm(formula = MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM +
## TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB +
## SBM + SBS + TI, data = BS)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 724 LR chi2 229.27 R2 0.395 C 0.841
## No 530 d.f. 46 g 1.803 Dxy 0.682
## Yes 194 Pr(> chi2) <0.0001 gr 6.069 gamma 0.682
## max |deriv| 1e-06 gp 0.263 tau-a 0.268
## Brier 0.135
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.5530 1.0410 -0.53 0.5953
## GD=Male 0.3038 0.2333 1.30 0.1929
## AG=25-60 0.4082 0.4177 0.98 0.3285
## AG=>60 1.4890 0.8194 1.82 0.0692
## OC=Officers/Workers -0.6459 0.4433 -1.46 0.1451
## OC=Housewife/Unemployed -0.4423 0.6863 -0.64 0.5193
## OC=Free labor -0.9863 0.4365 -2.26 0.0238
## OC=Others -0.0154 0.5777 -0.03 0.9787
## NC=Yes 0.2117 0.2323 0.91 0.3622
## MC=Yes -0.8439 0.4270 -1.98 0.0481
## CC=Yes 0.1336 0.3590 0.37 0.7097
## MO=Yes -0.2218 0.3776 -0.59 0.5569
## NM=1 0.7581 0.8286 0.91 0.3603
## NM=2 0.4766 0.8008 0.60 0.5518
## NM=>=3 1.0529 0.8016 1.31 0.1890
## TM=Motorbike -0.0973 0.4356 -0.22 0.8232
## TM=Car 0.2328 0.5875 0.40 0.6919
## TM=Hichhiking/Taxi 0.3671 0.5009 0.73 0.4635
## TP=Picking Children -1.2738 0.4312 -2.95 0.0031
## TP=Entertainment/Food -0.8708 0.2909 -2.99 0.0028
## TP=Others -1.2720 0.9125 -1.39 0.1633
## TS=Yes -0.2758 0.2137 -1.29 0.1968
## SP=Yes -0.1912 0.4559 -0.42 0.6750
## SP=Don't know -1.1700 0.5989 -1.95 0.0507
## WI=At Bus Stops 0.7294 0.3083 2.37 0.0180
## WI=Internet/Facebook 1.4148 0.3662 3.86 0.0001
## WI=From Friends 0.9877 0.4007 2.47 0.0137
## WI=Others 0.8260 0.4597 1.80 0.0724
## BC=Yes -0.7979 0.2327 -3.43 0.0006
## CA=Yes -0.7818 0.2353 -3.32 0.0009
## BSC=Normal -0.2792 0.4066 -0.69 0.4923
## BSC=Agree -0.2173 0.3684 -0.59 0.5554
## BSA=Normal -0.5720 0.4459 -1.28 0.1996
## BSA=Agree 0.0956 0.4132 0.23 0.8171
## BRC=Normal -0.4355 0.4317 -1.01 0.3130
## BRC=Agree -0.4091 0.3428 -1.19 0.2328
## MR=Comfortable -0.0269 0.3959 -0.07 0.9459
## MR=Low price 0.8461 0.5582 1.52 0.1296
## MR=Accessibility 0.7670 0.4389 1.75 0.0806
## MR=Reliability -0.1995 0.5264 -0.38 0.7047
## MR=others 0.1972 0.6711 0.29 0.7689
## BUB=Yes -0.0449 0.2361 -0.19 0.8491
## SBM=Normal 0.3480 0.2902 1.20 0.2305
## SBM=Agree 1.9672 0.3357 5.86 <0.0001
## SBS=Normal -0.0234 0.2788 -0.08 0.9331
## SBS=Agree 0.7616 0.3877 1.96 0.0495
## TI 0.0124 0.0074 1.67 0.0954
##
5. Test models
# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình): mg2 tốt nhất
deviance(mg)
## [1] 612.3312
deviance(mg1)
## [1] 618.3861
deviance(mg2)
## [1] 656.1694
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê - đều có ý nghĩa tk
library(rms)
lrtest(mg2, mg)
##
## Model 1: MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM
## Model 2: MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS +
## SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS +
## TI
##
## L.R. Chisq d.f. P
## 43.83826440 28.00000000 0.02882647
lrtest(mg,mg2)
##
## Model 1: MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS +
## SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS +
## TI
## Model 2: MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM
##
## L.R. Chisq d.f. P
## 43.83826440 28.00000000 0.02882647
lrtest(mg2, mg1)
##
## Model 1: MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM
## Model 2: MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA +
## BSC + BSA + BRC + MR + BUB + SBM + SBS + DI
##
## L.R. Chisq d.f. P
## 37.783318 24.000000 0.036485
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt - mg2 tốt nhất
AIC(mg)
## [1] 706.3312
AIC(mg1)
## [1] 704.3861
AIC(mg2)
## [1] 694.1694
## Calculate MacFadden's R2 voi packages "pscl" - mg tốt nhất
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -328.0847125 -420.7996718 185.4299185 0.2203304 0.2259499 0.3287621
pR2(mg1)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -309.1930534 -420.7996718 223.2132367 0.2652251 0.2653092 0.3860308
pR2(mg)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML r2CU
## -306.1655803 -420.7996718 229.2681829 0.2724196 0.2714280 0.3949337
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(BS$MSI) # Qs là không chuyển đổi nhưng phân loại thành chuyển đổi thì gán giá trị là 0 và ngược lại
## Yes
## No 0
## Yes 1
dim(BS)
## [1] 724 46
glm.probs <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên
## 1 2 3 4 5 6 7
## 0.24692122 0.04561607 0.38494464 0.25878074 0.04561607 0.05414121 0.04561607
## 8 9 10
## 0.27849642 0.24084863 0.35468576
glm.pred <- rep("No", 724)
glm.pred[glm.probs >= 0.5] = "Yes"
table(glm.pred, BS$MSI)
##
## glm.pred No Yes
## No 495 110
## Yes 35 84
mean(glm.pred == BS$MSI) # Tỷ lệ dự báo chính xác của mô hình 80% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-80 = 20%
## [1] 0.7997238
round(prop.table(table(BS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi xe buýt 73.2% < 80% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
##
## No Yes
## 73.2 26.8
## Model mg
contrasts(BS$MSI)
## Yes
## No 0
## Yes 1
glm.probs.mg1 <- predict(mg1, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg1[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên
## 1 2 3 4 5 6 7
## 0.47252684 0.03151333 0.58623739 0.23954668 0.02832060 0.04888496 0.04486295
## 8 9 10
## 0.19156063 0.13865798 0.40021393
glm.pred.mg1 <- rep("No", 724)
glm.pred.mg1[glm.probs.mg1 >= 0.5] = "Yes"
table(glm.pred.mg1, BS$MSI)
##
## glm.pred.mg1 No Yes
## No 493 101
## Yes 37 93
mean(glm.pred.mg1 == BS$MSI) # Tỷ lệ dự báo chính xác của mô hình 81% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-81 = 19%
## [1] 0.8093923
round(prop.table(table(BS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi sang xe buýt 73.2% < 81% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
##
## No Yes
## 73.2 26.8
# 4.5. Calculate Sensitivity - Độ nhạy
493/(493+101) # Đạt 82% --> Rất lý tưởng: Mô hình xác định tới 82% người không chuyển đổi sang xe buýt
## [1] 0.8299663
# 4.6. Calculate Specificity - Độ đặc hiệu
93/(93+37) # Đạt 71% --> Mô hình có khả năng dự báo 71% người chuyển đổi sang xe buýt
## [1] 0.7153846
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với library(MKmisc) và hàm HLgof.test(fit = fitted(mg2), obs = BSs$MSI) # trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
## The following object is masked from 'package:purrr':
##
## lift
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mg1train = train(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, data = BS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg1train)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6346 -0.6513 -0.3886 0.4517 3.0879
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.460888 1.014052 -0.455 0.649468
## `AG25-60` 0.509909 0.408317 1.249 0.211735
## `AG>60` 1.600156 0.809458 1.977 0.048062 *
## `OCOfficers/Workers` -0.601602 0.444097 -1.355 0.175525
## `OCHousewife/Unemployed` -0.672492 0.670571 -1.003 0.315927
## `OCFree labor` -1.067818 0.435640 -2.451 0.014240 *
## OCOthers 0.145226 0.567793 0.256 0.798126
## MCYes -0.803585 0.424113 -1.895 0.058127 .
## MOYes -0.142332 0.381606 -0.373 0.709162
## NM1 0.797740 0.828571 0.963 0.335653
## NM2 0.584359 0.799689 0.731 0.464942
## `NM>=3` 1.153470 0.800814 1.440 0.149762
## TMMotorbike 0.006570 0.439137 0.015 0.988063
## TMCar 0.810212 0.563283 1.438 0.150328
## `TMHichhiking/Taxi` 0.504619 0.497789 1.014 0.310716
## `TPPicking Children` -1.452278 0.431442 -3.366 0.000762 ***
## `TPEntertainment/Food` -0.855917 0.284101 -3.013 0.002589 **
## TPOthers -1.357762 0.918309 -1.479 0.139262
## SPYes -0.040310 0.446021 -0.090 0.927988
## `SPDon't know` -1.119875 0.598265 -1.872 0.061224 .
## `WIAt Bus Stops` 0.622671 0.304066 2.048 0.040578 *
## `WIInternet/Facebook` 1.294873 0.357963 3.617 0.000298 ***
## `WIFrom Friends` 0.905878 0.394216 2.298 0.021566 *
## WIOthers 0.670218 0.463461 1.446 0.148145
## BCYes -0.888957 0.231589 -3.839 0.000124 ***
## CAYes -0.915710 0.236190 -3.877 0.000106 ***
## BSCNormal -0.227395 0.404108 -0.563 0.573633
## BSCAgree -0.194065 0.362292 -0.536 0.592195
## BSANormal -0.596242 0.443520 -1.344 0.178838
## BSAAgree 0.096531 0.408890 0.236 0.813369
## BRCNormal -0.343956 0.431132 -0.798 0.424988
## BRCAgree -0.263950 0.341203 -0.774 0.439175
## MRComfortable 0.052656 0.393148 0.134 0.893455
## `MRLow price` 0.969812 0.556347 1.743 0.081302 .
## MRAccessibility 0.804047 0.436812 1.841 0.065663 .
## MRReliability -0.097818 0.520557 -0.188 0.850948
## MRothers 0.231989 0.656438 0.353 0.723784
## BUBYes -0.050910 0.235854 -0.216 0.829101
## SBMNormal 0.252138 0.286773 0.879 0.379279
## SBMAgree 1.963412 0.329928 5.951 2.66e-09 ***
## SBSNormal 0.008038 0.273902 0.029 0.976590
## SBSAgree 0.775681 0.384369 2.018 0.043585 *
## DI -0.032412 0.022708 -1.427 0.153481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 618.39 on 681 degrees of freedom
## AIC: 704.39
##
## Number of Fisher Scoring iterations: 5
mg2train = train(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg2train)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0246 -0.7121 -0.4431 0.5256 2.7444
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04216 0.48436 0.087 0.930642
## `OCOfficers/Workers` -0.31020 0.27507 -1.128 0.259441
## `OCHousewife/Unemployed` -0.29222 0.52541 -0.556 0.578092
## `OCFree labor` -0.66363 0.28470 -2.331 0.019753 *
## OCOthers 0.91856 0.39809 2.307 0.021032 *
## MCYes -0.85258 0.31772 -2.683 0.007286 **
## `TPPicking Children` -1.05006 0.38954 -2.696 0.007026 **
## `TPEntertainment/Food` -0.80345 0.26544 -3.027 0.002471 **
## TPOthers -1.11305 0.81779 -1.361 0.173497
## SPYes -0.02669 0.41726 -0.064 0.948998
## `SPDon't know` -1.10368 0.55322 -1.995 0.046040 *
## `WIAt Bus Stops` 0.60130 0.28481 2.111 0.034751 *
## `WIInternet/Facebook` 1.18500 0.33140 3.576 0.000349 ***
## `WIFrom Friends` 0.98890 0.36275 2.726 0.006408 **
## WIOthers 0.60877 0.42965 1.417 0.156513
## BCYes -0.81650 0.20905 -3.906 9.39e-05 ***
## CAYes -0.92689 0.21197 -4.373 1.23e-05 ***
## SBMNormal 0.18031 0.22877 0.788 0.430593
## SBMAgree 2.13985 0.27266 7.848 4.22e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 656.17 on 705 degrees of freedom
## AIC: 694.17
##
## Number of Fisher Scoring iterations: 5
## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg1 <- predict(mg1train, newdata = BS)
confusionMatrix(data = predmg1, BS$MSI)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 493 101
## Yes 37 93
##
## Accuracy : 0.8094
## 95% CI : (0.7789, 0.8374)
## No Information Rate : 0.732
## P-Value [Acc > NIR] : 7.289e-07
##
## Kappa : 0.4574
##
## Mcnemar's Test P-Value : 8.189e-08
##
## Sensitivity : 0.9302
## Specificity : 0.4794
## Pos Pred Value : 0.8300
## Neg Pred Value : 0.7154
## Prevalence : 0.7320
## Detection Rate : 0.6809
## Detection Prevalence : 0.8204
## Balanced Accuracy : 0.7048
##
## 'Positive' Class : No
##
predmg2 <- predict(mg2train, newdata = BS)
confusionMatrix(data = predmg2, BS$MSI)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 495 110
## Yes 35 84
##
## Accuracy : 0.7997
## 95% CI : (0.7687, 0.8283)
## No Information Rate : 0.732
## P-Value [Acc > NIR] : 1.412e-05
##
## Kappa : 0.4182
##
## Mcnemar's Test P-Value : 7.978e-10
##
## Sensitivity : 0.9340
## Specificity : 0.4330
## Pos Pred Value : 0.8182
## Neg Pred Value : 0.7059
## Prevalence : 0.7320
## Detection Rate : 0.6837
## Detection Prevalence : 0.8356
## Balanced Accuracy : 0.6835
##
## 'Positive' Class : No
##
## Lấy dataframe với thông tin Accuracy của mô hình
a = mg1train$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg1" # Thêm biến mô hình
a
## Accuracy Kappa Mohinh
## 1 0.8055556 0.44309392 Logitmg1
## 2 0.7777778 0.25097529 Logitmg1
## 3 0.7945205 0.42032822 Logitmg1
## 4 0.7222222 0.26002055 Logitmg1
## 5 0.7083333 0.14864865 Logitmg1
## 6 0.8194444 0.49240781 Logitmg1
## 7 0.8082192 0.46826223 Logitmg1
## 8 0.7534247 0.33903421 Logitmg1
## 9 0.8082192 0.42905028 Logitmg1
## 10 0.7500000 0.33401850 Logitmg1
## 11 0.7534247 0.26592179 Logitmg1
## 12 0.7916667 0.34146341 Logitmg1
## 13 0.7671233 0.34303864 Logitmg1
## 14 0.8194444 0.49240781 Logitmg1
## 15 0.8611111 0.60220994 Logitmg1
## 16 0.7500000 0.25602755 Logitmg1
## 17 0.7777778 0.38658147 Logitmg1
## 18 0.7671233 0.34303864 Logitmg1
## 19 0.7638889 0.38181818 Logitmg1
## 20 0.7397260 0.33540968 Logitmg1
## 21 0.7083333 0.18004338 Logitmg1
## 22 0.8194444 0.47297297 Logitmg1
## 23 0.7945205 0.43989770 Logitmg1
## 24 0.8611111 0.60220994 Logitmg1
## 25 0.7916667 0.34146341 Logitmg1
## 26 0.7777778 0.36353591 Logitmg1
## 27 0.7945205 0.43989770 Logitmg1
## 28 0.7916667 0.41431670 Logitmg1
## 29 0.7671233 0.36521739 Logitmg1
## 30 0.7534247 0.31633715 Logitmg1
## 31 0.7083333 0.07804878 Logitmg1
## 32 0.7671233 0.31925398 Logitmg1
## 33 0.7808219 0.34748603 Logitmg1
## 34 0.7500000 0.28397790 Logitmg1
## 35 0.7500000 0.35650447 Logitmg1
## 36 0.7945205 0.42032822 Logitmg1
## 37 0.7945205 0.39934174 Logitmg1
## 38 0.8750000 0.66108787 Logitmg1
## 39 0.8194444 0.51046025 Logitmg1
## 40 0.7222222 0.23322684 Logitmg1
## 41 0.7083333 0.14864865 Logitmg1
## 42 0.8472222 0.58577406 Logitmg1
## 43 0.7534247 0.29202586 Logitmg1
## 44 0.7260274 0.21336207 Logitmg1
## 45 0.8194444 0.47297297 Logitmg1
## 46 0.7083333 0.20920502 Logitmg1
## 47 0.7808219 0.34748603 Logitmg1
## 48 0.8082192 0.44935345 Logitmg1
## 49 0.7777778 0.40801644 Logitmg1
## 50 0.8472222 0.57049892 Logitmg1
## 51 0.7083333 0.11475410 Logitmg1
## 52 0.7397260 0.35578263 Logitmg1
## 53 0.7260274 0.21336207 Logitmg1
## 54 0.7500000 0.35650447 Logitmg1
## 55 0.7916667 0.41431670 Logitmg1
## 56 0.8472222 0.58577406 Logitmg1
## 57 0.7397260 0.31370609 Logitmg1
## 58 0.7638889 0.22137405 Logitmg1
## 59 0.8082192 0.44935345 Logitmg1
## 60 0.7500000 0.15734720 Logitmg1
## 61 0.7397260 0.21058623 Logitmg1
## 62 0.7638889 0.28337237 Logitmg1
## 63 0.7808219 0.41247485 Logitmg1
## 64 0.8194444 0.45199063 Logitmg1
## 65 0.8055556 0.48201439 Logitmg1
## 66 0.7777778 0.38658147 Logitmg1
## 67 0.7945205 0.45818902 Logitmg1
## 68 0.7500000 0.25602755 Logitmg1
## 69 0.7638889 0.35983264 Logitmg1
## 70 0.7671233 0.34303864 Logitmg1
## 71 0.7916667 0.41431670 Logitmg1
## 72 0.7916667 0.45454545 Logitmg1
## 73 0.7671233 0.34303864 Logitmg1
## 74 0.8194444 0.45199063 Logitmg1
## 75 0.7945205 0.39934174 Logitmg1
## 76 0.7500000 0.28397790 Logitmg1
## 77 0.7397260 0.33540968 Logitmg1
## 78 0.7222222 0.23322684 Logitmg1
## 79 0.7671233 0.29368241 Logitmg1
## 80 0.7638889 0.35983264 Logitmg1
## 81 0.7534247 0.26592179 Logitmg1
## 82 0.7361111 0.28451883 Logitmg1
## 83 0.7260274 0.24037461 Logitmg1
## 84 0.7777778 0.40801644 Logitmg1
## 85 0.7945205 0.43989770 Logitmg1
## 86 0.8194444 0.49240781 Logitmg1
## 87 0.8219178 0.51457801 Logitmg1
## 88 0.7916667 0.45454545 Logitmg1
## 89 0.8055556 0.44309392 Logitmg1
## 90 0.8333333 0.46201743 Logitmg1
## 91 0.7397260 0.29053708 Logitmg1
## 92 0.7500000 0.33401850 Logitmg1
## 93 0.7808219 0.39229969 Logitmg1
## 94 0.8055556 0.37235367 Logitmg1
## 95 0.7916667 0.39189189 Logitmg1
## 96 0.7500000 0.22580645 Logitmg1
## 97 0.8055556 0.46325879 Logitmg1
## 98 0.8082192 0.46826223 Logitmg1
## 99 0.7916667 0.43514644 Logitmg1
## 100 0.7808219 0.32250580 Logitmg1
b = mg2train$resample
b = b[, -3]
b$Mohinh = "Logitmg2"
b
## Accuracy Kappa Mohinh
## 1 0.7808219 0.2955368 Logitmg2
## 2 0.8055556 0.4632588 Logitmg2
## 3 0.7916667 0.3676815 Logitmg2
## 4 0.7222222 0.2044199 Logitmg2
## 5 0.8493151 0.6026719 Logitmg2
## 6 0.8194444 0.4292683 Logitmg2
## 7 0.7260274 0.1843575 Logitmg2
## 8 0.7777778 0.3635359 Logitmg2
## 9 0.8219178 0.4794295 Logitmg2
## 10 0.7500000 0.2258065 Logitmg2
## 11 0.8055556 0.4213548 Logitmg2
## 12 0.7945205 0.3993417 Logitmg2
## 13 0.7945205 0.3993417 Logitmg2
## 14 0.8194444 0.4729730 Logitmg2
## 15 0.7945205 0.4203282 Logitmg2
## 16 0.7777778 0.2826899 Logitmg2
## 17 0.8333333 0.5560123 Logitmg2
## 18 0.7397260 0.1797753 Logitmg2
## 19 0.8055556 0.4213548 Logitmg2
## 20 0.7222222 0.2332268 Logitmg2
## 21 0.7397260 0.2905371 Logitmg2
## 22 0.7397260 0.2657491 Logitmg2
## 23 0.7808219 0.3922997 Logitmg2
## 24 0.7808219 0.3225058 Logitmg2
## 25 0.7916667 0.3414634 Logitmg2
## 26 0.7916667 0.3414634 Logitmg2
## 27 0.7916667 0.3918919 Logitmg2
## 28 0.7916667 0.4143167 Logitmg2
## 29 0.8194444 0.4519906 Logitmg2
## 30 0.7500000 0.2258065 Logitmg2
## 31 0.7916667 0.3414634 Logitmg2
## 32 0.6986301 0.1644121 Logitmg2
## 33 0.8333333 0.5040184 Logitmg2
## 34 0.7671233 0.2936824 Logitmg2
## 35 0.7638889 0.2833724 Logitmg2
## 36 0.8219178 0.5145780 Logitmg2
## 37 0.7397260 0.1797753 Logitmg2
## 38 0.8333333 0.5399361 Logitmg2
## 39 0.7777778 0.3635359 Logitmg2
## 40 0.8194444 0.4924078 Logitmg2
## 41 0.7638889 0.2536585 Logitmg2
## 42 0.7777778 0.3386912 Logitmg2
## 43 0.7083333 0.1147541 Logitmg2
## 44 0.8082192 0.4290503 Logitmg2
## 45 0.7916667 0.3414634 Logitmg2
## 46 0.7222222 0.1733639 Logitmg2
## 47 0.7777778 0.4080164 Logitmg2
## 48 0.8082192 0.4493534 Logitmg2
## 49 0.7945205 0.4581890 Logitmg2
## 50 0.7808219 0.3225058 Logitmg2
## 51 0.8356164 0.5106145 Logitmg2
## 52 0.7500000 0.2258065 Logitmg2
## 53 0.7916667 0.4143167 Logitmg2
## 54 0.7671233 0.3859476 Logitmg2
## 55 0.7777778 0.4080164 Logitmg2
## 56 0.8356164 0.5442248 Logitmg2
## 57 0.7916667 0.2819149 Logitmg2
## 58 0.7916667 0.3676815 Logitmg2
## 59 0.7397260 0.2657491 Logitmg2
## 60 0.7638889 0.2536585 Logitmg2
## 61 0.7777778 0.3386912 Logitmg2
## 62 0.7500000 0.2560276 Logitmg2
## 63 0.8055556 0.4213548 Logitmg2
## 64 0.8055556 0.4213548 Logitmg2
## 65 0.7534247 0.2659218 Logitmg2
## 66 0.7916667 0.3676815 Logitmg2
## 67 0.7808219 0.3922997 Logitmg2
## 68 0.7945205 0.3767786 Logitmg2
## 69 0.7916667 0.3676815 Logitmg2
## 70 0.7397260 0.2657491 Logitmg2
## 71 0.7638889 0.3818182 Logitmg2
## 72 0.7945205 0.3524542 Logitmg2
## 73 0.7500000 0.2560276 Logitmg2
## 74 0.7945205 0.4581890 Logitmg2
## 75 0.7777778 0.3118280 Logitmg2
## 76 0.8333333 0.5040184 Logitmg2
## 77 0.7534247 0.2920259 Logitmg2
## 78 0.8493151 0.5749074 Logitmg2
## 79 0.7638889 0.2536585 Logitmg2
## 80 0.8194444 0.4292683 Logitmg2
## 81 0.7945205 0.3524542 Logitmg2
## 82 0.7671233 0.2936824 Logitmg2
## 83 0.7638889 0.3108108 Logitmg2
## 84 0.7500000 0.2560276 Logitmg2
## 85 0.8055556 0.4430939 Logitmg2
## 86 0.8082192 0.4859155 Logitmg2
## 87 0.7777778 0.3386912 Logitmg2
## 88 0.8055556 0.4430939 Logitmg2
## 89 0.7500000 0.2258065 Logitmg2
## 90 0.7808219 0.3474860 Logitmg2
## 91 0.8055556 0.4632588 Logitmg2
## 92 0.7916667 0.3918919 Logitmg2
## 93 0.7808219 0.3474860 Logitmg2
## 94 0.7123288 0.1590784 Logitmg2
## 95 0.8472222 0.5857741 Logitmg2
## 96 0.7534247 0.2920259 Logitmg2
## 97 0.8055556 0.3723537 Logitmg2
## 98 0.7638889 0.2833724 Logitmg2
## 99 0.8082192 0.4682622 Logitmg2
## 100 0.7777778 0.2509753 Logitmg2
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
## Accuracy Kappa Mohinh
## 1 0.8055556 0.44309392 Logitmg1
## 2 0.7777778 0.25097529 Logitmg1
## 3 0.7945205 0.42032822 Logitmg1
## 4 0.7222222 0.26002055 Logitmg1
## 5 0.7083333 0.14864865 Logitmg1
## 6 0.8194444 0.49240781 Logitmg1
## 7 0.8082192 0.46826223 Logitmg1
## 8 0.7534247 0.33903421 Logitmg1
## 9 0.8082192 0.42905028 Logitmg1
## 10 0.7500000 0.33401850 Logitmg1
## 11 0.7534247 0.26592179 Logitmg1
## 12 0.7916667 0.34146341 Logitmg1
## 13 0.7671233 0.34303864 Logitmg1
## 14 0.8194444 0.49240781 Logitmg1
## 15 0.8611111 0.60220994 Logitmg1
## 16 0.7500000 0.25602755 Logitmg1
## 17 0.7777778 0.38658147 Logitmg1
## 18 0.7671233 0.34303864 Logitmg1
## 19 0.7638889 0.38181818 Logitmg1
## 20 0.7397260 0.33540968 Logitmg1
## 21 0.7083333 0.18004338 Logitmg1
## 22 0.8194444 0.47297297 Logitmg1
## 23 0.7945205 0.43989770 Logitmg1
## 24 0.8611111 0.60220994 Logitmg1
## 25 0.7916667 0.34146341 Logitmg1
## 26 0.7777778 0.36353591 Logitmg1
## 27 0.7945205 0.43989770 Logitmg1
## 28 0.7916667 0.41431670 Logitmg1
## 29 0.7671233 0.36521739 Logitmg1
## 30 0.7534247 0.31633715 Logitmg1
## 31 0.7083333 0.07804878 Logitmg1
## 32 0.7671233 0.31925398 Logitmg1
## 33 0.7808219 0.34748603 Logitmg1
## 34 0.7500000 0.28397790 Logitmg1
## 35 0.7500000 0.35650447 Logitmg1
## 36 0.7945205 0.42032822 Logitmg1
## 37 0.7945205 0.39934174 Logitmg1
## 38 0.8750000 0.66108787 Logitmg1
## 39 0.8194444 0.51046025 Logitmg1
## 40 0.7222222 0.23322684 Logitmg1
## 41 0.7083333 0.14864865 Logitmg1
## 42 0.8472222 0.58577406 Logitmg1
## 43 0.7534247 0.29202586 Logitmg1
## 44 0.7260274 0.21336207 Logitmg1
## 45 0.8194444 0.47297297 Logitmg1
## 46 0.7083333 0.20920502 Logitmg1
## 47 0.7808219 0.34748603 Logitmg1
## 48 0.8082192 0.44935345 Logitmg1
## 49 0.7777778 0.40801644 Logitmg1
## 50 0.8472222 0.57049892 Logitmg1
## 51 0.7083333 0.11475410 Logitmg1
## 52 0.7397260 0.35578263 Logitmg1
## 53 0.7260274 0.21336207 Logitmg1
## 54 0.7500000 0.35650447 Logitmg1
## 55 0.7916667 0.41431670 Logitmg1
## 56 0.8472222 0.58577406 Logitmg1
## 57 0.7397260 0.31370609 Logitmg1
## 58 0.7638889 0.22137405 Logitmg1
## 59 0.8082192 0.44935345 Logitmg1
## 60 0.7500000 0.15734720 Logitmg1
## 61 0.7397260 0.21058623 Logitmg1
## 62 0.7638889 0.28337237 Logitmg1
## 63 0.7808219 0.41247485 Logitmg1
## 64 0.8194444 0.45199063 Logitmg1
## 65 0.8055556 0.48201439 Logitmg1
## 66 0.7777778 0.38658147 Logitmg1
## 67 0.7945205 0.45818902 Logitmg1
## 68 0.7500000 0.25602755 Logitmg1
## 69 0.7638889 0.35983264 Logitmg1
## 70 0.7671233 0.34303864 Logitmg1
## 71 0.7916667 0.41431670 Logitmg1
## 72 0.7916667 0.45454545 Logitmg1
## 73 0.7671233 0.34303864 Logitmg1
## 74 0.8194444 0.45199063 Logitmg1
## 75 0.7945205 0.39934174 Logitmg1
## 76 0.7500000 0.28397790 Logitmg1
## 77 0.7397260 0.33540968 Logitmg1
## 78 0.7222222 0.23322684 Logitmg1
## 79 0.7671233 0.29368241 Logitmg1
## 80 0.7638889 0.35983264 Logitmg1
## 81 0.7534247 0.26592179 Logitmg1
## 82 0.7361111 0.28451883 Logitmg1
## 83 0.7260274 0.24037461 Logitmg1
## 84 0.7777778 0.40801644 Logitmg1
## 85 0.7945205 0.43989770 Logitmg1
## 86 0.8194444 0.49240781 Logitmg1
## 87 0.8219178 0.51457801 Logitmg1
## 88 0.7916667 0.45454545 Logitmg1
## 89 0.8055556 0.44309392 Logitmg1
## 90 0.8333333 0.46201743 Logitmg1
## 91 0.7397260 0.29053708 Logitmg1
## 92 0.7500000 0.33401850 Logitmg1
## 93 0.7808219 0.39229969 Logitmg1
## 94 0.8055556 0.37235367 Logitmg1
## 95 0.7916667 0.39189189 Logitmg1
## 96 0.7500000 0.22580645 Logitmg1
## 97 0.8055556 0.46325879 Logitmg1
## 98 0.8082192 0.46826223 Logitmg1
## 99 0.7916667 0.43514644 Logitmg1
## 100 0.7808219 0.32250580 Logitmg1
## 101 0.7808219 0.29553679 Logitmg2
## 102 0.8055556 0.46325879 Logitmg2
## 103 0.7916667 0.36768150 Logitmg2
## 104 0.7222222 0.20441989 Logitmg2
## 105 0.8493151 0.60267194 Logitmg2
## 106 0.8194444 0.42926829 Logitmg2
## 107 0.7260274 0.18435754 Logitmg2
## 108 0.7777778 0.36353591 Logitmg2
## 109 0.8219178 0.47942951 Logitmg2
## 110 0.7500000 0.22580645 Logitmg2
## 111 0.8055556 0.42135476 Logitmg2
## 112 0.7945205 0.39934174 Logitmg2
## 113 0.7945205 0.39934174 Logitmg2
## 114 0.8194444 0.47297297 Logitmg2
## 115 0.7945205 0.42032822 Logitmg2
## 116 0.7777778 0.28268991 Logitmg2
## 117 0.8333333 0.55601233 Logitmg2
## 118 0.7397260 0.17977528 Logitmg2
## 119 0.8055556 0.42135476 Logitmg2
## 120 0.7222222 0.23322684 Logitmg2
## 121 0.7397260 0.29053708 Logitmg2
## 122 0.7397260 0.26574907 Logitmg2
## 123 0.7808219 0.39229969 Logitmg2
## 124 0.7808219 0.32250580 Logitmg2
## 125 0.7916667 0.34146341 Logitmg2
## 126 0.7916667 0.34146341 Logitmg2
## 127 0.7916667 0.39189189 Logitmg2
## 128 0.7916667 0.41431670 Logitmg2
## 129 0.8194444 0.45199063 Logitmg2
## 130 0.7500000 0.22580645 Logitmg2
## 131 0.7916667 0.34146341 Logitmg2
## 132 0.6986301 0.16441207 Logitmg2
## 133 0.8333333 0.50401837 Logitmg2
## 134 0.7671233 0.29368241 Logitmg2
## 135 0.7638889 0.28337237 Logitmg2
## 136 0.8219178 0.51457801 Logitmg2
## 137 0.7397260 0.17977528 Logitmg2
## 138 0.8333333 0.53993610 Logitmg2
## 139 0.7777778 0.36353591 Logitmg2
## 140 0.8194444 0.49240781 Logitmg2
## 141 0.7638889 0.25365854 Logitmg2
## 142 0.7777778 0.33869116 Logitmg2
## 143 0.7083333 0.11475410 Logitmg2
## 144 0.8082192 0.42905028 Logitmg2
## 145 0.7916667 0.34146341 Logitmg2
## 146 0.7222222 0.17336395 Logitmg2
## 147 0.7777778 0.40801644 Logitmg2
## 148 0.8082192 0.44935345 Logitmg2
## 149 0.7945205 0.45818902 Logitmg2
## 150 0.7808219 0.32250580 Logitmg2
## 151 0.8356164 0.51061453 Logitmg2
## 152 0.7500000 0.22580645 Logitmg2
## 153 0.7916667 0.41431670 Logitmg2
## 154 0.7671233 0.38594755 Logitmg2
## 155 0.7777778 0.40801644 Logitmg2
## 156 0.8356164 0.54422477 Logitmg2
## 157 0.7916667 0.28191489 Logitmg2
## 158 0.7916667 0.36768150 Logitmg2
## 159 0.7397260 0.26574907 Logitmg2
## 160 0.7638889 0.25365854 Logitmg2
## 161 0.7777778 0.33869116 Logitmg2
## 162 0.7500000 0.25602755 Logitmg2
## 163 0.8055556 0.42135476 Logitmg2
## 164 0.8055556 0.42135476 Logitmg2
## 165 0.7534247 0.26592179 Logitmg2
## 166 0.7916667 0.36768150 Logitmg2
## 167 0.7808219 0.39229969 Logitmg2
## 168 0.7945205 0.37677860 Logitmg2
## 169 0.7916667 0.36768150 Logitmg2
## 170 0.7397260 0.26574907 Logitmg2
## 171 0.7638889 0.38181818 Logitmg2
## 172 0.7945205 0.35245417 Logitmg2
## 173 0.7500000 0.25602755 Logitmg2
## 174 0.7945205 0.45818902 Logitmg2
## 175 0.7777778 0.31182796 Logitmg2
## 176 0.8333333 0.50401837 Logitmg2
## 177 0.7534247 0.29202586 Logitmg2
## 178 0.8493151 0.57490736 Logitmg2
## 179 0.7638889 0.25365854 Logitmg2
## 180 0.8194444 0.42926829 Logitmg2
## 181 0.7945205 0.35245417 Logitmg2
## 182 0.7671233 0.29368241 Logitmg2
## 183 0.7638889 0.31081081 Logitmg2
## 184 0.7500000 0.25602755 Logitmg2
## 185 0.8055556 0.44309392 Logitmg2
## 186 0.8082192 0.48591549 Logitmg2
## 187 0.7777778 0.33869116 Logitmg2
## 188 0.8055556 0.44309392 Logitmg2
## 189 0.7500000 0.22580645 Logitmg2
## 190 0.7808219 0.34748603 Logitmg2
## 191 0.8055556 0.46325879 Logitmg2
## 192 0.7916667 0.39189189 Logitmg2
## 193 0.7808219 0.34748603 Logitmg2
## 194 0.7123288 0.15907844 Logitmg2
## 195 0.8472222 0.58577406 Logitmg2
## 196 0.7534247 0.29202586 Logitmg2
## 197 0.8055556 0.37235367 Logitmg2
## 198 0.7638889 0.28337237 Logitmg2
## 199 0.8082192 0.46826223 Logitmg2
## 200 0.7777778 0.25097529 Logitmg2
## Phân tích hình ảnh
# install.packages("ggplot2")
library(ggplot2)
library(ggthemes)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.7320, color = "blue") + theme_wsj() # Giá trị 0.7320 = 73,20% là tỷ lệ nhóm chiếm ưu thế (không chuyển đổi xe buýt)
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 73.20% (đường màu xanh)
h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2
grid.arrange(h1, h2, ncol = 2, nrow = 1)
## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
##
## Welch Two Sample t-test
##
## data: resamplemod$Accuracy by resamplemod$Mohinh
## t = -1.1687, df = 194.25, p-value = 0.244
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.015555007 0.003979665
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2
## 0.7769616 0.7827492
summary(a)
## Accuracy Kappa Mohinh
## Min. :0.7083 Min. :0.07805 Length:100
## 1st Qu.:0.7500 1st Qu.:0.28903 Class :character
## Median :0.7778 Median :0.36168 Mode :character
## Mean :0.7770 Mean :0.36674
## 3rd Qu.:0.8056 3rd Qu.:0.44466
## Max. :0.8750 Max. :0.66109
summary(b)
## Accuracy Kappa Mohinh
## Min. :0.6986 Min. :0.1148 Length:100
## 1st Qu.:0.7639 1st Qu.:0.2825 Class :character
## Median :0.7862 Median :0.3635 Mode :character
## Mean :0.7827 Mean :0.3586
## 3rd Qu.:0.8056 3rd Qu.:0.4291
## Max. :0.8493 Max. :0.6027
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
##
## Welch Two Sample t-test
##
## data: resamplemod$Kappa by resamplemod$Mohinh
## t = 0.52532, df = 197.18, p-value = 0.6
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02231013 0.03851178
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2
## 0.3667391 0.3586382
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
##
## ci
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Mô hình mg1
mgtrain1 <- train(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, method = "glm", family = "binomial", data = BS)
summary(mgtrain1)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6346 -0.6513 -0.3886 0.4517 3.0879
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.460888 1.014052 -0.455 0.649468
## `AG25-60` 0.509909 0.408317 1.249 0.211735
## `AG>60` 1.600156 0.809458 1.977 0.048062 *
## `OCOfficers/Workers` -0.601602 0.444097 -1.355 0.175525
## `OCHousewife/Unemployed` -0.672492 0.670571 -1.003 0.315927
## `OCFree labor` -1.067818 0.435640 -2.451 0.014240 *
## OCOthers 0.145226 0.567793 0.256 0.798126
## MCYes -0.803585 0.424113 -1.895 0.058127 .
## MOYes -0.142332 0.381606 -0.373 0.709162
## NM1 0.797740 0.828571 0.963 0.335653
## NM2 0.584359 0.799689 0.731 0.464942
## `NM>=3` 1.153470 0.800814 1.440 0.149762
## TMMotorbike 0.006570 0.439137 0.015 0.988063
## TMCar 0.810212 0.563283 1.438 0.150328
## `TMHichhiking/Taxi` 0.504619 0.497789 1.014 0.310716
## `TPPicking Children` -1.452278 0.431442 -3.366 0.000762 ***
## `TPEntertainment/Food` -0.855917 0.284101 -3.013 0.002589 **
## TPOthers -1.357762 0.918309 -1.479 0.139262
## SPYes -0.040310 0.446021 -0.090 0.927988
## `SPDon't know` -1.119875 0.598265 -1.872 0.061224 .
## `WIAt Bus Stops` 0.622671 0.304066 2.048 0.040578 *
## `WIInternet/Facebook` 1.294873 0.357963 3.617 0.000298 ***
## `WIFrom Friends` 0.905878 0.394216 2.298 0.021566 *
## WIOthers 0.670218 0.463461 1.446 0.148145
## BCYes -0.888957 0.231589 -3.839 0.000124 ***
## CAYes -0.915710 0.236190 -3.877 0.000106 ***
## BSCNormal -0.227395 0.404108 -0.563 0.573633
## BSCAgree -0.194065 0.362292 -0.536 0.592195
## BSANormal -0.596242 0.443520 -1.344 0.178838
## BSAAgree 0.096531 0.408890 0.236 0.813369
## BRCNormal -0.343956 0.431132 -0.798 0.424988
## BRCAgree -0.263950 0.341203 -0.774 0.439175
## MRComfortable 0.052656 0.393148 0.134 0.893455
## `MRLow price` 0.969812 0.556347 1.743 0.081302 .
## MRAccessibility 0.804047 0.436812 1.841 0.065663 .
## MRReliability -0.097818 0.520557 -0.188 0.850948
## MRothers 0.231989 0.656438 0.353 0.723784
## BUBYes -0.050910 0.235854 -0.216 0.829101
## SBMNormal 0.252138 0.286773 0.879 0.379279
## SBMAgree 1.963412 0.329928 5.951 2.66e-09 ***
## SBSNormal 0.008038 0.273902 0.029 0.976590
## SBSAgree 0.775681 0.384369 2.018 0.043585 *
## DI -0.032412 0.022708 -1.427 0.153481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 618.39 on 681 degrees of freedom
## AIC: 704.39
##
## Number of Fisher Scoring iterations: 5
pred1mg <- predict(mgtrain1, newdata = BS, type = "prob")
rmg <- roc(BS$MSI, pred1mg$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg$auc
## Area under the curve: 0.838
plot(rmg, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)
plot(smooth(rmg), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)
## Có thể dùng các lệnh đơn giản
auc(rmg)
## Area under the curve: 0.838
plot.roc(rmg)
ci(rmg)
## 95% CI: 0.8053-0.8708 (DeLong)
## Mô hình mg2
mg2train1 <- train(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS, method = "glm", family = "binomial")
summary(mg2train1)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0246 -0.7121 -0.4431 0.5256 2.7444
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04216 0.48436 0.087 0.930642
## `OCOfficers/Workers` -0.31020 0.27507 -1.128 0.259441
## `OCHousewife/Unemployed` -0.29222 0.52541 -0.556 0.578092
## `OCFree labor` -0.66363 0.28470 -2.331 0.019753 *
## OCOthers 0.91856 0.39809 2.307 0.021032 *
## MCYes -0.85258 0.31772 -2.683 0.007286 **
## `TPPicking Children` -1.05006 0.38954 -2.696 0.007026 **
## `TPEntertainment/Food` -0.80345 0.26544 -3.027 0.002471 **
## TPOthers -1.11305 0.81779 -1.361 0.173497
## SPYes -0.02669 0.41726 -0.064 0.948998
## `SPDon't know` -1.10368 0.55322 -1.995 0.046040 *
## `WIAt Bus Stops` 0.60130 0.28481 2.111 0.034751 *
## `WIInternet/Facebook` 1.18500 0.33140 3.576 0.000349 ***
## `WIFrom Friends` 0.98890 0.36275 2.726 0.006408 **
## WIOthers 0.60877 0.42965 1.417 0.156513
## BCYes -0.81650 0.20905 -3.906 9.39e-05 ***
## CAYes -0.92689 0.21197 -4.373 1.23e-05 ***
## SBMNormal 0.18031 0.22877 0.788 0.430593
## SBMAgree 2.13985 0.27266 7.848 4.22e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 656.17 on 705 degrees of freedom
## AIC: 694.17
##
## Number of Fisher Scoring iterations: 5
pred1mg2 <- predict(mg2train1, newdata = BS, type = "prob")
rmg2 <- roc(BS$MSI, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.8071
auc(rmg2)
## Area under the curve: 0.8071
plot.roc(rmg2)
ci(rmg2)
## 95% CI: 0.7711-0.8431 (DeLong)
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)
plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)
## Tính hệ số Gini = 2AUC-1
Ginimg <- 2*(rmg$auc)-1
Ginimg
## [1] 0.6760941
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.6141509
6. Importance of variables in models
library(caret)
varImp(mg2)
## Overall
## OCOfficers/Workers 1.1277131
## OCHousewife/Unemployed 0.5561732
## OCFree labor 2.3310002
## OCOthers 2.3074161
## MCYes 2.6834709
## TPPicking Children 2.6956221
## TPEntertainment/Food 3.0268242
## TPOthers 1.3610535
## SPYes 0.0639657
## SPDon't know 1.9950247
## WIAt Bus Stops 2.1112429
## WIInternet/Facebook 3.5757098
## WIFrom Friends 2.7261485
## WIOthers 1.4168956
## BCYes 3.9058054
## CAYes 4.3728317
## SBMNormal 0.7881773
## SBMAgree 7.8481853
# plot(varImp(mg2))
7. Probility of bus use - Prohibit model
# Prohibit Models
library(aod)
##
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
##
## rats
## Model mg1
prohibitmg1 <- glm(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial(link = "probit"), data = BS)
summary(prohibitmg1)
##
## Call:
## glm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI +
## BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial(link = "probit"),
## data = BS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6222 -0.6786 -0.3974 0.4737 3.1306
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.409066 0.576083 -0.710 0.477653
## AG25-60 0.327883 0.230042 1.425 0.154065
## AG>60 0.872810 0.453828 1.923 0.054453 .
## OCOfficers/Workers -0.385970 0.251363 -1.536 0.124659
## OCHousewife/Unemployed -0.330146 0.365637 -0.903 0.366560
## OCFree labor -0.655956 0.247240 -2.653 0.007975 **
## OCOthers 0.022091 0.322865 0.068 0.945450
## MCYes -0.470112 0.243446 -1.931 0.053473 .
## MOYes -0.060611 0.219026 -0.277 0.781987
## NM1 0.497254 0.473525 1.050 0.293667
## NM2 0.366264 0.458343 0.799 0.424230
## NM>=3 0.686721 0.459089 1.496 0.134697
## TMMotorbike -0.025545 0.250540 -0.102 0.918789
## TMCar 0.377005 0.320117 1.178 0.238912
## TMHichhiking/Taxi 0.250315 0.287445 0.871 0.383849
## TPPicking Children -0.726259 0.230270 -3.154 0.001611 **
## TPEntertainment/Food -0.465732 0.159769 -2.915 0.003557 **
## TPOthers -0.805282 0.503666 -1.599 0.109856
## SPYes 0.037535 0.246830 0.152 0.879134
## SPDon't know -0.592327 0.320357 -1.849 0.064464 .
## WIAt Bus Stops 0.316128 0.169090 1.870 0.061541 .
## WIInternet/Facebook 0.701176 0.202539 3.462 0.000536 ***
## WIFrom Friends 0.502028 0.226176 2.220 0.026444 *
## WIOthers 0.330956 0.257032 1.288 0.197883
## BCYes -0.486766 0.130179 -3.739 0.000185 ***
## CAYes -0.495117 0.132620 -3.733 0.000189 ***
## BSCNormal -0.078621 0.228104 -0.345 0.730343
## BSCAgree -0.096139 0.206924 -0.465 0.642212
## BSANormal -0.336870 0.252355 -1.335 0.181908
## BSAAgree 0.046502 0.236533 0.197 0.844144
## BRCNormal -0.162264 0.242802 -0.668 0.503945
## BRCAgree -0.140211 0.197327 -0.711 0.477361
## MRComfortable 0.048848 0.220095 0.222 0.824360
## MRLow price 0.553298 0.316626 1.747 0.080554 .
## MRAccessibility 0.462671 0.245769 1.883 0.059762 .
## MRReliability -0.038754 0.291157 -0.133 0.894112
## MRothers 0.180552 0.378096 0.478 0.632986
## BUBYes -0.001546 0.133257 -0.012 0.990742
## SBMNormal 0.145600 0.163637 0.890 0.373588
## SBMAgree 1.126078 0.189087 5.955 2.6e-09 ***
## SBSNormal -0.006763 0.156507 -0.043 0.965531
## SBSAgree 0.438031 0.223619 1.959 0.050133 .
## DI -0.016295 0.012740 -1.279 0.200879
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 621.07 on 681 degrees of freedom
## AIC: 707.07
##
## Number of Fisher Scoring iterations: 6
## Model mg2
prohibitmg2 <- glm(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial(link = "probit"), data = BS)
summary(prohibitmg2)
##
## Call:
## glm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial(link = "probit"),
## data = BS)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9872 -0.7276 -0.4385 0.5471 2.8378
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.02832 0.27459 -0.103 0.917853
## OCOfficers/Workers -0.19292 0.15721 -1.227 0.219764
## OCHousewife/Unemployed -0.13386 0.28889 -0.463 0.643099
## OCFree labor -0.39711 0.16316 -2.434 0.014940 *
## OCOthers 0.49647 0.23159 2.144 0.032054 *
## MCYes -0.48138 0.18515 -2.600 0.009325 **
## TPPicking Children -0.56673 0.21358 -2.654 0.007965 **
## TPEntertainment/Food -0.45503 0.15033 -3.027 0.002470 **
## TPOthers -0.67399 0.44839 -1.503 0.132808
## SPYes 0.02745 0.23117 0.119 0.905494
## SPDon't know -0.62736 0.29901 -2.098 0.035892 *
## WIAt Bus Stops 0.30447 0.16024 1.900 0.057412 .
## WIInternet/Facebook 0.64101 0.19027 3.369 0.000755 ***
## WIFrom Friends 0.53193 0.21178 2.512 0.012014 *
## WIOthers 0.30776 0.24237 1.270 0.204152
## BCYes -0.45796 0.11899 -3.849 0.000119 ***
## CAYes -0.51245 0.11987 -4.275 1.91e-05 ***
## SBMNormal 0.12153 0.13025 0.933 0.350802
## SBMAgree 1.26007 0.15521 8.118 4.72e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 841.60 on 723 degrees of freedom
## Residual deviance: 657.32 on 705 degrees of freedom
## AIC: 695.32
##
## Number of Fisher Scoring iterations: 6
# Đánh giá tác động tổng thể Purpose (Total effect of Travel Purpose) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với Purpose là từ 7:9)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 7:9) # Giá trị 15.2 ứng với p-value < 0.05 --> KL: tác động tổng thể về TP (mục đich chuyến đi) của người sử dụng lên khả năng chuyển đổi xe buýt là tồn tại và có ý nghĩa thống kê
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 15.2, df = 3, P(> X2) = 0.0016
# Đánh giá tác động tổng thể WI (Total effect of WI) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với WI là từ 12:15)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 12:15)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 13.4, df = 4, P(> X2) = 0.0093
# Đánh giá tác động tổng thể SBM (Total effect of SBM) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SBM là từ 18:19)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 18:19)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 70.2, df = 2, P(> X2) = 5.6e-16
# Đánh giá tác động tổng thể SP (Total effect of SP) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SP là từ 10:11)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 10:11)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 7.2, df = 2, P(> X2) = 0.028
# Đánh giá tác động tổng thể OC (Total effect of OC) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với OC là từ 2:5)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 2:5)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 16.4, df = 4, P(> X2) = 0.0025
8. Perception of bus in two groups people: Shift to bus and non ship to bus 8.1. Tạo dữ liệu biểu đồ likert tương ứng với 2 nhóm : Chuyển đổi BP_MS và không chuyển đổi BP_nMS
# Create data for Likert graph by BS (remove ID and TM =7-Bus)
head(MS)
## ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1 1 0 1 6 3 3 1 2 10 1 1 0 4 1 0 1 15 1 3 3 3 4 4
## 2 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5
## 3 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4
## 4 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4
## 5 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4
## 6 6 0 1 4 1 4 1 20 30 1 1 0 5 3 0 2 50 0 3 2 4 4 4
## BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1 4 4 3 4 4 4 4 1 1 1 0 0 5 4 2 2 0 0 0 0 0 1 2
## 2 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3
## 3 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3
## 4 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3
## 5 2 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2
## 6 3 3 3 2 4 4 4 2 0 0 0 1 4 3 4 2 1 1 1 1 1 1 2
## NR
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
str(MS)
## 'data.frame': 810 obs. of 47 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CA : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TM : int 6 3 3 3 3 4 4 3 3 3 ...
## $ TP : int 3 1 1 1 1 1 1 1 1 1 ...
## $ FR : int 3 4 4 4 4 4 4 4 4 4 ...
## $ DT : int 1 1 3 1 3 1 1 1 2 1 ...
## $ DI : num 2 8 5 5 8 20 15 10 12 10 ...
## $ TI : num 10 15 15 10 15 30 20 25 30 25 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : int 0 1 1 1 0 0 0 0 1 1 ...
## $ MR : int 4 2 4 2 2 5 5 2 2 2 ...
## $ WE : int 1 3 1 1 3 3 3 3 1 3 ...
## $ WK : int 0 0 0 0 0 0 0 1 0 0 ...
## $ NBR: int 1 1 2 4 2 2 4 2 1 4 ...
## $ CO : num 15 5 3 10 5 50 40 5 7 7 ...
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: int 3 2 2 2 2 3 2 3 3 3 ...
## $ SBS: int 3 2 2 2 2 2 2 2 3 2 ...
## $ BST: int 3 2 2 3 3 4 4 5 4 2 ...
## $ BSC: int 4 4 4 4 4 4 4 5 4 5 ...
## $ BSA: int 4 5 4 4 4 4 4 5 4 5 ...
## $ BRE: int 4 2 3 4 2 3 2 3 3 3 ...
## $ BCO: int 4 2 3 4 2 3 2 3 3 2 ...
## $ BIN: int 3 4 4 4 3 3 3 3 3 4 ...
## $ BHE: int 4 2 2 3 2 2 2 3 3 2 ...
## $ BEN: int 4 5 4 3 4 4 5 5 4 5 ...
## $ BRC: int 4 5 4 3 4 4 5 5 4 5 ...
## $ BWE: int 4 3 4 5 4 4 5 5 4 4 ...
## $ SP : int 1 2 1 1 2 2 2 1 1 1 ...
## $ BI : int 1 0 1 1 0 0 0 1 1 1 ...
## $ WI : int 1 0 2 1 0 0 0 2 3 2 ...
## $ MSI: int 0 0 0 0 0 0 0 1 0 0 ...
## $ GD : int 0 0 1 1 0 1 1 0 0 1 ...
## $ AG : int 5 3 3 3 4 4 5 4 3 4 ...
## $ OC : int 4 6 2 2 6 3 3 7 7 3 ...
## $ IN : int 2 3 4 3 3 4 4 2 3 3 ...
## $ NC : int 2 1 0 0 1 2 2 0 1 0 ...
## $ MC : int 0 1 1 1 1 1 1 1 1 1 ...
## $ CC : int 0 0 0 0 0 1 1 0 0 0 ...
## $ BO : int 0 1 0 1 0 1 0 0 1 0 ...
## $ MO : int 0 1 1 1 1 1 1 1 1 1 ...
## $ RO : int 0 0 0 0 0 1 1 0 0 0 ...
## $ NB : int 1 1 0 1 0 1 0 0 1 0 ...
## $ NM : int 2 3 3 3 2 2 2 2 3 3 ...
## $ NR : int 1 0 0 0 0 1 1 0 0 0 ...
dim(MS)
## [1] 810 47
BS_likert = subset(MS,TM < 7)
head(BS_likert)
## ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1 1 0 1 6 3 3 1 2 10 1 1 0 4 1 0 1 15 1 3 3 3 4 4
## 2 2 0 1 3 1 4 1 8 15 1 1 1 2 3 0 1 5 0 2 2 2 4 5
## 3 3 0 1 3 1 4 3 5 15 1 1 1 4 1 0 2 3 1 2 2 2 4 4
## 4 4 0 1 3 1 4 1 5 10 1 1 1 2 1 0 4 10 1 2 2 3 4 4
## 5 5 0 1 3 1 4 3 8 15 1 1 0 2 3 0 2 5 1 2 2 3 4 4
## 6 6 0 1 4 1 4 1 20 30 1 1 0 5 3 0 2 50 0 3 2 4 4 4
## BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1 4 4 3 4 4 4 4 1 1 1 0 0 5 4 2 2 0 0 0 0 0 1 2
## 2 2 2 4 2 5 5 3 2 0 0 0 0 3 6 3 1 1 0 1 1 0 1 3
## 3 3 3 4 2 4 4 4 1 1 2 0 1 3 2 4 0 1 0 0 1 0 0 3
## 4 4 4 4 3 3 3 5 1 1 1 0 1 3 2 3 0 1 0 1 1 0 1 3
## 5 2 2 3 2 4 4 4 2 0 0 0 0 4 6 3 1 1 0 0 1 0 0 2
## 6 3 3 3 2 4 4 4 2 0 0 0 1 4 3 4 2 1 1 1 1 1 1 2
## NR
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 1
str(BS_likert)
## 'data.frame': 724 obs. of 47 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CA : int 0 0 0 0 0 0 0 0 0 0 ...
## $ BC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TM : int 6 3 3 3 3 4 4 3 3 3 ...
## $ TP : int 3 1 1 1 1 1 1 1 1 1 ...
## $ FR : int 3 4 4 4 4 4 4 4 4 4 ...
## $ DT : int 1 1 3 1 3 1 1 1 2 1 ...
## $ DI : num 2 8 5 5 8 20 15 10 12 10 ...
## $ TI : num 10 15 15 10 15 30 20 25 30 25 ...
## $ SC : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ TS : int 0 1 1 1 0 0 0 0 1 1 ...
## $ MR : int 4 2 4 2 2 5 5 2 2 2 ...
## $ WE : int 1 3 1 1 3 3 3 3 1 3 ...
## $ WK : int 0 0 0 0 0 0 0 1 0 0 ...
## $ NBR: int 1 1 2 4 2 2 4 2 1 4 ...
## $ CO : num 15 5 3 10 5 50 40 5 7 7 ...
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: int 3 2 2 2 2 3 2 3 3 3 ...
## $ SBS: int 3 2 2 2 2 2 2 2 3 2 ...
## $ BST: int 3 2 2 3 3 4 4 5 4 2 ...
## $ BSC: int 4 4 4 4 4 4 4 5 4 5 ...
## $ BSA: int 4 5 4 4 4 4 4 5 4 5 ...
## $ BRE: int 4 2 3 4 2 3 2 3 3 3 ...
## $ BCO: int 4 2 3 4 2 3 2 3 3 2 ...
## $ BIN: int 3 4 4 4 3 3 3 3 3 4 ...
## $ BHE: int 4 2 2 3 2 2 2 3 3 2 ...
## $ BEN: int 4 5 4 3 4 4 5 5 4 5 ...
## $ BRC: int 4 5 4 3 4 4 5 5 4 5 ...
## $ BWE: int 4 3 4 5 4 4 5 5 4 4 ...
## $ SP : int 1 2 1 1 2 2 2 1 1 1 ...
## $ BI : int 1 0 1 1 0 0 0 1 1 1 ...
## $ WI : int 1 0 2 1 0 0 0 2 3 2 ...
## $ MSI: int 0 0 0 0 0 0 0 1 0 0 ...
## $ GD : int 0 0 1 1 0 1 1 0 0 1 ...
## $ AG : int 5 3 3 3 4 4 5 4 3 4 ...
## $ OC : int 4 6 2 2 6 3 3 7 7 3 ...
## $ IN : int 2 3 4 3 3 4 4 2 3 3 ...
## $ NC : int 2 1 0 0 1 2 2 0 1 0 ...
## $ MC : int 0 1 1 1 1 1 1 1 1 1 ...
## $ CC : int 0 0 0 0 0 1 1 0 0 0 ...
## $ BO : int 0 1 0 1 0 1 0 0 1 0 ...
## $ MO : int 0 1 1 1 1 1 1 1 1 1 ...
## $ RO : int 0 0 0 0 0 1 1 0 0 0 ...
## $ NB : int 1 1 0 1 0 1 0 0 1 0 ...
## $ NM : int 2 3 3 3 2 2 2 2 3 3 ...
## $ NR : int 1 0 0 0 0 1 1 0 0 0 ...
dim(BS_likert)
## [1] 724 47
## Create dat_BST
dat_BST <- BS_likert[ , c(18,19,20,21,34)]
head(dat_BST)
## BUB SBM SBS BST MSI
## 1 1 3 3 3 0
## 2 0 2 2 2 0
## 3 1 2 2 2 0
## 4 1 2 2 3 0
## 5 1 2 2 3 0
## 6 0 3 2 4 0
dim(dat_BST)
## [1] 724 5
size <- 724
dat_BST$BPE <- rep("BST", size = size) # Tạo biến mới BPE
names(dat_BST)[names(dat_BST) == "BST"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BST)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 3 0 BST
## 2 0 2 2 2 0 BST
## 3 1 2 2 2 0 BST
## 4 1 2 2 3 0 BST
## 5 1 2 2 3 0 BST
## 6 0 3 2 4 0 BST
## Create dat_BSC
dat_BSC <- BS_likert[ , c(18,19,20,22,34)]
head(dat_BSC)
## BUB SBM SBS BSC MSI
## 1 1 3 3 4 0
## 2 0 2 2 4 0
## 3 1 2 2 4 0
## 4 1 2 2 4 0
## 5 1 2 2 4 0
## 6 0 3 2 4 0
dim(dat_BSC)
## [1] 724 5
str(dat_BSC)
## 'data.frame': 724 obs. of 5 variables:
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: int 3 2 2 2 2 3 2 3 3 3 ...
## $ SBS: int 3 2 2 2 2 2 2 2 3 2 ...
## $ BSC: int 4 4 4 4 4 4 4 5 4 5 ...
## $ MSI: int 0 0 0 0 0 0 0 1 0 0 ...
dat_BSC$BPE <- rep("BSC", size = size) # Tạo biến mới BPE
names(dat_BSC)[names(dat_BSC) == "BSC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSC)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BSC
## 2 0 2 2 4 0 BSC
## 3 1 2 2 4 0 BSC
## 4 1 2 2 4 0 BSC
## 5 1 2 2 4 0 BSC
## 6 0 3 2 4 0 BSC
## Create dat_BSA
dat_BSA <- BS_likert[ , c(18,19,20,23,34)]
head(dat_BSA)
## BUB SBM SBS BSA MSI
## 1 1 3 3 4 0
## 2 0 2 2 5 0
## 3 1 2 2 4 0
## 4 1 2 2 4 0
## 5 1 2 2 4 0
## 6 0 3 2 4 0
dim(dat_BSA)
## [1] 724 5
dat_BSA$BPE <- rep("BSA", size = size) # Tạo biến mới BPE
names(dat_BSA)[names(dat_BSA) == "BSA"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSA)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BSA
## 2 0 2 2 5 0 BSA
## 3 1 2 2 4 0 BSA
## 4 1 2 2 4 0 BSA
## 5 1 2 2 4 0 BSA
## 6 0 3 2 4 0 BSA
## Create dat_BRE
dat_BRE <- BS_likert[ , c(18,19,20,24,34)]
head(dat_BRE)
## BUB SBM SBS BRE MSI
## 1 1 3 3 4 0
## 2 0 2 2 2 0
## 3 1 2 2 3 0
## 4 1 2 2 4 0
## 5 1 2 2 2 0
## 6 0 3 2 3 0
dim(dat_BRE)
## [1] 724 5
dat_BRE$BPE <- rep("BRE", size = size) # Tạo biến mới BPE
names(dat_BRE)[names(dat_BRE) == "BRE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRE)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BRE
## 2 0 2 2 2 0 BRE
## 3 1 2 2 3 0 BRE
## 4 1 2 2 4 0 BRE
## 5 1 2 2 2 0 BRE
## 6 0 3 2 3 0 BRE
## Create dat_BCO
dat_BCO <- BS_likert[ , c(18,19,20,25,34)]
head(dat_BCO)
## BUB SBM SBS BCO MSI
## 1 1 3 3 4 0
## 2 0 2 2 2 0
## 3 1 2 2 3 0
## 4 1 2 2 4 0
## 5 1 2 2 2 0
## 6 0 3 2 3 0
dim(dat_BCO)
## [1] 724 5
dat_BCO$BPE <- rep("BCO", size = size) # Tạo biến mới BPE
names(dat_BCO)[names(dat_BCO) == "BCO"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BCO)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BCO
## 2 0 2 2 2 0 BCO
## 3 1 2 2 3 0 BCO
## 4 1 2 2 4 0 BCO
## 5 1 2 2 2 0 BCO
## 6 0 3 2 3 0 BCO
## Create dat_BIN
dat_BIN <- BS_likert[ , c(18,19,20,26,34)]
head(dat_BIN)
## BUB SBM SBS BIN MSI
## 1 1 3 3 3 0
## 2 0 2 2 4 0
## 3 1 2 2 4 0
## 4 1 2 2 4 0
## 5 1 2 2 3 0
## 6 0 3 2 3 0
dim(dat_BIN)
## [1] 724 5
dat_BIN$BPE <- rep("BIN", size = size) # Tạo biến mới BPE
names(dat_BIN)[names(dat_BIN) == "BIN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BIN)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 3 0 BIN
## 2 0 2 2 4 0 BIN
## 3 1 2 2 4 0 BIN
## 4 1 2 2 4 0 BIN
## 5 1 2 2 3 0 BIN
## 6 0 3 2 3 0 BIN
## Create dat_BHE
dat_BHE <- BS_likert[ , c(18,19,20,27,34)]
head(dat_BHE)
## BUB SBM SBS BHE MSI
## 1 1 3 3 4 0
## 2 0 2 2 2 0
## 3 1 2 2 2 0
## 4 1 2 2 3 0
## 5 1 2 2 2 0
## 6 0 3 2 2 0
dim(dat_BHE)
## [1] 724 5
dat_BHE$BPE <- rep("BHE", size = size) # Tạo biến mới BPE
names(dat_BHE)[names(dat_BHE) == "BHE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BHE)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BHE
## 2 0 2 2 2 0 BHE
## 3 1 2 2 2 0 BHE
## 4 1 2 2 3 0 BHE
## 5 1 2 2 2 0 BHE
## 6 0 3 2 2 0 BHE
## Create dat_BEN
dat_BEN <- BS_likert[ , c(18,19,20,28,34)]
head(dat_BEN)
## BUB SBM SBS BEN MSI
## 1 1 3 3 4 0
## 2 0 2 2 5 0
## 3 1 2 2 4 0
## 4 1 2 2 3 0
## 5 1 2 2 4 0
## 6 0 3 2 4 0
dim(dat_BEN)
## [1] 724 5
dat_BEN$BPE <- rep("BEN", size = size) # Tạo biến mới BPE
names(dat_BEN)[names(dat_BEN) == "BEN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BEN)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BEN
## 2 0 2 2 5 0 BEN
## 3 1 2 2 4 0 BEN
## 4 1 2 2 3 0 BEN
## 5 1 2 2 4 0 BEN
## 6 0 3 2 4 0 BEN
## Create dat_BRC
dat_BRC <- BS_likert[ , c(18,19,20,29,34)]
head(dat_BRC)
## BUB SBM SBS BRC MSI
## 1 1 3 3 4 0
## 2 0 2 2 5 0
## 3 1 2 2 4 0
## 4 1 2 2 3 0
## 5 1 2 2 4 0
## 6 0 3 2 4 0
dim(dat_BRC)
## [1] 724 5
dat_BRC$BPE <- rep("BRC", size = size) # Tạo biến mới BPE
names(dat_BRC)[names(dat_BRC) == "BRC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRC)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BRC
## 2 0 2 2 5 0 BRC
## 3 1 2 2 4 0 BRC
## 4 1 2 2 3 0 BRC
## 5 1 2 2 4 0 BRC
## 6 0 3 2 4 0 BRC
## Create dat_BWE
dat_BWE <- BS_likert[ , c(18,19,20,30,34)]
head(dat_BWE)
## BUB SBM SBS BWE MSI
## 1 1 3 3 4 0
## 2 0 2 2 3 0
## 3 1 2 2 4 0
## 4 1 2 2 5 0
## 5 1 2 2 4 0
## 6 0 3 2 4 0
dim(dat_BWE)
## [1] 724 5
dat_BWE$BPE <- rep("BWE", size = size) # Tạo biến mới BPE
names(dat_BWE)[names(dat_BWE) == "BWE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BWE)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 4 0 BWE
## 2 0 2 2 3 0 BWE
## 3 1 2 2 4 0 BWE
## 4 1 2 2 5 0 BWE
## 5 1 2 2 4 0 BWE
## 6 0 3 2 4 0 BWE
## Tạo dữ liệu likert từ 10 dữ liệu: dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE
B.Per <- rbind(dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE)
head(B.Per)
## BUB SBM SBS Res MSI BPE
## 1 1 3 3 3 0 BST
## 2 0 2 2 2 0 BST
## 3 1 2 2 2 0 BST
## 4 1 2 2 3 0 BST
## 5 1 2 2 3 0 BST
## 6 0 3 2 4 0 BST
str(B.Per)
## 'data.frame': 7240 obs. of 6 variables:
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: int 3 2 2 2 2 3 2 3 3 3 ...
## $ SBS: int 3 2 2 2 2 2 2 2 3 2 ...
## $ Res: int 3 2 2 3 3 4 4 5 4 2 ...
## $ MSI: int 0 0 0 0 0 0 0 1 0 0 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(B.Per)
## [1] 7240 6
# Cách 2: Tạo dữ liệu thủ công trên excel với các cột BPE, Res, BUB, MSI, SBM, SBS: t_likert = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\8. MS-Bieu do Likert.csv" ; BP_likert = read.csv(t_likert, header = T, sep = ";")
# Data Bus perception of people shift to bus
BP_MS = subset(B.Per,MSI == 1)
str(BP_MS)
## 'data.frame': 1940 obs. of 6 variables:
## $ BUB: int 1 1 1 1 1 1 1 1 0 0 ...
## $ SBM: int 3 1 4 4 5 5 4 4 3 4 ...
## $ SBS: int 2 1 3 2 4 4 4 4 2 2 ...
## $ Res: int 5 3 2 3 1 1 4 4 2 2 ...
## $ MSI: int 1 1 1 1 1 1 1 1 1 1 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(BP_MS)
## [1] 1940 6
BP_MS = BP_MS[ , c(4, 6)]
dim(BP_MS)
## [1] 1940 2
head(BP_MS)
## Res BPE
## 8 5 BST
## 12 3 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 19 1 BST
# Data Bus perception of people shift to bus (MSI = 1) and used to use bus before (BUB = 1)
BP_MS_BU = subset(B.Per,BUB == 1 & MSI == 1)
str(BP_MS_BU)
## 'data.frame': 1400 obs. of 6 variables:
## $ BUB: int 1 1 1 1 1 1 1 1 1 1 ...
## $ SBM: int 3 1 4 4 5 5 4 4 4 4 ...
## $ SBS: int 2 1 3 2 4 4 4 4 2 3 ...
## $ Res: int 5 3 2 3 1 1 4 4 4 4 ...
## $ MSI: int 1 1 1 1 1 1 1 1 1 1 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(BP_MS_BU)
## [1] 1400 6
BP_MS_BU = BP_MS_BU[ , c(4, 6)]
dim(BP_MS_BU)
## [1] 1400 2
head(BP_MS_BU)
## Res BPE
## 8 5 BST
## 12 3 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 19 1 BST
# Data Bus perception of people don't shift to bus
BP_nMS = subset(B.Per,MSI == 0)
str(BP_nMS)
## 'data.frame': 5300 obs. of 6 variables:
## $ BUB: int 1 0 1 1 1 0 0 1 1 1 ...
## $ SBM: int 3 2 2 2 2 3 2 3 3 3 ...
## $ SBS: int 3 2 2 2 2 2 2 3 2 2 ...
## $ Res: int 3 2 2 3 3 4 4 4 2 2 ...
## $ MSI: int 0 0 0 0 0 0 0 0 0 0 ...
## $ BPE: chr "BST" "BST" "BST" "BST" ...
dim(BP_nMS)
## [1] 5300 6
BP_nMS = BP_nMS[ , c(4, 6)]
dim(BP_nMS)
## [1] 5300 2
head(BP_nMS)
## Res BPE
## 1 3 BST
## 2 2 BST
## 3 2 BST
## 4 3 BST
## 5 3 BST
## 6 4 BST
8.2. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi - BP_MS
# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS)
## Res BPE
## 8 5 BST
## 12 3 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 19 1 BST
attach(BP_MS)
BP_MS = within(BP_MS, {
Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
})
str(BP_MS)
## 'data.frame': 1940 obs. of 2 variables:
## $ Res: Factor w/ 5 levels "Very Disagree",..: 5 3 2 3 1 1 4 4 2 2 ...
## $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS)
## Res BPE
## Very Disagree:136 BST :194
## Disagree :334 BSC :194
## Normal :440 BSA :194
## Agree :827 BRE :194
## Very Agree :203 BCO :194
## BIN :194
## (Other):776
t = compareGroups(Res ~ BPE, data = BP_MS)
createTable(t)
##
## --------Summary descriptives table by 'Res'---------
##
## ____________________________________________________________________________
## Very Disagree Disagree Normal Agree Very Agree p.overall
## N=136 N=334 N=440 N=827 N=203
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## BPE: <0.001
## BST 13 (9.56%) 53 (15.9%) 57 (13.0%) 62 (7.50%) 9 (4.43%)
## BSC 15 (11.0%) 7 (2.10%) 34 (7.73%) 93 (11.2%) 45 (22.2%)
## BSA 14 (10.3%) 63 (18.9%) 57 (13.0%) 54 (6.53%) 6 (2.96%)
## BRE 9 (6.62%) 57 (17.1%) 73 (16.6%) 49 (5.93%) 6 (2.96%)
## BCO 19 (14.0%) 10 (2.99%) 21 (4.77%) 96 (11.6%) 48 (23.6%)
## BIN 12 (8.82%) 29 (8.68%) 57 (13.0%) 83 (10.0%) 13 (6.40%)
## BHE 11 (8.09%) 12 (3.59%) 31 (7.05%) 122 (14.8%) 18 (8.87%)
## BEN 12 (8.82%) 18 (5.39%) 32 (7.27%) 116 (14.0%) 16 (7.88%)
## BRC 17 (12.5%) 75 (22.5%) 53 (12.0%) 46 (5.56%) 3 (1.48%)
## BWE 14 (10.3%) 10 (2.99%) 25 (5.68%) 106 (12.8%) 39 (19.2%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS %>%
group_by(BPE, Res) %>%
count() %>%
ungroup() %>%
group_by(BPE) %>%
mutate(percent = 100*n / sum(n)) %>%
mutate(percent = round(percent, 2)) %>%
mutate(bar_text = paste0(percent, "%")) %>%
ungroup() -> df_for_ploting
df_for_ploting %>%
filter(Res == Res[5]) %>%
arrange(percent) %>%
pull(BPE) -> order_x
order_x
## [1] BRC BSA BRE BST BIN BEN BHE BWE BSC BCO
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot:
my_colors <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font <- "Roboto Condensed"
df_for_ploting %>%
mutate(BPE = factor(BPE, levels = order_x), Res = factor(Res, levels = Res[5:1])) -> df_odered
df_odered
## # A tibble: 50 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 13 6.7 6.7%
## 2 BST Disagree 53 27.3 27.32%
## 3 BST Normal 57 29.4 29.38%
## 4 BST Agree 62 32.0 31.96%
## 5 BST Very Agree 9 4.64 4.64%
## 6 BSC Very Disagree 15 7.73 7.73%
## 7 BSC Disagree 7 3.61 3.61%
## 8 BSC Normal 34 17.5 17.53%
## 9 BSC Agree 93 47.9 47.94%
## 10 BSC Very Agree 45 23.2 23.2%
## # ... with 40 more rows
library(extrafont)
## Registering fonts with R
theme_set(theme_minimal())
gg <- df_odered %>%
ggplot(aes(x = BPE, y = percent, fill = Res)) +
geom_col(width = 0.8) +
coord_flip() +
scale_fill_manual(values = my_colors[5:1], name = "") +
theme(legend.position = "top") +
theme(text = element_text(family = my_font)) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) +
theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) +
theme(axis.text = element_text(color = "grey20", size = 10.2)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
theme(legend.key.height = unit(0.15, "mm")) +
labs(x = NULL, y = NULL,
title = "Bus perception of people who have ability to shift to bus",
subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# For displaying percent of "Very Disagree":
df_odered %>%
filter(Res == "Very Disagree") %>%
filter(percent >= 3) -> df_for_text1
df_for_text1
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 13 6.7 6.7%
## 2 BSC Very Disagree 15 7.73 7.73%
## 3 BSA Very Disagree 14 7.22 7.22%
## 4 BRE Very Disagree 9 4.64 4.64%
## 5 BCO Very Disagree 19 9.79 9.79%
## 6 BIN Very Disagree 12 6.19 6.19%
## 7 BHE Very Disagree 11 5.67 5.67%
## 8 BEN Very Disagree 12 6.19 6.19%
## 9 BRC Very Disagree 17 8.76 8.76%
## 10 BWE Very Disagree 14 7.22 7.22%
# For displaying percent of "Disagree":
df_odered %>%
filter(Res == "Disagree") %>%
filter(percent >= 3) -> df_for_text2
df_for_text2
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Disagree 53 27.3 27.32%
## 2 BSC Disagree 7 3.61 3.61%
## 3 BSA Disagree 63 32.5 32.47%
## 4 BRE Disagree 57 29.4 29.38%
## 5 BCO Disagree 10 5.15 5.15%
## 6 BIN Disagree 29 15.0 14.95%
## 7 BHE Disagree 12 6.19 6.19%
## 8 BEN Disagree 18 9.28 9.28%
## 9 BRC Disagree 75 38.7 38.66%
## 10 BWE Disagree 10 5.15 5.15%
# For displaying percent of "Normal":
df_odered %>%
filter(Res == "Normal") %>%
filter(percent >= 3) -> df_for_text3
df_for_text3
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Normal 57 29.4 29.38%
## 2 BSC Normal 34 17.5 17.53%
## 3 BSA Normal 57 29.4 29.38%
## 4 BRE Normal 73 37.6 37.63%
## 5 BCO Normal 21 10.8 10.82%
## 6 BIN Normal 57 29.4 29.38%
## 7 BHE Normal 31 16.0 15.98%
## 8 BEN Normal 32 16.5 16.49%
## 9 BRC Normal 53 27.3 27.32%
## 10 BWE Normal 25 12.9 12.89%
# For displaying percent of "Agree":
df_odered %>%
filter(Res == "Agree") %>%
filter(percent >= 3) -> df_for_text4
df_for_text4
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Agree 62 32.0 31.96%
## 2 BSC Agree 93 47.9 47.94%
## 3 BSA Agree 54 27.8 27.84%
## 4 BRE Agree 49 25.3 25.26%
## 5 BCO Agree 96 49.5 49.48%
## 6 BIN Agree 83 42.8 42.78%
## 7 BHE Agree 122 62.9 62.89%
## 8 BEN Agree 116 59.8 59.79%
## 9 BRC Agree 46 23.7 23.71%
## 10 BWE Agree 106 54.6 54.64%
# For displaying percent of "Very Agree":
df_odered %>%
filter(Res == "Very Agree") %>%
filter(percent >= 3) -> df_for_text5
df_for_text5
## # A tibble: 9 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 9 4.64 4.64%
## 2 BSC Very Agree 45 23.2 23.2%
## 3 BSA Very Agree 6 3.09 3.09%
## 4 BRE Very Agree 6 3.09 3.09%
## 5 BCO Very Agree 48 24.7 24.74%
## 6 BIN Very Agree 13 6.7 6.7%
## 7 BHE Very Agree 18 9.28 9.28%
## 8 BEN Very Agree 16 8.25 8.25%
## 9 BWE Very Agree 39 20.1 20.1%
# For displaying percent of "Very Agree":
df_odered %>%
filter(Res == "Very Agree") -> df_for_text
df_for_text
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 9 4.64 4.64%
## 2 BSC Very Agree 45 23.2 23.2%
## 3 BSA Very Agree 6 3.09 3.09%
## 4 BRE Very Agree 6 3.09 3.09%
## 5 BCO Very Agree 48 24.7 24.74%
## 6 BIN Very Agree 13 6.7 6.7%
## 7 BHE Very Agree 18 9.28 9.28%
## 8 BEN Very Agree 16 8.25 8.25%
## 9 BRC Very Agree 3 1.55 1.55%
## 10 BWE Very Agree 39 20.1 20.1%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg +
geom_text(data = df_for_text1 %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) +
geom_text(data = df_for_text1 %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) +
geom_text(data = df_for_text, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
8.3. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi và đã từng sử dụng xe buýt trước đó - BP_MS_BU
# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS_BU)
## Res BPE
## 8 5 BST
## 12 3 BST
## 15 2 BST
## 16 3 BST
## 18 1 BST
## 19 1 BST
attach(BP_MS_BU)
## The following objects are masked from BP_MS:
##
## BPE, Res
BP_MS_BU = within(BP_MS_BU, {
Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
})
str(BP_MS_BU)
## 'data.frame': 1400 obs. of 2 variables:
## $ Res: Factor w/ 5 levels "Very Disagree",..: 5 3 2 3 1 1 4 4 4 4 ...
## $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS_BU)
## Res BPE
## Very Disagree:108 BST :140
## Disagree :202 BSC :140
## Normal :339 BSA :140
## Agree :603 BRE :140
## Very Agree :148 BCO :140
## BIN :140
## (Other):560
t_BU = compareGroups(Res ~ BPE, data = BP_MS_BU)
createTable(t_BU)
##
## --------Summary descriptives table by 'Res'---------
##
## ___________________________________________________________________________
## Very Disagree Disagree Normal Agree Very Agree p.overall
## N=108 N=202 N=339 N=603 N=148
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## BPE: <0.001
## BST 9 (8.33%) 33 (16.3%) 45 (13.3%) 45 (7.46%) 8 (5.41%)
## BSC 14 (13.0%) 3 (1.49%) 28 (8.26%) 67 (11.1%) 28 (18.9%)
## BSA 10 (9.26%) 47 (23.3%) 42 (12.4%) 36 (5.97%) 5 (3.38%)
## BRE 8 (7.41%) 33 (16.3%) 56 (16.5%) 37 (6.14%) 6 (4.05%)
## BCO 14 (13.0%) 5 (2.48%) 18 (5.31%) 72 (11.9%) 31 (20.9%)
## BIN 9 (8.33%) 18 (8.91%) 42 (12.4%) 59 (9.78%) 12 (8.11%)
## BHE 10 (9.26%) 4 (1.98%) 23 (6.78%) 89 (14.8%) 14 (9.46%)
## BEN 12 (11.1%) 9 (4.46%) 25 (7.37%) 82 (13.6%) 12 (8.11%)
## BRC 12 (11.1%) 47 (23.3%) 42 (12.4%) 36 (5.97%) 3 (2.03%)
## BWE 10 (9.26%) 3 (1.49%) 18 (5.31%) 80 (13.3%) 29 (19.6%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS_BU %>%
group_by(BPE, Res) %>%
count() %>%
ungroup() %>%
group_by(BPE) %>%
mutate(percent = 100*n / sum(n)) %>%
mutate(percent = round(percent, 2)) %>%
mutate(bar_text = paste0(percent, "%")) %>%
ungroup() -> df_for_ploting_BU
df_for_ploting_BU %>%
filter(Res == Res[5]) %>%
arrange(percent) %>%
pull(BPE) -> order_x_BU
order_x_BU
## [1] BRC BSA BRE BST BIN BEN BHE BSC BWE BCO
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot:
my_colors_BU <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_BU <- "Roboto Condensed"
df_for_ploting_BU %>%
mutate(BPE = factor(BPE, levels = order_x_BU), Res = factor(Res, levels = Res[5:1])) -> df_odered_BU
df_odered_BU
## # A tibble: 50 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 9 6.43 6.43%
## 2 BST Disagree 33 23.6 23.57%
## 3 BST Normal 45 32.1 32.14%
## 4 BST Agree 45 32.1 32.14%
## 5 BST Very Agree 8 5.71 5.71%
## 6 BSC Very Disagree 14 10 10%
## 7 BSC Disagree 3 2.14 2.14%
## 8 BSC Normal 28 20 20%
## 9 BSC Agree 67 47.9 47.86%
## 10 BSC Very Agree 28 20 20%
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_BU <- df_odered_BU %>%
ggplot(aes(x = BPE, y = percent, fill = Res)) +
geom_col(width = 0.8) +
coord_flip() +
scale_fill_manual(values = my_colors[5:1], name = "") +
theme(legend.position = "top") +
theme(text = element_text(family = my_font)) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) +
theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) +
theme(axis.text = element_text(color = "grey20", size = 10.2)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
theme(legend.key.height = unit(0.15, "mm")) +
labs(x = NULL, y = NULL,
title = "Bus perception of people shifting to bus and being used to use bus before",
subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_BU
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# For displaying percent of "Very Disagree":
df_odered_BU %>%
filter(Res == "Very Disagree") %>%
filter(percent >= 3) -> df_for_text1_BU
df_for_text1_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 9 6.43 6.43%
## 2 BSC Very Disagree 14 10 10%
## 3 BSA Very Disagree 10 7.14 7.14%
## 4 BRE Very Disagree 8 5.71 5.71%
## 5 BCO Very Disagree 14 10 10%
## 6 BIN Very Disagree 9 6.43 6.43%
## 7 BHE Very Disagree 10 7.14 7.14%
## 8 BEN Very Disagree 12 8.57 8.57%
## 9 BRC Very Disagree 12 8.57 8.57%
## 10 BWE Very Disagree 10 7.14 7.14%
# For displaying percent of "Disagree":
df_odered_BU %>%
filter(Res == "Disagree") %>%
filter(percent >= 3) -> df_for_text2_BU
df_for_text2_BU
## # A tibble: 7 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Disagree 33 23.6 23.57%
## 2 BSA Disagree 47 33.6 33.57%
## 3 BRE Disagree 33 23.6 23.57%
## 4 BCO Disagree 5 3.57 3.57%
## 5 BIN Disagree 18 12.9 12.86%
## 6 BEN Disagree 9 6.43 6.43%
## 7 BRC Disagree 47 33.6 33.57%
# For displaying percent of "Normal":
df_odered_BU %>%
filter(Res == "Normal") %>%
filter(percent >= 3) -> df_for_text3_BU
df_for_text3_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Normal 45 32.1 32.14%
## 2 BSC Normal 28 20 20%
## 3 BSA Normal 42 30 30%
## 4 BRE Normal 56 40 40%
## 5 BCO Normal 18 12.9 12.86%
## 6 BIN Normal 42 30 30%
## 7 BHE Normal 23 16.4 16.43%
## 8 BEN Normal 25 17.9 17.86%
## 9 BRC Normal 42 30 30%
## 10 BWE Normal 18 12.9 12.86%
# For displaying percent of "Agree":
df_odered_BU %>%
filter(Res == "Agree") %>%
filter(percent >= 3) -> df_for_text4_BU
df_for_text4_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Agree 45 32.1 32.14%
## 2 BSC Agree 67 47.9 47.86%
## 3 BSA Agree 36 25.7 25.71%
## 4 BRE Agree 37 26.4 26.43%
## 5 BCO Agree 72 51.4 51.43%
## 6 BIN Agree 59 42.1 42.14%
## 7 BHE Agree 89 63.6 63.57%
## 8 BEN Agree 82 58.6 58.57%
## 9 BRC Agree 36 25.7 25.71%
## 10 BWE Agree 80 57.1 57.14%
# For displaying percent of "Very Agree":
df_odered_BU %>%
filter(Res == "Very Agree") %>%
filter(percent >= 3) -> df_for_text5_BU
df_for_text5_BU
## # A tibble: 9 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 8 5.71 5.71%
## 2 BSC Very Agree 28 20 20%
## 3 BSA Very Agree 5 3.57 3.57%
## 4 BRE Very Agree 6 4.29 4.29%
## 5 BCO Very Agree 31 22.1 22.14%
## 6 BIN Very Agree 12 8.57 8.57%
## 7 BHE Very Agree 14 10 10%
## 8 BEN Very Agree 12 8.57 8.57%
## 9 BWE Very Agree 29 20.7 20.71%
# For displaying percent of "Very Agree":
df_odered_BU %>%
filter(Res == "Very Agree") -> df_for_text_BU
df_for_text_BU
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 8 5.71 5.71%
## 2 BSC Very Agree 28 20 20%
## 3 BSA Very Agree 5 3.57 3.57%
## 4 BRE Very Agree 6 4.29 4.29%
## 5 BCO Very Agree 31 22.1 22.14%
## 6 BIN Very Agree 12 8.57 8.57%
## 7 BHE Very Agree 14 10 10%
## 8 BEN Very Agree 12 8.57 8.57%
## 9 BRC Very Agree 3 2.14 2.14%
## 10 BWE Very Agree 29 20.7 20.71%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg_BU +
geom_text(data = df_for_text1_BU %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) +
geom_text(data = df_for_text1_BU %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) +
geom_text(data = df_for_text_BU, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_BU)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
8.4. Vẽ biểu đồ likert cho nhóm đối tượng không chuyển đổi - BP_nMS
# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_nMS)
## Res BPE
## 1 3 BST
## 2 2 BST
## 3 2 BST
## 4 3 BST
## 5 3 BST
## 6 4 BST
attach(BP_nMS)
## The following objects are masked from BP_MS_BU:
##
## BPE, Res
## The following objects are masked from BP_MS:
##
## BPE, Res
BP_nMS = within(BP_nMS, {
Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
})
str(BP_nMS)
## 'data.frame': 5300 obs. of 2 variables:
## $ Res: Factor w/ 5 levels "Very Disagree",..: 3 2 2 3 3 4 4 4 2 2 ...
## $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_nMS)
## Res BPE
## Very Disagree: 207 BST : 530
## Disagree : 967 BSC : 530
## Normal :1435 BSA : 530
## Agree :2246 BRE : 530
## Very Agree : 445 BCO : 530
## BIN : 530
## (Other):2120
t_nMS = compareGroups(Res ~ BPE, data = BP_nMS)
createTable(t_nMS)
##
## --------Summary descriptives table by 'Res'---------
##
## _______________________________________________________________________________
## Very Disagree Disagree Normal Agree Very Agree p.overall
## N=207 N=967 N=1435 N=2246 N=445
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## BPE: <0.001
## BST 15 (7.25%) 143 (14.8%) 166 (11.6%) 176 (7.84%) 30 (6.74%)
## BSC 18 (8.70%) 42 (4.34%) 119 (8.29%) 285 (12.7%) 66 (14.8%)
## BSA 18 (8.70%) 210 (21.7%) 177 (12.3%) 103 (4.59%) 22 (4.94%)
## BRE 22 (10.6%) 113 (11.7%) 223 (15.5%) 151 (6.72%) 21 (4.72%)
## BCO 17 (8.21%) 45 (4.65%) 95 (6.62%) 295 (13.1%) 78 (17.5%)
## BIN 12 (5.80%) 93 (9.62%) 158 (11.0%) 234 (10.4%) 33 (7.42%)
## BHE 20 (9.66%) 35 (3.62%) 147 (10.2%) 283 (12.6%) 45 (10.1%)
## BEN 25 (12.1%) 60 (6.20%) 136 (9.48%) 271 (12.1%) 38 (8.54%)
## BRC 47 (22.7%) 195 (20.2%) 161 (11.2%) 117 (5.21%) 10 (2.25%)
## BWE 13 (6.28%) 31 (3.21%) 53 (3.69%) 331 (14.7%) 102 (22.9%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_nMS %>%
group_by(BPE, Res) %>%
count() %>%
ungroup() %>%
group_by(BPE) %>%
mutate(percent = 100*n / sum(n)) %>%
mutate(percent = round(percent, 2)) %>%
mutate(bar_text = paste0(percent, "%")) %>%
ungroup() -> df_for_ploting_nMS
df_for_ploting_nMS %>%
filter(Res == Res[5]) %>%
arrange(percent) %>%
pull(BPE) -> order_x_nMS
order_x_nMS
## [1] BRC BRE BSA BST BIN BEN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot:
my_colors_nMS <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_nMS <- "Roboto Condensed"
df_for_ploting_nMS %>%
mutate(BPE = factor(BPE, levels = order_x_nMS), Res = factor(Res, levels = Res[5:1])) -> df_odered_nMS
df_odered_nMS
## # A tibble: 50 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 15 2.83 2.83%
## 2 BST Disagree 143 27.0 26.98%
## 3 BST Normal 166 31.3 31.32%
## 4 BST Agree 176 33.2 33.21%
## 5 BST Very Agree 30 5.66 5.66%
## 6 BSC Very Disagree 18 3.4 3.4%
## 7 BSC Disagree 42 7.92 7.92%
## 8 BSC Normal 119 22.4 22.45%
## 9 BSC Agree 285 53.8 53.77%
## 10 BSC Very Agree 66 12.4 12.45%
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_nMS <- df_odered_nMS %>%
ggplot(aes(x = BPE, y = percent, fill = Res)) +
geom_col(width = 0.8) +
coord_flip() +
scale_fill_manual(values = my_colors[5:1], name = "") +
theme(legend.position = "top") +
theme(text = element_text(family = my_font)) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) +
theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) +
theme(axis.text = element_text(color = "grey20", size = 10.2)) +
theme(plot.margin = unit(rep(0.7, 4), "cm")) +
theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
theme(legend.key.height = unit(0.15, "mm")) +
labs(x = NULL, y = NULL,
title = "Bus perception of people not shifting to bus",
subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_nMS
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# For displaying percent of "Very Disagree":
df_odered_nMS %>%
filter(Res == "Very Disagree") -> df_for_text1_nMS
df_for_text1_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Disagree 15 2.83 2.83%
## 2 BSC Very Disagree 18 3.4 3.4%
## 3 BSA Very Disagree 18 3.4 3.4%
## 4 BRE Very Disagree 22 4.15 4.15%
## 5 BCO Very Disagree 17 3.21 3.21%
## 6 BIN Very Disagree 12 2.26 2.26%
## 7 BHE Very Disagree 20 3.77 3.77%
## 8 BEN Very Disagree 25 4.72 4.72%
## 9 BRC Very Disagree 47 8.87 8.87%
## 10 BWE Very Disagree 13 2.45 2.45%
# For displaying percent of "Disagree":
df_odered_nMS %>%
filter(Res == "Disagree") %>%
filter(percent >= 3) -> df_for_text2_nMS
df_for_text2_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Disagree 143 27.0 26.98%
## 2 BSC Disagree 42 7.92 7.92%
## 3 BSA Disagree 210 39.6 39.62%
## 4 BRE Disagree 113 21.3 21.32%
## 5 BCO Disagree 45 8.49 8.49%
## 6 BIN Disagree 93 17.6 17.55%
## 7 BHE Disagree 35 6.6 6.6%
## 8 BEN Disagree 60 11.3 11.32%
## 9 BRC Disagree 195 36.8 36.79%
## 10 BWE Disagree 31 5.85 5.85%
# For displaying percent of "Normal":
df_odered_nMS %>%
filter(Res == "Normal") %>%
filter(percent >= 3) -> df_for_text3_nMS
df_for_text3_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Normal 166 31.3 31.32%
## 2 BSC Normal 119 22.4 22.45%
## 3 BSA Normal 177 33.4 33.4%
## 4 BRE Normal 223 42.1 42.08%
## 5 BCO Normal 95 17.9 17.92%
## 6 BIN Normal 158 29.8 29.81%
## 7 BHE Normal 147 27.7 27.74%
## 8 BEN Normal 136 25.7 25.66%
## 9 BRC Normal 161 30.4 30.38%
## 10 BWE Normal 53 10 10%
# For displaying percent of "Agree":
df_odered_nMS %>%
filter(Res == "Agree") %>%
filter(percent >= 3) -> df_for_text4_nMS
df_for_text4_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Agree 176 33.2 33.21%
## 2 BSC Agree 285 53.8 53.77%
## 3 BSA Agree 103 19.4 19.43%
## 4 BRE Agree 151 28.5 28.49%
## 5 BCO Agree 295 55.7 55.66%
## 6 BIN Agree 234 44.2 44.15%
## 7 BHE Agree 283 53.4 53.4%
## 8 BEN Agree 271 51.1 51.13%
## 9 BRC Agree 117 22.1 22.08%
## 10 BWE Agree 331 62.4 62.45%
# For displaying percent of "Very Agree":
df_odered_nMS %>%
filter(Res == "Very Agree") %>%
filter(percent >= 3) -> df_for_text5_nMS
df_for_text5_nMS
## # A tibble: 9 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 30 5.66 5.66%
## 2 BSC Very Agree 66 12.4 12.45%
## 3 BSA Very Agree 22 4.15 4.15%
## 4 BRE Very Agree 21 3.96 3.96%
## 5 BCO Very Agree 78 14.7 14.72%
## 6 BIN Very Agree 33 6.23 6.23%
## 7 BHE Very Agree 45 8.49 8.49%
## 8 BEN Very Agree 38 7.17 7.17%
## 9 BWE Very Agree 102 19.2 19.25%
# For displaying percent of "Very Agree":
df_odered_nMS %>%
filter(Res == "Very Agree") -> df_for_text_nMS
df_for_text_nMS
## # A tibble: 10 x 5
## BPE Res n percent bar_text
## <fct> <fct> <int> <dbl> <chr>
## 1 BST Very Agree 30 5.66 5.66%
## 2 BSC Very Agree 66 12.4 12.45%
## 3 BSA Very Agree 22 4.15 4.15%
## 4 BRE Very Agree 21 3.96 3.96%
## 5 BCO Very Agree 78 14.7 14.72%
## 6 BIN Very Agree 33 6.23 6.23%
## 7 BHE Very Agree 45 8.49 8.49%
## 8 BEN Very Agree 38 7.17 7.17%
## 9 BRC Very Agree 10 1.89 1.89%
## 10 BWE Very Agree 102 19.2 19.25%
# Ad text layers:
gg_nMS +
geom_text(data = df_for_text1_nMS %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_nMS) +
geom_text(data = df_for_text_nMS, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_nMS)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database