1. Read and Create Data

t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\7. Data MSI - 810.csv"
MS = read.csv(t, header = T, sep = ";")
head(MS)
##   ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1  1  0  1  6  3  3  1  2 10  1  1  0  4  1  0   1 15   1   3   3   3   4   4
## 2  2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5
## 3  3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4
## 4  4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4
## 5  5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4
## 6  6  0  1  4  1  4  1 20 30  1  1  0  5  3  0   2 50   0   3   2   4   4   4
##   BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1   4   4   3   4   4   4   4  1  1  1   0  0  5  4  2  2  0  0  0  0  0  1  2
## 2   2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3
## 3   3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3
## 4   4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3
## 5   2   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2
## 6   3   3   3   2   4   4   4  2  0  0   0  1  4  3  4  2  1  1  1  1  1  1  2
##   NR
## 1  1
## 2  0
## 3  0
## 4  0
## 5  0
## 6  1
str(MS$MSI)
##  int [1:810] 0 0 0 0 0 0 0 1 0 0 ...
# View data (number of rows and colume)
dim(MS) 
## [1] 810  47
# Data non bus
BS = subset(MS,TM < 7)
BS = BS[, 2:47]
head(BS)
##   CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE
## 1  0  1  6  3  3  1  2 10  1  1  0  4  1  0   1 15   1   3   3   3   4   4   4
## 2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5   2
## 3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4   3
## 4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4   4
## 5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4   2
## 6  0  1  4  1  4  1 20 30  1  1  0  5  3  0   2 50   0   3   2   4   4   4   3
##   BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 1   4   3   4   4   4   4  1  1  1   0  0  5  4  2  2  0  0  0  0  0  1  2  1
## 2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3  0
## 3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3  0
## 4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3  0
## 5   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2  0
## 6   3   3   2   4   4   4  2  0  0   0  1  4  3  4  2  1  1  1  1  1  1  2  1
dim(BS)
## [1] 724  46
# Sumerize Data
summary(BS) 
##        CA               BC               TM             TP      
##  Min.   :0.0000   Min.   :0.0000   Min.   :1.00   Min.   :1.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:3.00   1st Qu.:1.00  
##  Median :0.0000   Median :1.0000   Median :3.00   Median :1.00  
##  Mean   :0.4544   Mean   :0.5207   Mean   :3.13   Mean   :1.57  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:3.00   3rd Qu.:2.00  
##  Max.   :1.0000   Max.   :1.0000   Max.   :6.00   Max.   :4.00  
##        FR              DT              DI               TI        
##  Min.   :1.000   Min.   :1.000   Min.   : 0.015   Min.   :  0.00  
##  1st Qu.:4.000   1st Qu.:1.000   1st Qu.: 2.000   1st Qu.: 10.00  
##  Median :4.000   Median :1.000   Median : 5.000   Median : 15.00  
##  Mean   :3.622   Mean   :1.863   Mean   : 6.159   Mean   : 16.84  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 20.00  
##  Max.   :4.000   Max.   :4.000   Max.   :35.000   Max.   :180.00  
##        SC               LS               TS               MR       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :1.000  
##  1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:2.000  
##  Median :1.0000   Median :1.0000   Median :1.0000   Median :2.000  
##  Mean   :0.8674   Mean   :0.7873   Mean   :0.7044   Mean   :2.798  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :3.0000   Max.   :6.000  
##        WE              WK              NBR              CO       
##  Min.   :1.000   Min.   :0.0000   Min.   :1.000   Min.   :  0.0  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:  4.0  
##  Median :1.000   Median :0.0000   Median :2.000   Median : 10.0  
##  Mean   :1.664   Mean   :0.2445   Mean   :2.916   Mean   : 13.4  
##  3rd Qu.:3.000   3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.: 15.0  
##  Max.   :3.000   Max.   :1.0000   Max.   :6.000   Max.   :300.0  
##       BUB              SBM             SBS             BST       
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :1.0000   Median :2.000   Median :2.000   Median :3.000  
##  Mean   :0.6119   Mean   :2.628   Mean   :2.474   Mean   :2.711  
##  3rd Qu.:1.0000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##       BSC             BSA             BRE            BCO             BIN      
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.00  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.00   1st Qu.:2.000   1st Qu.:2.00  
##  Median :4.000   Median :4.000   Median :4.00   Median :3.000   Median :3.00  
##  Mean   :3.474   Mean   :3.583   Mean   :3.33   Mean   :3.088   Mean   :3.03  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:4.00  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.00  
##       BHE             BEN            BRC             BWE              SP       
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:3.00   1st Qu.:3.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :3.000   Median :4.00   Median :4.000   Median :4.000   Median :1.000  
##  Mean   :2.829   Mean   :3.67   Mean   :3.713   Mean   :3.862   Mean   :1.023  
##  3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:1.000  
##  Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000   Max.   :2.000  
##        BI               WI             MSI              GD        
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.000   Median :0.000   Median :1.0000  
##  Mean   :0.5622   Mean   :1.133   Mean   :0.268   Mean   :0.5843  
##  3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :6.000   Max.   :1.000   Max.   :1.0000  
##        AG              OC              IN              NC        
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:0.0000  
##  Median :4.000   Median :3.000   Median :2.000   Median :1.0000  
##  Mean   :3.544   Mean   :4.225   Mean   :2.267   Mean   :0.7845  
##  3rd Qu.:4.000   3rd Qu.:7.000   3rd Qu.:3.000   3rd Qu.:1.0000  
##  Max.   :6.000   Max.   :8.000   Max.   :4.000   Max.   :3.0000  
##        MC               CC               BO               MO       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :1.000  
##  Mean   :0.8798   Mean   :0.2044   Mean   :0.4019   Mean   :0.826  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000  
##        RO               NB              NM              NR        
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:2.000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.000   Median :2.000   Median :0.0000  
##  Mean   :0.1575   Mean   :0.703   Mean   :2.189   Mean   :0.2735  
##  3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:3.000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :4.000   Max.   :4.000   Max.   :3.0000
# Đổi biến TM (1-Walk/Bike, 2-Motor, 3-car,4-) 
BS$TM[BS$TM == 2] <- 1
BS$TM[BS$TM == 3] <- 2
BS$TM[BS$TM == 4] <- 3
BS$TM[BS$TM == 5] <- 4
BS$TM[BS$TM == 6] <- 4
head(BS$TM)
## [1] 4 2 2 2 2 3
# Đổi biến AG (1-<=24,2-24-60,3->60)
BS$AG[BS$AG == 2] <- 1
BS$AG[BS$AG == 3] <- 1
BS$AG[BS$AG == 4] <- 2
BS$AG[BS$AG == 5] <- 2
BS$AG[BS$AG == 6] <- 3
head(BS$AG)
## [1] 2 1 1 1 2 2
# Đổi biến OC (1-HS/SV,2-CNVC,3-Nội trợ,4-LĐ tự do,5-Khác)
BS$OC[BS$OC == 2] <- 1
BS$OC[BS$OC == 3] <- 2
BS$OC[BS$OC == 6] <- 2
BS$OC[BS$OC == 4] <- 3
BS$OC[BS$OC == 5] <- 3
BS$OC[BS$OC == 7] <- 4
BS$OC[BS$OC == 8] <- 5
head(BS$OC)
## [1] 3 2 1 1 2 2
# Đổi biến FR (1-1l/t,2-2l/t,3->=3l/t)
BS$FR[BS$FR == 4] <- 3
head(BS$FR)
## [1] 3 3 3 3 3 3
# Đổi biến DT (1-CĐ/6-8h/16-18h,0-Ngoài giờ CĐ)
BS$DT[BS$DT == 3] <- 1
BS$DT[BS$DT == 2] <- 0
BS$DT[BS$DT == 4] <- 0
head(BS$DT)
## [1] 1 1 1 1 1 1
# Đổi biến TS (1-có, 2-không)
BS$TS[BS$TS == 2] <- 1
BS$TS[BS$TS == 3] <- 1
head(BS$TS)
## [1] 0 1 1 1 0 0
# Đổi biến WE (1-có mưa, 2-không)
BS$WE[BS$WE == 1] <- 0
BS$WE[BS$WE == 2] <- 1
BS$WE[BS$WE == 3] <- 0
head(BS$WE)
## [1] 0 0 0 0 0 0
# Đổi biến SBM (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$SBM[BS$SBM == 2] <- 1
BS$SBM[BS$SBM == 3] <- 2
BS$SBM[BS$SBM == 4] <- 3
BS$SBM[BS$SBM == 5] <- 3
head(BS$SBM)
## [1] 2 1 1 1 1 2
# Đổi biến SBS (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$SBS[BS$SBS == 2] <- 1
BS$SBS[BS$SBS == 3] <- 2
BS$SBS[BS$SBS == 4] <- 3
BS$SBS[BS$SBS == 5] <- 3
head(BS$SBS)
## [1] 2 1 1 1 1 1
# Đổi biến BST (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BST[BS$BST == 2] <- 1
BS$BST[BS$BST == 3] <- 2
BS$BST[BS$BST == 4] <- 3
BS$BST[BS$BST == 5] <- 3
head(BS$BST)
## [1] 2 1 1 2 2 3
# Đổi biến BSC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BSC[BS$BSC == 2] <- 1
BS$BSC[BS$BSC == 3] <- 2
BS$BSC[BS$BSC == 4] <- 3
BS$BSC[BS$BSC == 5] <- 3
head(BS$BSC)
## [1] 3 3 3 3 3 3
# Đổi biến BSA (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BSA[BS$BSA == 2] <- 1
BS$BSA[BS$BSA == 3] <- 2
BS$BSA[BS$BSA == 4] <- 3
BS$BSA[BS$BSA == 5] <- 3
head(BS$BSA)
## [1] 3 3 3 3 3 3
# Đổi biến BRE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BRE[BS$BRE == 2] <- 1
BS$BRE[BS$BRE == 3] <- 2
BS$BRE[BS$BRE == 4] <- 3
BS$BRE[BS$BRE == 5] <- 3
head(BS$BRE)
## [1] 3 1 2 3 1 2
# Đổi biến BCO (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BCO[BS$BCO == 2] <- 1
BS$BCO[BS$BCO == 3] <- 2
BS$BCO[BS$BCO == 4] <- 3
BS$BCO[BS$BCO == 5] <- 3
head(BS$BCO)
## [1] 3 1 2 3 1 2
# Đổi biến BIN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BIN[BS$BIN == 2] <- 1
BS$BIN[BS$BIN == 3] <- 2
BS$BIN[BS$BIN == 4] <- 3
BS$BIN[BS$BIN == 5] <- 3
head(BS$BIN)
## [1] 2 3 3 3 2 2
# Đổi biến BHE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BHE[BS$BHE == 2] <- 1
BS$BHE[BS$BHE == 3] <- 2
BS$BHE[BS$BHE == 4] <- 3
BS$BHE[BS$BHE == 5] <- 3
head(BS$BHE)
## [1] 3 1 1 2 1 1
# Đổi biến BEN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BEN[BS$BEN == 2] <- 1
BS$BEN[BS$BEN == 3] <- 2
BS$BEN[BS$BEN == 4] <- 3
BS$BEN[BS$BEN == 5] <- 3
head(BS$BEN)
## [1] 3 3 3 2 3 3
# Đổi biến BRC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BRC[BS$BRC == 2] <- 1
BS$BRC[BS$BRC == 3] <- 2
BS$BRC[BS$BRC == 4] <- 3
BS$BRC[BS$BRC == 5] <- 3
head(BS$BRC)
## [1] 3 3 3 2 3 3
# Đổi biến BWE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
BS$BWE[BS$BWE == 2] <- 1
BS$BWE[BS$BWE == 3] <- 2
BS$BWE[BS$BWE == 4] <- 3
BS$BWE[BS$BWE == 5] <- 3
head(BS$BWE)
## [1] 3 2 3 3 3 3
# Đổi biến NB (0-0xe,1-1xe,2->=2xe)
BS$NB[BS$NB == 3] <- 2
BS$NB[BS$NB == 4] <- 2
head(BS$NB)
## [1] 1 1 0 1 0 1
# Đổi biến NM (0-0xe,1-1xe,2-2xe,3->=3xe)
BS$NM[BS$NM == 4] <- 3
head(BS$NM)
## [1] 2 3 3 3 2 2
# Đổi biến NR (0-0xe,1-Có xe)
BS$NR[BS$NR == 2] <- 1
BS$NR[BS$NR == 3] <- 1
head(BS$NR)
## [1] 1 0 0 0 0 1
# Đổi biến NC (0-No,1-Yes)
BS$NC[BS$NC == 2] <- 1
BS$NC[BS$NC == 3] <- 1
head(BS$NC)
## [1] 1 1 0 0 1 1
# Đổi biến WI (0-No,1-Yes)
BS$WI[BS$WI >= 5] <- 4
BS$WI[BS$WI >= 6] <- 4
head(BS$WI)
## [1] 1 0 2 1 0 0
head(BS$BUB)
## [1] 1 0 1 1 1 0
head(BS)
##   CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE
## 1  0  1  4  3  3  1  2 10  1  1  0  4  0  0   1 15   1   2   2   2   3   3   3
## 2  0  1  2  1  3  1  8 15  1  1  1  2  0  0   1  5   0   1   1   1   3   3   1
## 3  0  1  2  1  3  1  5 15  1  1  1  4  0  0   2  3   1   1   1   1   3   3   2
## 4  0  1  2  1  3  1  5 10  1  1  1  2  0  0   4 10   1   1   1   2   3   3   3
## 5  0  1  2  1  3  1  8 15  1  1  0  2  0  0   2  5   1   1   1   2   3   3   1
## 6  0  1  3  1  3  1 20 30  1  1  0  5  0  0   2 50   0   2   1   3   3   3   2
##   BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 1   3   2   3   3   3   3  1  1  1   0  0  2  3  2  1  0  0  0  0  0  1  2  1
## 2   1   3   1   3   3   2  2  0  0   0  0  1  2  3  1  1  0  1  1  0  1  3  0
## 3   2   3   1   3   3   3  1  1  2   0  1  1  1  4  0  1  0  0  1  0  0  3  0
## 4   3   3   2   2   2   3  1  1  1   0  1  1  1  3  0  1  0  1  1  0  1  3  0
## 5   1   2   1   3   3   3  2  0  0   0  0  2  2  3  1  1  0  0  1  0  0  2  0
## 6   2   2   1   3   3   3  2  0  0   0  1  2  2  4  1  1  1  1  1  1  1  2  1
dim(BS)
## [1] 724  46
str(BS)
## 'data.frame':    724 obs. of  46 variables:
##  $ CA : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TM : num  4 2 2 2 2 3 3 2 2 2 ...
##  $ TP : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ FR : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ DT : num  1 1 1 1 1 1 1 1 0 1 ...
##  $ DI : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ TI : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : num  0 1 1 1 0 0 0 0 1 1 ...
##  $ MR : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ WE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ WK : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ NBR: int  1 1 2 4 2 2 4 2 1 4 ...
##  $ CO : num  15 5 3 10 5 50 40 5 7 7 ...
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: num  2 1 1 1 1 2 1 2 2 2 ...
##  $ SBS: num  2 1 1 1 1 1 1 1 2 1 ...
##  $ BST: num  2 1 1 2 2 3 3 3 3 1 ...
##  $ BSC: num  3 3 3 3 3 3 3 3 3 3 ...
##  $ BSA: num  3 3 3 3 3 3 3 3 3 3 ...
##  $ BRE: num  3 1 2 3 1 2 1 2 2 2 ...
##  $ BCO: num  3 1 2 3 1 2 1 2 2 1 ...
##  $ BIN: num  2 3 3 3 2 2 2 2 2 3 ...
##  $ BHE: num  3 1 1 2 1 1 1 2 2 1 ...
##  $ BEN: num  3 3 3 2 3 3 3 3 3 3 ...
##  $ BRC: num  3 3 3 2 3 3 3 3 3 3 ...
##  $ BWE: num  3 2 3 3 3 3 3 3 3 3 ...
##  $ SP : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ BI : int  1 0 1 1 0 0 0 1 1 1 ...
##  $ WI : num  1 0 2 1 0 0 0 2 3 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 1 0 0 ...
##  $ GD : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ AG : num  2 1 1 1 2 2 2 2 1 2 ...
##  $ OC : num  3 2 1 1 2 2 2 4 4 2 ...
##  $ IN : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ NC : num  1 1 0 0 1 1 1 0 1 0 ...
##  $ MC : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ CC : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ BO : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ MO : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ RO : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ NB : num  1 1 0 1 0 1 0 0 1 0 ...
##  $ NM : num  2 3 3 3 2 2 2 2 3 3 ...
##  $ NR : num  1 0 0 0 0 1 1 0 0 0 ...
# Reformat variables in factor variables - Đổi định dạng biến
attach(BS)
BS = within(BS, {
  CA = factor(CA, labels = c("No", "Yes"))
  WE = factor(WE, labels = c("No", "Yes"))
  WK = factor(WK, labels = c("No", "Yes"))
  TM = factor(TM, labels = c("Walk/Bicycle", "Motorbike", "Car", "Hichhiking/Taxi"))
  TP = factor(TP, labels = c("Work/Study", "Picking Children", "Entertainment/Food", "Others"))
  FR = factor(FR, labels = c("Once", "twice", ">=3 times"))
  DT = factor(DT, labels = c("Normal", "Rush hour"))
  TS = factor(TS, labels = c("No", "Yes"))
  SC = factor(SC, labels = c("No", "Yes"))
  LS = factor(LS, labels = c("No", "Yes"))
  BC = factor(BC,labels = c("No", "Yes"))
  SP = factor(SP, labels = c("No", "Yes", "Don't know"))
  BI = factor(BI, labels = c("No", "Yes"))
  WI = factor(WI, labels = c("No", "At Bus Stops", "Internet/Facebook", "From Friends", "Others"))
  MSI = factor(MSI, labels = c("No", "Yes"))
  BST = factor(BST, labels = c("Don't agree", "Normal", "Agree"))
  BSC = factor(BSC, labels = c("Don't agree", "Normal", "Agree"))
  BSA = factor(BSA, labels = c("Don't agree", "Normal", "Agree"))
  BRE = factor(BRE, labels = c("Don't agree", "Normal", "Agree"))
  BCO = factor(BCO, labels = c("Don't agree", "Normal", "Agree"))
  BIN = factor(BIN, labels = c("Don't agree", "Normal", "Agree"))
  BHE = factor(BHE, labels = c("Don't agree", "Normal", "Agree"))
  BEN = factor(BEN, labels = c("Don't agree", "Normal", "Agree"))
  BRC = factor(BRC, labels = c("Don't agree", "Normal", "Agree"))
  BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
  SBM = factor(SBM, labels = c("Don't agree", "Normal", "Agree"))
  SBS = factor(SBS, labels = c("Don't agree", "Normal", "Agree"))
  BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
  MR = factor(MR, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
  NBR = factor(NBR, labels = c("No Route", "Inconvenience", "Unsafety", "Long waiting time", "Unreliability", "others"))
  BUB = factor(BUB, labels = c("No", "Yes"))
  GD = factor(GD, labels = c("Female", "Male"))
  AG = factor(AG, labels = c("<=24", "25-60", ">60"))
  OC = factor(OC, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
  IN = factor(IN, labels = c("<8 millions", "(8-15) millions", "(15-25) millions",  ">25 millions"))
  NC = factor(NC, labels = c("No", "Yes"))
  MC = factor(MC, labels = c("No", "Yes"))
  CC = factor(CC, labels = c("No", "Yes"))
  BO = factor(BO, labels = c("No", "Yes"))
  MO = factor(MO, labels = c("No", "Yes"))
  RO = factor(RO, labels = c("No", "Yes"))
  NB = factor(NB, labels = c("None", "1", ">=2"))
  NM = factor(NM, labels = c("None", "1", "2", ">=3"))
  NR = factor(NR, labels = c("No", "Yes"))
  DI = as.numeric(DI)
  CO = as.numeric(CO)
  TI = as.numeric(TI)
    } )
head(BS)
##   CA  BC              TM                 TP        FR        DT DI TI  SC  LS
## 1 No Yes Hichhiking/Taxi Entertainment/Food >=3 times Rush hour  2 10 Yes Yes
## 2 No Yes       Motorbike         Work/Study >=3 times Rush hour  8 15 Yes Yes
## 3 No Yes       Motorbike         Work/Study >=3 times Rush hour  5 15 Yes Yes
## 4 No Yes       Motorbike         Work/Study >=3 times Rush hour  5 10 Yes Yes
## 5 No Yes       Motorbike         Work/Study >=3 times Rush hour  8 15 Yes Yes
## 6 No Yes             Car         Work/Study >=3 times Rush hour 20 30 Yes Yes
##    TS            MR WE WK               NBR CO BUB         SBM         SBS
## 1  No Accessibility No No          No Route 15 Yes      Normal      Normal
## 2 Yes   Comfortable No No          No Route  5  No Don't agree Don't agree
## 3 Yes Accessibility No No     Inconvenience  3 Yes Don't agree Don't agree
## 4 Yes   Comfortable No No Long waiting time 10 Yes Don't agree Don't agree
## 5  No   Comfortable No No     Inconvenience  5 Yes Don't agree Don't agree
## 6  No   Reliability No No     Inconvenience 50  No      Normal Don't agree
##           BST   BSC   BSA         BRE         BCO    BIN         BHE    BEN
## 1      Normal Agree Agree       Agree       Agree Normal       Agree  Agree
## 2 Don't agree Agree Agree Don't agree Don't agree  Agree Don't agree  Agree
## 3 Don't agree Agree Agree      Normal      Normal  Agree Don't agree  Agree
## 4      Normal Agree Agree       Agree       Agree  Agree      Normal Normal
## 5      Normal Agree Agree Don't agree Don't agree Normal Don't agree  Agree
## 6       Agree Agree Agree      Normal      Normal Normal Don't agree  Agree
##      BRC    BWE         SP  BI                WI MSI     GD    AG
## 1  Agree  Agree        Yes Yes      At Bus Stops  No Female 25-60
## 2  Agree Normal Don't know  No                No  No Female  <=24
## 3  Agree  Agree        Yes Yes Internet/Facebook  No   Male  <=24
## 4 Normal  Agree        Yes Yes      At Bus Stops  No   Male  <=24
## 5  Agree  Agree Don't know  No                No  No Female 25-60
## 6  Agree  Agree Don't know  No                No  No   Male 25-60
##                     OC               IN  NC  MC  CC  BO  MO  RO   NB  NM  NR
## 1 Housewife/Unemployed  (8-15) millions Yes  No  No  No  No  No    1   2 Yes
## 2     Officers/Workers (15-25) millions Yes Yes  No Yes Yes  No    1 >=3  No
## 3      Pupils/Students     >25 millions  No Yes  No  No Yes  No None >=3  No
## 4      Pupils/Students (15-25) millions  No Yes  No Yes Yes  No    1 >=3  No
## 5     Officers/Workers (15-25) millions Yes Yes  No  No Yes  No None   2  No
## 6     Officers/Workers     >25 millions Yes Yes Yes Yes Yes Yes    1   2 Yes
str(BS)
## 'data.frame':    724 obs. of  46 variables:
##  $ CA : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ BC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ TM : Factor w/ 4 levels "Walk/Bicycle",..: 4 2 2 2 2 3 3 2 2 2 ...
##  $ TP : Factor w/ 4 levels "Work/Study","Picking Children",..: 3 1 1 1 1 1 1 1 1 1 ...
##  $ FR : Factor w/ 3 levels "Once","twice",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ DT : Factor w/ 2 levels "Normal","Rush hour": 2 2 2 2 2 2 2 2 1 2 ...
##  $ DI : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ TI : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ SC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ LS : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ TS : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 1 1 1 2 2 ...
##  $ MR : Factor w/ 6 levels "Safety","Comfortable",..: 4 2 4 2 2 5 5 2 2 2 ...
##  $ WE : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ WK : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ NBR: Factor w/ 6 levels "No Route","Inconvenience",..: 1 1 2 4 2 2 4 2 1 4 ...
##  $ CO : num  15 5 3 10 5 50 40 5 7 7 ...
##  $ BUB: Factor w/ 2 levels "No","Yes": 2 1 2 2 2 1 1 2 2 2 ...
##  $ SBM: Factor w/ 3 levels "Don't agree",..: 2 1 1 1 1 2 1 2 2 2 ...
##  $ SBS: Factor w/ 3 levels "Don't agree",..: 2 1 1 1 1 1 1 1 2 1 ...
##  $ BST: Factor w/ 3 levels "Don't agree",..: 2 1 1 2 2 3 3 3 3 1 ...
##  $ BSC: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ BSA: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ BRE: Factor w/ 3 levels "Don't agree",..: 3 1 2 3 1 2 1 2 2 2 ...
##  $ BCO: Factor w/ 3 levels "Don't agree",..: 3 1 2 3 1 2 1 2 2 1 ...
##  $ BIN: Factor w/ 3 levels "Don't agree",..: 2 3 3 3 2 2 2 2 2 3 ...
##  $ BHE: Factor w/ 3 levels "Don't agree",..: 3 1 1 2 1 1 1 2 2 1 ...
##  $ BEN: Factor w/ 3 levels "Don't agree",..: 3 3 3 2 3 3 3 3 3 3 ...
##  $ BRC: Factor w/ 3 levels "Don't agree",..: 3 3 3 2 3 3 3 3 3 3 ...
##  $ BWE: Factor w/ 3 levels "Don't agree",..: 3 2 3 3 3 3 3 3 3 3 ...
##  $ SP : Factor w/ 3 levels "No","Yes","Don't know": 2 3 2 2 3 3 3 2 2 2 ...
##  $ BI : Factor w/ 2 levels "No","Yes": 2 1 2 2 1 1 1 2 2 2 ...
##  $ WI : Factor w/ 5 levels "No","At Bus Stops",..: 2 1 3 2 1 1 1 3 4 3 ...
##  $ MSI: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ GD : Factor w/ 2 levels "Female","Male": 1 1 2 2 1 2 2 1 1 2 ...
##  $ AG : Factor w/ 3 levels "<=24","25-60",..: 2 1 1 1 2 2 2 2 1 2 ...
##  $ OC : Factor w/ 5 levels "Pupils/Students",..: 3 2 1 1 2 2 2 4 4 2 ...
##  $ IN : Factor w/ 4 levels "<8 millions",..: 2 3 4 3 3 4 4 2 3 3 ...
##  $ NC : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 2 1 2 1 ...
##  $ MC : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ CC : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
##  $ BO : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 1 2 1 ...
##  $ MO : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ RO : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
##  $ NB : Factor w/ 3 levels "None","1",">=2": 2 2 1 2 1 2 1 1 2 1 ...
##  $ NM : Factor w/ 4 levels "None","1","2",..: 3 4 4 4 3 3 3 3 4 4 ...
##  $ NR : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 2 2 1 1 1 ...

2. Descriptive analyse

# Descriptive analyse with package "table 1" - sort by MSI
  ## install.packages("table1", dependencies = T)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1 (~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TM + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI| MSI , data = BS)
No
(N=530)
Yes
(N=194)
Overall
(N=724)
GD
Female 231 (43.6%) 70 (36.1%) 301 (41.6%)
Male 299 (56.4%) 124 (63.9%) 423 (58.4%)
AG
<=24 216 (40.8%) 106 (54.6%) 322 (44.5%)
25-60 302 (57.0%) 82 (42.3%) 384 (53.0%)
>60 12 (2.3%) 6 (3.1%) 18 (2.5%)
OC
Pupils/Students 166 (31.3%) 93 (47.9%) 259 (35.8%)
Officers/Workers 164 (30.9%) 40 (20.6%) 204 (28.2%)
Housewife/Unemployed 34 (6.4%) 6 (3.1%) 40 (5.5%)
Free labor 135 (25.5%) 35 (18.0%) 170 (23.5%)
Others 31 (5.8%) 20 (10.3%) 51 (7.0%)
IN
<8 millions 119 (22.5%) 36 (18.6%) 155 (21.4%)
(8-15) millions 214 (40.4%) 90 (46.4%) 304 (42.0%)
(15-25) millions 134 (25.3%) 48 (24.7%) 182 (25.1%)
>25 millions 63 (11.9%) 20 (10.3%) 83 (11.5%)
NC
No 222 (41.9%) 81 (41.8%) 303 (41.9%)
Yes 308 (58.1%) 113 (58.2%) 421 (58.1%)
MC
No 50 (9.4%) 37 (19.1%) 87 (12.0%)
Yes 480 (90.6%) 157 (80.9%) 637 (88.0%)
CC
No 413 (77.9%) 163 (84.0%) 576 (79.6%)
Yes 117 (22.1%) 31 (16.0%) 148 (20.4%)
BO
No 314 (59.2%) 119 (61.3%) 433 (59.8%)
Yes 216 (40.8%) 75 (38.7%) 291 (40.2%)
MO
No 78 (14.7%) 48 (24.7%) 126 (17.4%)
Yes 452 (85.3%) 146 (75.3%) 598 (82.6%)
RO
No 444 (83.8%) 166 (85.6%) 610 (84.3%)
Yes 86 (16.2%) 28 (14.4%) 114 (15.7%)
NB
None 242 (45.7%) 82 (42.3%) 324 (44.8%)
1 212 (40.0%) 91 (46.9%) 303 (41.9%)
>=2 76 (14.3%) 21 (10.8%) 97 (13.4%)
NM
None 17 (3.2%) 3 (1.5%) 20 (2.8%)
1 80 (15.1%) 24 (12.4%) 104 (14.4%)
2 279 (52.6%) 90 (46.4%) 369 (51.0%)
>=3 154 (29.1%) 77 (39.7%) 231 (31.9%)
NR
No 399 (75.3%) 147 (75.8%) 546 (75.4%)
Yes 131 (24.7%) 47 (24.2%) 178 (24.6%)
TM
Walk/Bicycle 45 (8.5%) 30 (15.5%) 75 (10.4%)
Motorbike 391 (73.8%) 125 (64.4%) 516 (71.3%)
Car 64 (12.1%) 18 (9.3%) 82 (11.3%)
Hichhiking/Taxi 30 (5.7%) 21 (10.8%) 51 (7.0%)
TP
Work/Study 337 (63.6%) 146 (75.3%) 483 (66.7%)
Picking Children 72 (13.6%) 13 (6.7%) 85 (11.7%)
Entertainment/Food 108 (20.4%) 32 (16.5%) 140 (19.3%)
Others 13 (2.5%) 3 (1.5%) 16 (2.2%)
FR
Once 35 (6.6%) 9 (4.6%) 44 (6.1%)
twice 37 (7.0%) 9 (4.6%) 46 (6.4%)
>=3 times 458 (86.4%) 176 (90.7%) 634 (87.6%)
DT
Normal 124 (23.4%) 41 (21.1%) 165 (22.8%)
Rush hour 406 (76.6%) 153 (78.9%) 559 (77.2%)
TS
No 252 (47.5%) 107 (55.2%) 359 (49.6%)
Yes 278 (52.5%) 87 (44.8%) 365 (50.4%)
SP
No 52 (9.8%) 12 (6.2%) 64 (8.8%)
Yes 404 (76.2%) 175 (90.2%) 579 (80.0%)
Don't know 74 (14.0%) 7 (3.6%) 81 (11.2%)
BI
No 267 (50.4%) 50 (25.8%) 317 (43.8%)
Yes 263 (49.6%) 144 (74.2%) 407 (56.2%)
WI
No 267 (50.4%) 50 (25.8%) 317 (43.8%)
At Bus Stops 139 (26.2%) 66 (34.0%) 205 (28.3%)
Internet/Facebook 55 (10.4%) 35 (18.0%) 90 (12.4%)
From Friends 34 (6.4%) 28 (14.4%) 62 (8.6%)
Others 35 (6.6%) 15 (7.7%) 50 (6.9%)
LS
No 106 (20.0%) 48 (24.7%) 154 (21.3%)
Yes 424 (80.0%) 146 (75.3%) 570 (78.7%)
SC
No 78 (14.7%) 18 (9.3%) 96 (13.3%)
Yes 452 (85.3%) 176 (90.7%) 628 (86.7%)
BC
No 231 (43.6%) 116 (59.8%) 347 (47.9%)
Yes 299 (56.4%) 78 (40.2%) 377 (52.1%)
WE
No 523 (98.7%) 192 (99.0%) 715 (98.8%)
Yes 7 (1.3%) 2 (1.0%) 9 (1.2%)
WK
No 397 (74.9%) 150 (77.3%) 547 (75.6%)
Yes 133 (25.1%) 44 (22.7%) 177 (24.4%)
CA
No 273 (51.5%) 122 (62.9%) 395 (54.6%)
Yes 257 (48.5%) 72 (37.1%) 329 (45.4%)
BST
Don't agree 242 (45.7%) 92 (47.4%) 334 (46.1%)
Normal 161 (30.4%) 53 (27.3%) 214 (29.6%)
Agree 127 (24.0%) 49 (25.3%) 176 (24.3%)
BSC
Don't agree 85 (16.0%) 30 (15.5%) 115 (15.9%)
Normal 136 (25.7%) 32 (16.5%) 168 (23.2%)
Agree 309 (58.3%) 132 (68.0%) 441 (60.9%)
BSA
Don't agree 55 (10.4%) 23 (11.9%) 78 (10.8%)
Normal 147 (27.7%) 31 (16.0%) 178 (24.6%)
Agree 328 (61.9%) 140 (72.2%) 468 (64.6%)
BRE
Don't agree 105 (19.8%) 41 (21.1%) 146 (20.2%)
Normal 158 (29.8%) 57 (29.4%) 215 (29.7%)
Agree 267 (50.4%) 96 (49.5%) 363 (50.1%)
BCO
Don't agree 158 (29.8%) 66 (34.0%) 224 (30.9%)
Normal 166 (31.3%) 57 (29.4%) 223 (30.8%)
Agree 206 (38.9%) 71 (36.6%) 277 (38.3%)
BIN
Don't agree 135 (25.5%) 66 (34.0%) 201 (27.8%)
Normal 223 (42.1%) 73 (37.6%) 296 (40.9%)
Agree 172 (32.5%) 55 (28.4%) 227 (31.4%)
BHE
Don't agree 228 (43.0%) 77 (39.7%) 305 (42.1%)
Normal 177 (33.4%) 57 (29.4%) 234 (32.3%)
Agree 125 (23.6%) 60 (30.9%) 185 (25.6%)
BEN
Don't agree 60 (11.3%) 22 (11.3%) 82 (11.3%)
Normal 119 (22.5%) 34 (17.5%) 153 (21.1%)
Agree 351 (66.2%) 138 (71.1%) 489 (67.5%)
BRC
Don't agree 62 (11.7%) 29 (14.9%) 91 (12.6%)
Normal 95 (17.9%) 21 (10.8%) 116 (16.0%)
Agree 373 (70.4%) 144 (74.2%) 517 (71.4%)
BWE
Don't agree 44 (8.3%) 24 (12.4%) 68 (9.4%)
Normal 53 (10.0%) 25 (12.9%) 78 (10.8%)
Agree 433 (81.7%) 145 (74.7%) 578 (79.8%)
MR
Safety 70 (13.2%) 19 (9.8%) 89 (12.3%)
Comfortable 255 (48.1%) 79 (40.7%) 334 (46.1%)
Low price 27 (5.1%) 15 (7.7%) 42 (5.8%)
Accessibility 117 (22.1%) 59 (30.4%) 176 (24.3%)
Reliability 46 (8.7%) 13 (6.7%) 59 (8.1%)
others 15 (2.8%) 9 (4.6%) 24 (3.3%)
NBR
No Route 114 (21.5%) 47 (24.2%) 161 (22.2%)
Inconvenience 165 (31.1%) 57 (29.4%) 222 (30.7%)
Unsafety 12 (2.3%) 7 (3.6%) 19 (2.6%)
Long waiting time 159 (30.0%) 48 (24.7%) 207 (28.6%)
Unreliability 50 (9.4%) 19 (9.8%) 69 (9.5%)
others 30 (5.7%) 16 (8.2%) 46 (6.4%)
BUB
No 227 (42.8%) 54 (27.8%) 281 (38.8%)
Yes 303 (57.2%) 140 (72.2%) 443 (61.2%)
SBM
Don't agree 300 (56.6%) 70 (36.1%) 370 (51.1%)
Normal 180 (34.0%) 50 (25.8%) 230 (31.8%)
Agree 50 (9.4%) 74 (38.1%) 124 (17.1%)
SBS
Don't agree 329 (62.1%) 93 (47.9%) 422 (58.3%)
Normal 176 (33.2%) 57 (29.4%) 233 (32.2%)
Agree 25 (4.7%) 44 (22.7%) 69 (9.5%)
CO
Mean (SD) 13.4 (20.4) 13.5 (22.9) 13.4 (21.0)
Median [Min, Max] 10.0 [0, 300] 10.0 [0, 250] 10.0 [0, 300]
DI
Mean (SD) 6.21 (5.42) 6.02 (5.48) 6.16 (5.44)
Median [Min, Max] 5.00 [0.0150, 35.0] 4.50 [0.0200, 30.0] 5.00 [0.0150, 35.0]
TI
Mean (SD) 16.0 (12.9) 19.1 (21.2) 16.8 (15.6)
Median [Min, Max] 15.0 [0, 180] 15.0 [1.00, 180] 15.0 [0, 180]
# Descriptive analyse with package "compareGroups" - sort by MSI (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
library(compareGroups)
t = compareGroups(MSI ~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TM + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI, data = BS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t)
## 
## --------Summary descriptives table by 'MSI'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=530       N=194              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## GD:                                                0.084   
##     Female               231 (43.6%) 70 (36.1%)            
##     Male                 299 (56.4%) 124 (63.9%)           
## AG:                                                0.002   
##     <=24                 216 (40.8%) 106 (54.6%)           
##     25-60                302 (57.0%) 82 (42.3%)            
##     >60                  12 (2.26%)   6 (3.09%)            
## OC:                                               <0.001   
##     Pupils/Students      166 (31.3%) 93 (47.9%)            
##     Officers/Workers     164 (30.9%) 40 (20.6%)            
##     Housewife/Unemployed 34 (6.42%)   6 (3.09%)            
##     Free labor           135 (25.5%) 35 (18.0%)            
##     Others               31 (5.85%)  20 (10.3%)            
## IN:                                                0.465   
##     <8 millions          119 (22.5%) 36 (18.6%)            
##     (8-15) millions      214 (40.4%) 90 (46.4%)            
##     (15-25) millions     134 (25.3%) 48 (24.7%)            
##     >25 millions         63 (11.9%)  20 (10.3%)            
## NC:                                                1.000   
##     No                   222 (41.9%) 81 (41.8%)            
##     Yes                  308 (58.1%) 113 (58.2%)           
## MC:                                                0.001   
##     No                   50 (9.43%)  37 (19.1%)            
##     Yes                  480 (90.6%) 157 (80.9%)           
## CC:                                                0.090   
##     No                   413 (77.9%) 163 (84.0%)           
##     Yes                  117 (22.1%) 31 (16.0%)            
## BO:                                                0.672   
##     No                   314 (59.2%) 119 (61.3%)           
##     Yes                  216 (40.8%) 75 (38.7%)            
## MO:                                                0.002   
##     No                   78 (14.7%)  48 (24.7%)            
##     Yes                  452 (85.3%) 146 (75.3%)           
## RO:                                                0.637   
##     No                   444 (83.8%) 166 (85.6%)           
##     Yes                  86 (16.2%)  28 (14.4%)            
## NB:                                                0.193   
##     None                 242 (45.7%) 82 (42.3%)            
##     1                    212 (40.0%) 91 (46.9%)            
##     >=2                  76 (14.3%)  21 (10.8%)            
## NM:                                                0.041   
##     None                 17 (3.21%)   3 (1.55%)            
##     1                    80 (15.1%)  24 (12.4%)            
##     2                    279 (52.6%) 90 (46.4%)            
##     >=3                  154 (29.1%) 77 (39.7%)            
## NR:                                                0.970   
##     No                   399 (75.3%) 147 (75.8%)           
##     Yes                  131 (24.7%) 47 (24.2%)            
## TM:                                                0.002   
##     Walk/Bicycle         45 (8.49%)  30 (15.5%)            
##     Motorbike            391 (73.8%) 125 (64.4%)           
##     Car                  64 (12.1%)  18 (9.28%)            
##     Hichhiking/Taxi      30 (5.66%)  21 (10.8%)            
## TP:                                                0.015   
##     Work/Study           337 (63.6%) 146 (75.3%)           
##     Picking Children     72 (13.6%)  13 (6.70%)            
##     Entertainment/Food   108 (20.4%) 32 (16.5%)            
##     Others               13 (2.45%)   3 (1.55%)            
## FR:                                                0.297   
##     Once                 35 (6.60%)   9 (4.64%)            
##     twice                37 (6.98%)   9 (4.64%)            
##     >=3 times            458 (86.4%) 176 (90.7%)           
## DT:                                                0.587   
##     Normal               124 (23.4%) 41 (21.1%)            
##     Rush hour            406 (76.6%) 153 (78.9%)           
## TS:                                                0.084   
##     No                   252 (47.5%) 107 (55.2%)           
##     Yes                  278 (52.5%) 87 (44.8%)            
## SP:                                               <0.001   
##     No                   52 (9.81%)  12 (6.19%)            
##     Yes                  404 (76.2%) 175 (90.2%)           
##     Don't know           74 (14.0%)   7 (3.61%)            
## BI:                                               <0.001   
##     No                   267 (50.4%) 50 (25.8%)            
##     Yes                  263 (49.6%) 144 (74.2%)           
## WI:                                               <0.001   
##     No                   267 (50.4%) 50 (25.8%)            
##     At Bus Stops         139 (26.2%) 66 (34.0%)            
##     Internet/Facebook    55 (10.4%)  35 (18.0%)            
##     From Friends         34 (6.42%)  28 (14.4%)            
##     Others               35 (6.60%)  15 (7.73%)            
## LS:                                                0.201   
##     No                   106 (20.0%) 48 (24.7%)            
##     Yes                  424 (80.0%) 146 (75.3%)           
## SC:                                                0.074   
##     No                   78 (14.7%)  18 (9.28%)            
##     Yes                  452 (85.3%) 176 (90.7%)           
## BC:                                               <0.001   
##     No                   231 (43.6%) 116 (59.8%)           
##     Yes                  299 (56.4%) 78 (40.2%)            
## WE:                                                1.000   
##     No                   523 (98.7%) 192 (99.0%)           
##     Yes                   7 (1.32%)   2 (1.03%)            
## WK:                                                0.568   
##     No                   397 (74.9%) 150 (77.3%)           
##     Yes                  133 (25.1%) 44 (22.7%)            
## CA:                                                0.008   
##     No                   273 (51.5%) 122 (62.9%)           
##     Yes                  257 (48.5%) 72 (37.1%)            
## BST:                                               0.725   
##     Don't agree          242 (45.7%) 92 (47.4%)            
##     Normal               161 (30.4%) 53 (27.3%)            
##     Agree                127 (24.0%) 49 (25.3%)            
## BSC:                                               0.025   
##     Don't agree          85 (16.0%)  30 (15.5%)            
##     Normal               136 (25.7%) 32 (16.5%)            
##     Agree                309 (58.3%) 132 (68.0%)           
## BSA:                                               0.005   
##     Don't agree          55 (10.4%)  23 (11.9%)            
##     Normal               147 (27.7%) 31 (16.0%)            
##     Agree                328 (61.9%) 140 (72.2%)           
## BRE:                                               0.926   
##     Don't agree          105 (19.8%) 41 (21.1%)            
##     Normal               158 (29.8%) 57 (29.4%)            
##     Agree                267 (50.4%) 96 (49.5%)            
## BCO:                                               0.555   
##     Don't agree          158 (29.8%) 66 (34.0%)            
##     Normal               166 (31.3%) 57 (29.4%)            
##     Agree                206 (38.9%) 71 (36.6%)            
## BIN:                                               0.075   
##     Don't agree          135 (25.5%) 66 (34.0%)            
##     Normal               223 (42.1%) 73 (37.6%)            
##     Agree                172 (32.5%) 55 (28.4%)            
## BHE:                                               0.130   
##     Don't agree          228 (43.0%) 77 (39.7%)            
##     Normal               177 (33.4%) 57 (29.4%)            
##     Agree                125 (23.6%) 60 (30.9%)            
## BEN:                                               0.343   
##     Don't agree          60 (11.3%)  22 (11.3%)            
##     Normal               119 (22.5%) 34 (17.5%)            
##     Agree                351 (66.2%) 138 (71.1%)           
## BRC:                                               0.051   
##     Don't agree          62 (11.7%)  29 (14.9%)            
##     Normal               95 (17.9%)  21 (10.8%)            
##     Agree                373 (70.4%) 144 (74.2%)           
## BWE:                                               0.107   
##     Don't agree          44 (8.30%)  24 (12.4%)            
##     Normal               53 (10.0%)  25 (12.9%)            
##     Agree                433 (81.7%) 145 (74.7%)           
## MR:                                                0.054   
##     Safety               70 (13.2%)  19 (9.79%)            
##     Comfortable          255 (48.1%) 79 (40.7%)            
##     Low price            27 (5.09%)  15 (7.73%)            
##     Accessibility        117 (22.1%) 59 (30.4%)            
##     Reliability          46 (8.68%)  13 (6.70%)            
##     others               15 (2.83%)   9 (4.64%)            
## NBR:                                               0.483   
##     No Route             114 (21.5%) 47 (24.2%)            
##     Inconvenience        165 (31.1%) 57 (29.4%)            
##     Unsafety             12 (2.26%)   7 (3.61%)            
##     Long waiting time    159 (30.0%) 48 (24.7%)            
##     Unreliability        50 (9.43%)  19 (9.79%)            
##     others               30 (5.66%)  16 (8.25%)            
## BUB:                                              <0.001   
##     No                   227 (42.8%) 54 (27.8%)            
##     Yes                  303 (57.2%) 140 (72.2%)           
## SBM:                                              <0.001   
##     Don't agree          300 (56.6%) 70 (36.1%)            
##     Normal               180 (34.0%) 50 (25.8%)            
##     Agree                50 (9.43%)  74 (38.1%)            
## SBS:                                              <0.001   
##     Don't agree          329 (62.1%) 93 (47.9%)            
##     Normal               176 (33.2%) 57 (29.4%)            
##     Agree                25 (4.72%)  44 (22.7%)            
## CO                       13.4 (20.4) 13.5 (22.9)   0.954   
## DI                       6.21 (5.42) 6.02 (5.48)   0.676   
## TI                       16.0 (12.9) 19.1 (21.2)   0.063   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (25 variables): GD, IN, NC, CC, BO, RO, NB, NR, FR, DT, TS, LS, SC, WE, WK, BST, BRE, BCO, BIN, BHE, BEN, BWE, NBR, CO, DI
t1 = compareGroups(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + BI + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, data = BS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t1)
## 
## --------Summary descriptives table by 'MSI'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=530       N=194              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## AG:                                                0.002   
##     <=24                 216 (40.8%) 106 (54.6%)           
##     25-60                302 (57.0%) 82 (42.3%)            
##     >60                  12 (2.26%)   6 (3.09%)            
## OC:                                               <0.001   
##     Pupils/Students      166 (31.3%) 93 (47.9%)            
##     Officers/Workers     164 (30.9%) 40 (20.6%)            
##     Housewife/Unemployed 34 (6.42%)   6 (3.09%)            
##     Free labor           135 (25.5%) 35 (18.0%)            
##     Others               31 (5.85%)  20 (10.3%)            
## MC:                                                0.001   
##     No                   50 (9.43%)  37 (19.1%)            
##     Yes                  480 (90.6%) 157 (80.9%)           
## MO:                                                0.002   
##     No                   78 (14.7%)  48 (24.7%)            
##     Yes                  452 (85.3%) 146 (75.3%)           
## NM:                                                0.041   
##     None                 17 (3.21%)   3 (1.55%)            
##     1                    80 (15.1%)  24 (12.4%)            
##     2                    279 (52.6%) 90 (46.4%)            
##     >=3                  154 (29.1%) 77 (39.7%)            
## TM:                                                0.002   
##     Walk/Bicycle         45 (8.49%)  30 (15.5%)            
##     Motorbike            391 (73.8%) 125 (64.4%)           
##     Car                  64 (12.1%)  18 (9.28%)            
##     Hichhiking/Taxi      30 (5.66%)  21 (10.8%)            
## TP:                                                0.015   
##     Work/Study           337 (63.6%) 146 (75.3%)           
##     Picking Children     72 (13.6%)  13 (6.70%)            
##     Entertainment/Food   108 (20.4%) 32 (16.5%)            
##     Others               13 (2.45%)   3 (1.55%)            
## SP:                                               <0.001   
##     No                   52 (9.81%)  12 (6.19%)            
##     Yes                  404 (76.2%) 175 (90.2%)           
##     Don't know           74 (14.0%)   7 (3.61%)            
## BI:                                               <0.001   
##     No                   267 (50.4%) 50 (25.8%)            
##     Yes                  263 (49.6%) 144 (74.2%)           
## WI:                                               <0.001   
##     No                   267 (50.4%) 50 (25.8%)            
##     At Bus Stops         139 (26.2%) 66 (34.0%)            
##     Internet/Facebook    55 (10.4%)  35 (18.0%)            
##     From Friends         34 (6.42%)  28 (14.4%)            
##     Others               35 (6.60%)  15 (7.73%)            
## BC:                                               <0.001   
##     No                   231 (43.6%) 116 (59.8%)           
##     Yes                  299 (56.4%) 78 (40.2%)            
## CA:                                                0.008   
##     No                   273 (51.5%) 122 (62.9%)           
##     Yes                  257 (48.5%) 72 (37.1%)            
## BSC:                                               0.025   
##     Don't agree          85 (16.0%)  30 (15.5%)            
##     Normal               136 (25.7%) 32 (16.5%)            
##     Agree                309 (58.3%) 132 (68.0%)           
## BSA:                                               0.005   
##     Don't agree          55 (10.4%)  23 (11.9%)            
##     Normal               147 (27.7%) 31 (16.0%)            
##     Agree                328 (61.9%) 140 (72.2%)           
## BRC:                                               0.051   
##     Don't agree          62 (11.7%)  29 (14.9%)            
##     Normal               95 (17.9%)  21 (10.8%)            
##     Agree                373 (70.4%) 144 (74.2%)           
## MR:                                                0.054   
##     Safety               70 (13.2%)  19 (9.79%)            
##     Comfortable          255 (48.1%) 79 (40.7%)            
##     Low price            27 (5.09%)  15 (7.73%)            
##     Accessibility        117 (22.1%) 59 (30.4%)            
##     Reliability          46 (8.68%)  13 (6.70%)            
##     others               15 (2.83%)   9 (4.64%)            
## BUB:                                              <0.001   
##     No                   227 (42.8%) 54 (27.8%)            
##     Yes                  303 (57.2%) 140 (72.2%)           
## SBM:                                              <0.001   
##     Don't agree          300 (56.6%) 70 (36.1%)            
##     Normal               180 (34.0%) 50 (25.8%)            
##     Agree                50 (9.43%)  74 (38.1%)            
## SBS:                                              <0.001   
##     Don't agree          329 (62.1%) 93 (47.9%)            
##     Normal               176 (33.2%) 57 (29.4%)            
##     Agree                25 (4.72%)  44 (22.7%)            
## TI                       16.0 (12.9) 19.1 (21.2)   0.063   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (21/25 variables): IN, BO, RO, NB, NR, FR, DT, LS, SC, WE, WK, BST, BRE, BCO, BIN, BHE, BEN, BWE, NBR, CO, DI; Cor>0.7 (BI); remains 4/25 variables p-value to test (GD, NC, CC, TS)
t.c = compareGroups(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, data = BS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t.c)
## 
## --------Summary descriptives table by 'MSI'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=530       N=194              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## GD:                                                0.084   
##     Female               231 (43.6%) 70 (36.1%)            
##     Male                 299 (56.4%) 124 (63.9%)           
## AG:                                                0.002   
##     <=24                 216 (40.8%) 106 (54.6%)           
##     25-60                302 (57.0%) 82 (42.3%)            
##     >60                  12 (2.26%)   6 (3.09%)            
## OC:                                               <0.001   
##     Pupils/Students      166 (31.3%) 93 (47.9%)            
##     Officers/Workers     164 (30.9%) 40 (20.6%)            
##     Housewife/Unemployed 34 (6.42%)   6 (3.09%)            
##     Free labor           135 (25.5%) 35 (18.0%)            
##     Others               31 (5.85%)  20 (10.3%)            
## NC:                                                1.000   
##     No                   222 (41.9%) 81 (41.8%)            
##     Yes                  308 (58.1%) 113 (58.2%)           
## MC:                                                0.001   
##     No                   50 (9.43%)  37 (19.1%)            
##     Yes                  480 (90.6%) 157 (80.9%)           
## CC:                                                0.090   
##     No                   413 (77.9%) 163 (84.0%)           
##     Yes                  117 (22.1%) 31 (16.0%)            
## MO:                                                0.002   
##     No                   78 (14.7%)  48 (24.7%)            
##     Yes                  452 (85.3%) 146 (75.3%)           
## NM:                                                0.041   
##     None                 17 (3.21%)   3 (1.55%)            
##     1                    80 (15.1%)  24 (12.4%)            
##     2                    279 (52.6%) 90 (46.4%)            
##     >=3                  154 (29.1%) 77 (39.7%)            
## TM:                                                0.002   
##     Walk/Bicycle         45 (8.49%)  30 (15.5%)            
##     Motorbike            391 (73.8%) 125 (64.4%)           
##     Car                  64 (12.1%)  18 (9.28%)            
##     Hichhiking/Taxi      30 (5.66%)  21 (10.8%)            
## TP:                                                0.015   
##     Work/Study           337 (63.6%) 146 (75.3%)           
##     Picking Children     72 (13.6%)  13 (6.70%)            
##     Entertainment/Food   108 (20.4%) 32 (16.5%)            
##     Others               13 (2.45%)   3 (1.55%)            
## TS:                                                0.084   
##     No                   252 (47.5%) 107 (55.2%)           
##     Yes                  278 (52.5%) 87 (44.8%)            
## SP:                                               <0.001   
##     No                   52 (9.81%)  12 (6.19%)            
##     Yes                  404 (76.2%) 175 (90.2%)           
##     Don't know           74 (14.0%)   7 (3.61%)            
## WI:                                               <0.001   
##     No                   267 (50.4%) 50 (25.8%)            
##     At Bus Stops         139 (26.2%) 66 (34.0%)            
##     Internet/Facebook    55 (10.4%)  35 (18.0%)            
##     From Friends         34 (6.42%)  28 (14.4%)            
##     Others               35 (6.60%)  15 (7.73%)            
## BC:                                               <0.001   
##     No                   231 (43.6%) 116 (59.8%)           
##     Yes                  299 (56.4%) 78 (40.2%)            
## CA:                                                0.008   
##     No                   273 (51.5%) 122 (62.9%)           
##     Yes                  257 (48.5%) 72 (37.1%)            
## BSC:                                               0.025   
##     Don't agree          85 (16.0%)  30 (15.5%)            
##     Normal               136 (25.7%) 32 (16.5%)            
##     Agree                309 (58.3%) 132 (68.0%)           
## BSA:                                               0.005   
##     Don't agree          55 (10.4%)  23 (11.9%)            
##     Normal               147 (27.7%) 31 (16.0%)            
##     Agree                328 (61.9%) 140 (72.2%)           
## BRC:                                               0.051   
##     Don't agree          62 (11.7%)  29 (14.9%)            
##     Normal               95 (17.9%)  21 (10.8%)            
##     Agree                373 (70.4%) 144 (74.2%)           
## MR:                                                0.054   
##     Safety               70 (13.2%)  19 (9.79%)            
##     Comfortable          255 (48.1%) 79 (40.7%)            
##     Low price            27 (5.09%)  15 (7.73%)            
##     Accessibility        117 (22.1%) 59 (30.4%)            
##     Reliability          46 (8.68%)  13 (6.70%)            
##     others               15 (2.83%)   9 (4.64%)            
## BUB:                                              <0.001   
##     No                   227 (42.8%) 54 (27.8%)            
##     Yes                  303 (57.2%) 140 (72.2%)           
## SBM:                                              <0.001   
##     Don't agree          300 (56.6%) 70 (36.1%)            
##     Normal               180 (34.0%) 50 (25.8%)            
##     Agree                50 (9.43%)  74 (38.1%)            
## SBS:                                              <0.001   
##     Don't agree          329 (62.1%) 93 (47.9%)            
##     Normal               176 (33.2%) 57 (29.4%)            
##     Agree                25 (4.72%)  44 (22.7%)            
## TI                       16.0 (12.9) 19.1 (21.2)   0.063   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

3. Descriptive analyse by graphs

library(magrittr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.0.4
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
BS %>%
  group_by(GD, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = GD)) +
  geom_col(position = "fill") +
  xlab("Mode shift intention") +
  ylab("Gender") +
  ggtitle("Probability of Bus Shift ~ Gender") # Nữ ít có khả năng chuyển đổi hơn

BS %>%
  group_by(AG, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = AG)) +
  geom_col(position = "fill") +
  xlab("Mode shift intention") +
  ylab("Age") +
  ggtitle("Probability of Bus Shift ~ Age") # Nhóm tuổi lao động ít có khả năng chuyển đổi hơn

BS %>%
  group_by(OC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = OC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Occupation") +
  ggtitle("Proportion of Bus Shift ~ Occupation") # Học sinh sinh viên có khả năng chuyển đổi nhiều hơn

BS %>%
  group_by(NC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = NC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Number of children") +
  ggtitle("Proportion of Bus Shift ~ Number of children") # Không có ảnh hưởng đến khả năng chuyển đổi PT

BS %>%
  group_by(MC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = MC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Motor Certification") +
  ggtitle("Proportion of Bus Shift ~ Motor Certification") # Có bằng lái xe máy ít có khả năng chuyển đổi hơn

BS %>%
  group_by(CC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = CC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Car Certification") +
  ggtitle("Proportion of Bus Shift ~ Car Certification") # Có bằng lái xe ô tô ít có khả năng chuyển đổi hơn

BS %>%
  group_by(MO, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = MO)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Motor Owning") +
  ggtitle("Proportion of Bus Shift ~ Motor Owning") # Sở hữu xe máy ít có khả năng chuyển đổi PT hơn

BS %>%
  group_by(NM, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = NM)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Number of Motor") +
  ggtitle("Proportion of Bus Shift ~ Number of Motor") # Số lượng xe máy trong gia đình sở hữu có xu hướng ảnh hưởng tích cực đến khả năng chuyển đổi (Hơi mâu thuẩn ???)

BS %>%
  group_by(TM, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = TM)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Travel Mode") +
  ggtitle("Proportion of Bus Shift ~ Travel Mode") # Người sử dụng xe máy và ô tô ít có khả năng chuyển đổi sang xe buýt hơn các đối tượng chọn phương tiện khác

BS %>%
  group_by(TP, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = TP)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Travel Purpose") +
  ggtitle("Proportion of Bus Shift ~ Travel Purpose") # Mục đích chuyến đi công việc có nhiều khả năng chuyển đổi sang xe buýt hơn

BS %>%
  group_by(TS, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = TS)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Temporary Stops") +
  ggtitle("Proportion of Bus Shift ~ Temporary Stops") # Có điểm dừng tạm thời trong hành trình chuyến đi sẽ ít có khả năng chuyển đổi PT sang buýt hơn

BS %>%
  group_by(SP, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = SP)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus Stop Present") +
  ggtitle("Proportion of Bus Shift ~ Bus Stop Present") # Gần trạm dừng sẽ buýt có nhiều khả năng chuyển đổi PT sang xe buýt hơn

BS %>%
  group_by(BI, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BI)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus Information") +
  ggtitle("Proportion of Bus Shift ~ Bus Information") # Biết về thông tin hệ thống buýt có nhiều khả năng chuyển đổi PT sang xe buýt hơn

BS %>%
  group_by(WI, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = WI)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Way to have bus information") +
  ggtitle("Proportion of Bus Shift ~ Way to have bus information")# Bioongs BI

BS %>%
  group_by(BC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus Condition") +
  ggtitle("Proportion of Bus Shift ~ Bus Condition") # Điều kiện trạm dừng (có mái che) tỷ lệ nghich với khả năng chuyển đổi (Mâu thuẩn ??? Kiểm tra)

BS %>%
  group_by(CA, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = CA)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Center Area") +
  ggtitle("Proportion of Bus Shift ~ Center Area") # Chuyến đi ở khu vực trung tâm ít có khả năng chuyển đổi hơn

BS %>%
  group_by(BSC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BSC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of cost saving") +
  ggtitle("Proportion of Bus Shift ~ Perception of cost saving") # Cảm nhận xe buýt tiết kiệm chi phí đi lại có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

BS %>%
  group_by(BSA, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BSA)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of Safety") +
  ggtitle("Proportion of Bus Shift ~ Perception of Safety") # Cảm nhận xe buýt an toàn có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

BS %>%
  group_by(BRC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BRC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of Traffic congestion reduce") +
  ggtitle("Proportion of Bus Shift ~ Perception of Traffic congestion reduce") # Cảm nhận xe buýt giảm UTGT có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

BS %>%
  group_by(MR, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = MR)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Mode Choice Reason") +
  ggtitle("Proportion of Bus Shift ~ Mode Choice Reason") 

BS %>%
  group_by(BUB, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BUB)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus use before") +
  ggtitle("Proportion of Bus Shift ~ Bus use before") # Trải nghiệm xe buýt có ảnh hưởng tích cực đến khả năng chuyển đổi

BS %>%
  group_by(SBM, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = SBM)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Like to shift to bus for main trips") +
  ggtitle("Proportion of Bus Shift ~ Like to shift to bus for main trips") 

BS %>%
  group_by(SBS, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = SBS)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Like to shift to bus for sub trips") +
  ggtitle("Proportion of Bus Shift ~ Like to shift to bus for sub trips")

## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(BS, aes(x=TI, fill = MSI, color = MSI)) +
  geom_histogram (position = "dodge") +
  xlab("Travel Period") +
  ylab("Count") +
  ggtitle("Histogram of Travel Time")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(BS, aes(x=DI, fill = MSI, color = MSI)) +
  geom_histogram (position = "dodge") +
  xlab("Distance") +
  ylab("Count") +
  ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(BS, aes(x=CO, fill = MSI, color = MSI)) +
  geom_histogram (position = "dodge") +
  xlab("Cost") +
  ylab("Count") +
  ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Statistic Analysis by boxplot (continuous variales)
BS %>%
  group_by(DI, MSI) %>%
  count() %>% 
  ggplot(aes(x = MSI, y = DI, fill = MSI)) +
  geom_boxplot() +
  xlab("Mode Shift to Bus") +
  ylab("Distance") +
  ggtitle("Boxplot of Bus Shift ~ Distance")

BS %>%
  group_by(CO, MSI) %>%
  count() %>% 
  ggplot(aes(x = MSI, y = CO, fill = MSI)) +
  geom_boxplot() +
  xlab("Mode Shift to Bus") +
  ylab("Cost") +
  ggtitle("Boxplot of Bus Shift ~ Cost")

BS %>%
  group_by(TI, MSI) %>%
  count() %>% 
  ggplot(aes(x = MSI, y = TI, fill = MSI)) +
  geom_boxplot() +
  xlab("Mode Shift to Bus") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Bus Shift ~ Travel Period")

## Correlation in continuous variables
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
CVMSI = data.frame(BS$MSI, BS$DI, BS$TI, BS$CO)
pairs.panels(CVMSI)

summary(BS)
##    CA        BC                    TM                       TP     
##  No :395   No :347   Walk/Bicycle   : 75   Work/Study        :483  
##  Yes:329   Yes:377   Motorbike      :516   Picking Children  : 85  
##                      Car            : 82   Entertainment/Food:140  
##                      Hichhiking/Taxi: 51   Others            : 16  
##                                                                    
##                                                                    
##          FR              DT            DI               TI           SC     
##  Once     : 44   Normal   :165   Min.   : 0.015   Min.   :  0.00   No : 96  
##  twice    : 46   Rush hour:559   1st Qu.: 2.000   1st Qu.: 10.00   Yes:628  
##  >=3 times:634                   Median : 5.000   Median : 15.00            
##                                  Mean   : 6.159   Mean   : 16.84            
##                                  3rd Qu.:10.000   3rd Qu.: 20.00            
##                                  Max.   :35.000   Max.   :180.00            
##    LS        TS                  MR        WE        WK     
##  No :154   No :359   Safety       : 89   No :715   No :547  
##  Yes:570   Yes:365   Comfortable  :334   Yes:  9   Yes:177  
##                      Low price    : 42                      
##                      Accessibility:176                      
##                      Reliability  : 59                      
##                      others       : 24                      
##                 NBR            CO         BUB               SBM     
##  No Route         :161   Min.   :  0.0   No :281   Don't agree:370  
##  Inconvenience    :222   1st Qu.:  4.0   Yes:443   Normal     :230  
##  Unsafety         : 19   Median : 10.0             Agree      :124  
##  Long waiting time:207   Mean   : 13.4                              
##  Unreliability    : 69   3rd Qu.: 15.0                              
##  others           : 46   Max.   :300.0                              
##           SBS               BST               BSC               BSA     
##  Don't agree:422   Don't agree:334   Don't agree:115   Don't agree: 78  
##  Normal     :233   Normal     :214   Normal     :168   Normal     :178  
##  Agree      : 69   Agree      :176   Agree      :441   Agree      :468  
##                                                                         
##                                                                         
##                                                                         
##           BRE               BCO               BIN               BHE     
##  Don't agree:146   Don't agree:224   Don't agree:201   Don't agree:305  
##  Normal     :215   Normal     :223   Normal     :296   Normal     :234  
##  Agree      :363   Agree      :277   Agree      :227   Agree      :185  
##                                                                         
##                                                                         
##                                                                         
##           BEN               BRC               BWE               SP     
##  Don't agree: 82   Don't agree: 91   Don't agree: 68   No        : 64  
##  Normal     :153   Normal     :116   Normal     : 78   Yes       :579  
##  Agree      :489   Agree      :517   Agree      :578   Don't know: 81  
##                                                                        
##                                                                        
##                                                                        
##    BI                      WI       MSI           GD          AG     
##  No :317   No               :317   No :530   Female:301   <=24 :322  
##  Yes:407   At Bus Stops     :205   Yes:194   Male  :423   25-60:384  
##            Internet/Facebook: 90                          >60  : 18  
##            From Friends     : 62                                     
##            Others           : 50                                     
##                                                                      
##                     OC                     IN        NC        MC     
##  Pupils/Students     :259   <8 millions     :155   No :303   No : 87  
##  Officers/Workers    :204   (8-15) millions :304   Yes:421   Yes:637  
##  Housewife/Unemployed: 40   (15-25) millions:182                      
##  Free labor          :170   >25 millions    : 83                      
##  Others              : 51                                             
##                                                                       
##    CC        BO        MO        RO         NB         NM        NR     
##  No :576   No :433   No :126   No :610   None:324   None: 20   No :546  
##  Yes:148   Yes:291   Yes:598   Yes:114   1   :303   1   :104   Yes:178  
##                                          >=2 : 97   2   :369            
##                                                     >=3 :231            
##                                                                         
## 
# Correlations between MSI and variables of Group 1 (Individual Characteristics)
Cor1 = data.frame(BS$MSI, BS$GD, BS$AG, BS$OC, BS$IN, BS$NC, BS$MC, BS$CC, BS$BO, BS$MO, BS$RO, BS$NB, BS$NM, BS$NR)
pairs.panels(Cor1)

Cor1_1 = data.frame(BS$MSI, BS$GD, BS$AG, BS$OC, BS$IN, BS$NC)
pairs.panels(Cor1_1)

Cor1_2 = data.frame(BS$MSI, BS$MC, BS$CC, BS$BO, BS$MO, BS$RO, BS$NB, BS$NM, BS$NR)
pairs.panels(Cor1_2)

# Correlation between MSI and variables of Group 2 (Trip Characteristics)
Cor2 = data.frame(BS$MSI, BS$TM, BS$TP, BS$FR, BS$DT, BS$TS, BS$CO, BS$DI)
pairs.panels(Cor2)

# Correlation between MSI and variables of Group 3 (Infrastructures)
Cor3 = data.frame(BS$MSI, BS$SP, BS$BI, BS$WI, BS$LS, BS$SC, BS$BC, BS$TI)
pairs.panels(Cor3)

# Correlation between MSI and variables of Group 4 (Surrouding Environment) and 5 (Others)
Cor45 = data.frame(BS$MSI, BS$WE, BS$WK, BS$CA, BS$MR, BS$NBR, BS$BUB, BS$SBM, BS$SBS)
pairs.panels(Cor45)

# Correlation between MSI and variables of Group 6 (Perception of Bus)
Cor6 = data.frame(BS$MSI, BS$BST, BS$BSC, BS$BSA, BS$BRE, BS$BCO, BS$BIN, BS$BHE, BS$BEN, BS$BRC, BS$BWE)
pairs.panels(Cor6)

4. Binary Model

# Model with multi-variables (remove variables with p-value>0,05 cor>0.7, in t.c) --> 23 variables
mg = glm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, family = binomial, data = BS)
mg
## 
## Call:  glm(formula = MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + 
##     TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + 
##     SBM + SBS + TI, family = binomial, data = BS)
## 
## Coefficients:
##            (Intercept)                  GDMale                 AG25-60  
##               -0.55298                 0.30378                 0.40819  
##                  AG>60      OCOfficers/Workers  OCHousewife/Unemployed  
##                1.48900                -0.64592                -0.44226  
##           OCFree labor                OCOthers                   NCYes  
##               -0.98629                -0.01543                 0.21170  
##                  MCYes                   CCYes                   MOYes  
##               -0.84386                 0.13362                -0.22179  
##                    NM1                     NM2                   NM>=3  
##                0.75807                 0.47658                 1.05291  
##            TMMotorbike                   TMCar       TMHichhiking/Taxi  
##               -0.09731                 0.23282                 0.36714  
##     TPPicking Children    TPEntertainment/Food                TPOthers  
##               -1.27376                -0.87081                -1.27200  
##                  TSYes                   SPYes            SPDon't know  
##               -0.27580                -0.19116                -1.17003  
##         WIAt Bus Stops     WIInternet/Facebook          WIFrom Friends  
##                0.72940                 1.41480                 0.98769  
##               WIOthers                   BCYes                   CAYes  
##                0.82602                -0.79790                -0.78183  
##              BSCNormal                BSCAgree               BSANormal  
##               -0.27922                -0.21726                -0.57199  
##               BSAAgree               BRCNormal                BRCAgree  
##                0.09555                -0.43554                -0.40906  
##          MRComfortable             MRLow price         MRAccessibility  
##               -0.02688                 0.84610                 0.76702  
##          MRReliability                MRothers                  BUBYes  
##               -0.19950                 0.19721                -0.04493  
##              SBMNormal                SBMAgree               SBSNormal  
##                0.34803                 1.96718                -0.02342  
##               SBSAgree                      TI  
##                0.76157                 0.01238  
## 
## Degrees of Freedom: 723 Total (i.e. Null);  677 Residual
## Null Deviance:       841.6 
## Residual Deviance: 612.3     AIC: 706.3
summary(mg) 
## 
## Call:
## glm(formula = MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + 
##     TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + 
##     SBM + SBS + TI, family = binomial, data = BS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4126  -0.6578  -0.3798   0.4445   3.0253  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.552975   1.040959  -0.531 0.595268    
## GDMale                  0.303781   0.233306   1.302 0.192891    
## AG25-60                 0.408189   0.417731   0.977 0.328491    
## AG>60                   1.488998   0.819418   1.817 0.069196 .  
## OCOfficers/Workers     -0.645925   0.443333  -1.457 0.145124    
## OCHousewife/Unemployed -0.442259   0.686302  -0.644 0.519311    
## OCFree labor           -0.986294   0.436473  -2.260 0.023841 *  
## OCOthers               -0.015426   0.577683  -0.027 0.978697    
## NCYes                   0.211699   0.232343   0.911 0.362217    
## MCYes                  -0.843858   0.426952  -1.976 0.048101 *  
## CCYes                   0.133620   0.358990   0.372 0.709736    
## MOYes                  -0.221792   0.377558  -0.587 0.556910    
## NM1                     0.758073   0.828629   0.915 0.360269    
## NM2                     0.476576   0.800777   0.595 0.551749    
## NM>=3                   1.052914   0.801632   1.313 0.189027    
## TMMotorbike            -0.097308   0.435607  -0.223 0.823235    
## TMCar                   0.232824   0.587499   0.396 0.691887    
## TMHichhiking/Taxi       0.367141   0.500856   0.733 0.463541    
## TPPicking Children     -1.273759   0.431232  -2.954 0.003139 ** 
## TPEntertainment/Food   -0.870810   0.290887  -2.994 0.002757 ** 
## TPOthers               -1.272000   0.912520  -1.394 0.163335    
## TSYes                  -0.275798   0.213677  -1.291 0.196800    
## SPYes                  -0.191164   0.455921  -0.419 0.675003    
## SPDon't know           -1.170028   0.598844  -1.954 0.050724 .  
## WIAt Bus Stops          0.729396   0.308264   2.366 0.017975 *  
## WIInternet/Facebook     1.414802   0.366220   3.863 0.000112 ***
## WIFrom Friends          0.987690   0.400673   2.465 0.013698 *  
## WIOthers                0.826023   0.459715   1.797 0.072365 .  
## BCYes                  -0.797902   0.232726  -3.429 0.000607 ***
## CAYes                  -0.781828   0.235347  -3.322 0.000894 ***
## BSCNormal              -0.279221   0.406630  -0.687 0.492289    
## BSCAgree               -0.217257   0.368406  -0.590 0.555378    
## BSANormal              -0.571989   0.445914  -1.283 0.199585    
## BSAAgree                0.095555   0.413196   0.231 0.817114    
## BRCNormal              -0.435540   0.431687  -1.009 0.313011    
## BRCAgree               -0.409063   0.342847  -1.193 0.232817    
## MRComfortable          -0.026879   0.395853  -0.068 0.945864    
## MRLow price             0.846104   0.558206   1.516 0.129581    
## MRAccessibility         0.767021   0.438945   1.747 0.080565 .  
## MRReliability          -0.199500   0.526415  -0.379 0.704704    
## MRothers                0.197207   0.671097   0.294 0.768866    
## BUBYes                 -0.044926   0.236129  -0.190 0.849106    
## SBMNormal               0.348029   0.290242   1.199 0.230489    
## SBMAgree                1.967184   0.335713   5.860 4.64e-09 ***
## SBSNormal              -0.023419   0.278773  -0.084 0.933051    
## SBSAgree                0.761571   0.387746   1.964 0.049519 *  
## TI                      0.012377   0.007423   1.667 0.095445 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 612.33  on 677  degrees of freedom
## AIC: 706.33
## 
## Number of Fisher Scoring iterations: 5
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6) --> 12 variables
#mg1 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
## Way 2: Use function lmr in package "rms" - library(rms) -ml = lrm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI)

# Find Parsimonious model - Tìm mô hình tối ưu - GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-5)
xvars <- BS[, c(34,35,36,38,39,40,42,45,3,4,11,30,32,2,1,21,22,28,12, 17, 18, 19, 8)]
y <- BS$MSI
bma.search <- bic.glm (xvars, y, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search)
## 
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   7  models were selected
##  Best  5  models (cumulative posterior probability =  0.9391 ): 
## 
##                          p!=0    EV         SD        model 1     model 2   
## Intercept                100    -0.3451553  0.392538  -2.548e-01  -5.234e-01
## GD                         5.2                                              
##   .Male                          0.0163310  0.083048       .           .    
## AG                         0.0                                              
##   .25-60                         0.0000000  0.000000       .           .    
##   .>60                           0.0000000  0.000000       .           .    
## OC                         0.0                                              
##   .Officers/Workers              0.0000000  0.000000       .           .    
##   .Housewife/Unemployed          0.0000000  0.000000       .           .    
##   .Free labor                    0.0000000  0.000000       .           .    
##   .Others                        0.0000000  0.000000       .           .    
## NC                         0.0                                              
##   .Yes                           0.0000000  0.000000       .           .    
## MC                        96.7                                              
##   .Yes                          -1.0981072  0.350068  -1.111e+00  -1.190e+00
## CC                         0.0                                              
##   .Yes                           0.0000000  0.000000       .           .    
## MO                         6.2                                              
##   .Yes                          -0.0360873  0.164014       .           .    
## NM                         0.0                                              
##   .1                             0.0000000  0.000000       .           .    
##   .2                             0.0000000  0.000000       .           .    
##   .>=3                           0.0000000  0.000000       .           .    
## TM                         0.0                                              
##   .Motorbike                     0.0000000  0.000000       .           .    
##   .Car                           0.0000000  0.000000       .           .    
##   .Hichhiking/Taxi               0.0000000  0.000000       .           .    
## TP                         0.0                                              
##   .Picking Children              0.0000000  0.000000       .           .    
##   .Entertainment/Food            0.0000000  0.000000       .           .    
##   .Others                        0.0000000  0.000000       .           .    
## TS                        10.6                                              
##   .Yes                          -0.0405234  0.133809       .           .    
## SP                         0.0                                              
##   .Yes                           0.0000000  0.000000       .           .    
##   .Don't know                    0.0000000  0.000000       .           .    
## WI                       100.0                                              
##   .At Bus Stops                  0.6996884  0.244735   6.713e-01   7.465e-01
##   .Internet/Facebook             1.3859142  0.292753   1.351e+00   1.429e+00
##   .From Friends                  1.1013938  0.330802   1.068e+00   1.156e+00
##   .Others                        0.6575057  0.390179   6.257e-01   6.780e-01
## BC                       100.0                                              
##   .Yes                          -0.8550115  0.199569  -8.642e-01  -8.458e-01
## CA                       100.0                                              
##   .Yes                          -0.8789633  0.207461  -8.995e-01  -8.335e-01
## BSC                        0.0                                              
##    .Normal                       0.0000000  0.000000       .           .    
##    .Agree                        0.0000000  0.000000       .           .    
## BSA                        3.2                                              
##    .Normal                      -0.0210880  0.132381       .           .    
##    .Agree                        0.0003411  0.054078       .           .    
## BRC                        0.0                                              
##    .Normal                       0.0000000  0.000000       .           .    
##    .Agree                        0.0000000  0.000000       .           .    
## MR                         0.0                                              
##   .Comfortable                   0.0000000  0.000000       .           .    
##   .Low price                     0.0000000  0.000000       .           .    
##   .Accessibility                 0.0000000  0.000000       .           .    
##   .Reliability                   0.0000000  0.000000       .           .    
##   .others                        0.0000000  0.000000       .           .    
## BUB                        0.0                                              
##    .Yes                          0.0000000  0.000000       .           .    
## SBM                      100.0                                              
##    .Normal                       0.1316721  0.221303   1.283e-01   1.210e-01
##    .Agree                        1.9435843  0.254496   1.948e+00   1.943e+00
## SBS                        0.0                                              
##    .Normal                       0.0000000  0.000000       .           .    
##    .Agree                        0.0000000  0.000000       .           .    
## TI                        35.1   0.0052631  0.007962       .       1.499e-02
##                                                                             
## nVar                                                     5           6      
## BIC                                                   -4.009e+03  -4.009e+03
## post prob                                              0.397       0.351    
##                          model 3     model 4     model 5   
## Intercept                -8.546e-02  -4.498e-01  -6.185e-01
## GD                                                         
##   .Male                       .       3.133e-01       .    
## AG                                                         
##   .25-60                      .           .           .    
##   .>60                        .           .           .    
## OC                                                         
##   .Officers/Workers           .           .           .    
##   .Housewife/Unemployed       .           .           .    
##   .Free labor                 .           .           .    
##   .Others                     .           .           .    
## NC                                                         
##   .Yes                        .           .           .    
## MC                                                         
##   .Yes                   -1.135e+00  -1.125e+00       .    
## CC                                                         
##   .Yes                        .           .           .    
## MO                                                         
##   .Yes                        .           .      -7.785e-01
## NM                                                         
##   .1                          .           .           .    
##   .2                          .           .           .    
##   .>=3                        .           .           .    
## TM                                                         
##   .Motorbike                  .           .           .    
##   .Car                        .           .           .    
##   .Hichhiking/Taxi            .           .           .    
## TP                                                         
##   .Picking Children           .           .           .    
##   .Entertainment/Food         .           .           .    
##   .Others                     .           .           .    
## TS                                                         
##   .Yes                   -3.840e-01       .           .    
## SP                                                         
##   .Yes                        .           .           .    
##   .Don't know                 .           .           .    
## WI                                                         
##   .At Bus Stops           6.916e-01   6.768e-01   6.772e-01
##   .Internet/Facebook      1.417e+00   1.343e+00   1.342e+00
##   .From Friends           1.059e+00   1.089e+00   1.182e+00
##   .Others                 7.489e-01   6.735e-01   5.862e-01
## BC                                                         
##   .Yes                   -8.485e-01  -8.256e-01  -8.283e-01
## CA                                                         
##   .Yes                   -9.045e-01  -9.303e-01  -8.677e-01
## BSC                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## BSA                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## BRC                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## MR                                                         
##   .Comfortable                .           .           .    
##   .Low price                  .           .           .    
##   .Accessibility              .           .           .    
##   .Reliability                .           .           .    
##   .others                     .           .           .    
## BUB                                                        
##    .Yes                       .           .           .    
## SBM                                                        
##    .Normal                1.500e-01   1.546e-01   1.288e-01
##    .Agree                 1.959e+00   1.947e+00   1.899e+00
## SBS                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## TI                            .           .           .    
##                                                            
## nVar                        6           6           5      
## BIC                      -4.007e+03  -4.005e+03  -4.005e+03
## post prob                 0.106       0.052       0.033
# Remove ns variables (GD, NC, CC, TS) and change DI (7) by TI (8) --> 19 variables
xvars1 <- BS[, c(35,36,39,42,45,3,4,30,32,2,1,21,22,28,12, 17, 18, 19, 7)]
y1 <- BS$MSI
bma.search1 <- bic.glm (xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search1)
## 
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   3  models were selected
##  Best  3  models (cumulative posterior probability =  1 ): 
## 
##                          p!=0    EV         SD       model 1     model 2   
## Intercept                100    -0.2687921  0.36471  -2.548e-01  -6.185e-01
## AG                         0.0                                             
##   .25-60                         0.0000000  0.00000       .           .    
##   .>60                           0.0000000  0.00000       .           .    
## OC                         0.0                                             
##   .Officers/Workers              0.0000000  0.00000       .           .    
##   .Housewife/Unemployed          0.0000000  0.00000       .           .    
##   .Free labor                    0.0000000  0.00000       .           .    
##   .Others                        0.0000000  0.00000       .           .    
## MC                        92.8                                             
##   .Yes                          -1.0312792  0.39381  -1.111e+00       .    
## MO                         7.2                                             
##   .Yes                          -0.0558463  0.21086       .      -7.785e-01
## NM                         0.0                                             
##   .1                             0.0000000  0.00000       .           .    
##   .2                             0.0000000  0.00000       .           .    
##   .>=3                           0.0000000  0.00000       .           .    
## TM                         0.0                                             
##   .Motorbike                     0.0000000  0.00000       .           .    
##   .Car                           0.0000000  0.00000       .           .    
##   .Hichhiking/Taxi               0.0000000  0.00000       .           .    
## TP                         0.0                                             
##   .Picking Children              0.0000000  0.00000       .           .    
##   .Entertainment/Food            0.0000000  0.00000       .           .    
##   .Others                        0.0000000  0.00000       .           .    
## SP                         0.0                                             
##   .Yes                           0.0000000  0.00000       .           .    
##   .Don't know                    0.0000000  0.00000       .           .    
## WI                       100.0                                             
##   .At Bus Stops                  0.6702762  0.24048   6.713e-01   6.772e-01
##   .Internet/Facebook             1.3523836  0.28913   1.351e+00   1.342e+00
##   .From Friends                  1.0733555  0.32789   1.068e+00   1.182e+00
##   .Others                        0.6239739  0.38837   6.257e-01   5.862e-01
## BC                       100.0                                             
##   .Yes                          -0.8654120  0.19908  -8.642e-01  -8.283e-01
## CA                       100.0                                             
##   .Yes                          -0.8998859  0.20415  -8.995e-01  -8.677e-01
## BSC                        0.0                                             
##    .Normal                       0.0000000  0.00000       .           .    
##    .Agree                        0.0000000  0.00000       .           .    
## BSA                        6.8                                             
##    .Normal                      -0.0456443  0.19186       .           .    
##    .Agree                        0.0007384  0.07956       .           .    
## BRC                        0.0                                             
##    .Normal                       0.0000000  0.00000       .           .    
##    .Agree                        0.0000000  0.00000       .           .    
## MR                         0.0                                             
##   .Comfortable                   0.0000000  0.00000       .           .    
##   .Low price                     0.0000000  0.00000       .           .    
##   .Accessibility                 0.0000000  0.00000       .           .    
##   .Reliability                   0.0000000  0.00000       .           .    
##   .others                        0.0000000  0.00000       .           .    
## BUB                        0.0                                             
##    .Yes                          0.0000000  0.00000       .           .    
## SBM                      100.0                                             
##    .Normal                       0.1332786  0.22109   1.283e-01   1.288e-01
##    .Agree                        1.9406127  0.25385   1.948e+00   1.899e+00
## SBS                        0.0                                             
##    .Normal                       0.0000000  0.00000       .           .    
##    .Agree                        0.0000000  0.00000       .           .    
## DI                         0.0   0.0000000  0.00000       .           .    
##                                                                            
## nVar                                                    5           5      
## BIC                                                  -4.009e+03  -4.005e+03
## post prob                                             0.860       0.072    
##                          model 3   
## Intercept                -7.800e-02
## AG                                 
##   .25-60                      .    
##   .>60                        .    
## OC                                 
##   .Officers/Workers           .    
##   .Housewife/Unemployed       .    
##   .Free labor                 .    
##   .Others                     .    
## MC                                 
##   .Yes                   -1.108e+00
## MO                                 
##   .Yes                        .    
## NM                                 
##   .1                          .    
##   .2                          .    
##   .>=3                        .    
## TM                                 
##   .Motorbike                  .    
##   .Car                        .    
##   .Hichhiking/Taxi            .    
## TP                                 
##   .Picking Children           .    
##   .Entertainment/Food         .    
##   .Others                     .    
## SP                                 
##   .Yes                        .    
##   .Don't know                 .    
## WI                                 
##   .At Bus Stops           6.502e-01
##   .Internet/Facebook      1.377e+00
##   .From Friends           1.026e+00
##   .Others                 6.418e-01
## BC                                 
##   .Yes                   -9.198e-01
## CA                                 
##   .Yes                   -9.378e-01
## BSC                                
##    .Normal                    .    
##    .Agree                     .    
## BSA                                
##    .Normal               -6.665e-01
##    .Agree                 1.078e-02
## BRC                                
##    .Normal                    .    
##    .Agree                     .    
## MR                                 
##   .Comfortable                .    
##   .Low price                  .    
##   .Accessibility              .    
##   .Reliability                .    
##   .others                     .    
## BUB                                
##    .Yes                       .    
## SBM                                
##    .Normal                2.000e-01
##    .Agree                 1.888e+00
## SBS                                
##    .Normal                    .    
##    .Agree                     .    
## DI                            .    
##                                    
## nVar                        6      
## BIC                      -4.004e+03
## post prob                 0.068
#imageplot.bma(bma.search1)

## Model 1 from bma.search1 : Remove ns variables (GD, NC, CC, TS) and change DI (7) by TI (8) --> 19 variables
mg1 = glm(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial, data = BS)
mg1
## 
## Call:  glm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + 
##     BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial, 
##     data = BS)
## 
## Coefficients:
##            (Intercept)                 AG25-60                   AG>60  
##              -0.460888                0.509909                1.600156  
##     OCOfficers/Workers  OCHousewife/Unemployed            OCFree labor  
##              -0.601602               -0.672492               -1.067818  
##               OCOthers                   MCYes                   MOYes  
##               0.145226               -0.803585               -0.142332  
##                    NM1                     NM2                   NM>=3  
##               0.797740                0.584359                1.153470  
##            TMMotorbike                   TMCar       TMHichhiking/Taxi  
##               0.006570                0.810212                0.504619  
##     TPPicking Children    TPEntertainment/Food                TPOthers  
##              -1.452278               -0.855917               -1.357762  
##                  SPYes            SPDon't know          WIAt Bus Stops  
##              -0.040310               -1.119875                0.622671  
##    WIInternet/Facebook          WIFrom Friends                WIOthers  
##               1.294873                0.905878                0.670218  
##                  BCYes                   CAYes               BSCNormal  
##              -0.888957               -0.915710               -0.227395  
##               BSCAgree               BSANormal                BSAAgree  
##              -0.194065               -0.596242                0.096531  
##              BRCNormal                BRCAgree           MRComfortable  
##              -0.343956               -0.263950                0.052656  
##            MRLow price         MRAccessibility           MRReliability  
##               0.969812                0.804047               -0.097818  
##               MRothers                  BUBYes               SBMNormal  
##               0.231989               -0.050910                0.252138  
##               SBMAgree               SBSNormal                SBSAgree  
##               1.963412                0.008038                0.775681  
##                     DI  
##              -0.032412  
## 
## Degrees of Freedom: 723 Total (i.e. Null);  681 Residual
## Null Deviance:       841.6 
## Residual Deviance: 618.4     AIC: 704.4
summary(mg1)
## 
## Call:
## glm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + 
##     BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial, 
##     data = BS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6346  -0.6513  -0.3886   0.4517   3.0879  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.460888   1.014052  -0.455 0.649468    
## AG25-60                 0.509909   0.408317   1.249 0.211735    
## AG>60                   1.600156   0.809458   1.977 0.048062 *  
## OCOfficers/Workers     -0.601602   0.444097  -1.355 0.175525    
## OCHousewife/Unemployed -0.672492   0.670571  -1.003 0.315927    
## OCFree labor           -1.067818   0.435640  -2.451 0.014240 *  
## OCOthers                0.145226   0.567793   0.256 0.798126    
## MCYes                  -0.803585   0.424113  -1.895 0.058127 .  
## MOYes                  -0.142332   0.381606  -0.373 0.709162    
## NM1                     0.797740   0.828571   0.963 0.335653    
## NM2                     0.584359   0.799689   0.731 0.464942    
## NM>=3                   1.153470   0.800814   1.440 0.149762    
## TMMotorbike             0.006570   0.439137   0.015 0.988063    
## TMCar                   0.810212   0.563283   1.438 0.150328    
## TMHichhiking/Taxi       0.504619   0.497789   1.014 0.310716    
## TPPicking Children     -1.452278   0.431442  -3.366 0.000762 ***
## TPEntertainment/Food   -0.855917   0.284101  -3.013 0.002589 ** 
## TPOthers               -1.357762   0.918309  -1.479 0.139262    
## SPYes                  -0.040310   0.446021  -0.090 0.927988    
## SPDon't know           -1.119875   0.598265  -1.872 0.061224 .  
## WIAt Bus Stops          0.622671   0.304066   2.048 0.040578 *  
## WIInternet/Facebook     1.294873   0.357963   3.617 0.000298 ***
## WIFrom Friends          0.905878   0.394216   2.298 0.021566 *  
## WIOthers                0.670218   0.463461   1.446 0.148145    
## BCYes                  -0.888957   0.231589  -3.839 0.000124 ***
## CAYes                  -0.915710   0.236190  -3.877 0.000106 ***
## BSCNormal              -0.227395   0.404108  -0.563 0.573633    
## BSCAgree               -0.194065   0.362292  -0.536 0.592195    
## BSANormal              -0.596242   0.443520  -1.344 0.178838    
## BSAAgree                0.096531   0.408890   0.236 0.813369    
## BRCNormal              -0.343956   0.431132  -0.798 0.424988    
## BRCAgree               -0.263950   0.341203  -0.774 0.439175    
## MRComfortable           0.052656   0.393148   0.134 0.893455    
## MRLow price             0.969812   0.556347   1.743 0.081302 .  
## MRAccessibility         0.804047   0.436812   1.841 0.065663 .  
## MRReliability          -0.097818   0.520557  -0.188 0.850948    
## MRothers                0.231989   0.656438   0.353 0.723784    
## BUBYes                 -0.050910   0.235854  -0.216 0.829101    
## SBMNormal               0.252138   0.286773   0.879 0.379279    
## SBMAgree                1.963412   0.329928   5.951 2.66e-09 ***
## SBSNormal               0.008038   0.273902   0.029 0.976590    
## SBSAgree                0.775681   0.384369   2.018 0.043585 *  
## DI                     -0.032412   0.022708  -1.427 0.153481    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 618.39  on 681  degrees of freedom
## AIC: 704.39
## 
## Number of Fisher Scoring iterations: 5
## CHON: Model 2 --> 5 variables (bma.search1) - MC, WI, BC, CA, SBM + some variables significant and choose by AIC
mg2 = glm(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial, data = BS)
mg2
## 
## Call:  glm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial, 
##     data = BS)
## 
## Coefficients:
##            (Intercept)      OCOfficers/Workers  OCHousewife/Unemployed  
##                0.04216                -0.31020                -0.29222  
##           OCFree labor                OCOthers                   MCYes  
##               -0.66363                 0.91856                -0.85258  
##     TPPicking Children    TPEntertainment/Food                TPOthers  
##               -1.05006                -0.80345                -1.11305  
##                  SPYes            SPDon't know          WIAt Bus Stops  
##               -0.02669                -1.10368                 0.60130  
##    WIInternet/Facebook          WIFrom Friends                WIOthers  
##                1.18500                 0.98890                 0.60877  
##                  BCYes                   CAYes               SBMNormal  
##               -0.81650                -0.92689                 0.18031  
##               SBMAgree  
##                2.13985  
## 
## Degrees of Freedom: 723 Total (i.e. Null);  705 Residual
## Null Deviance:       841.6 
## Residual Deviance: 656.2     AIC: 694.2
summary(mg2)
## 
## Call:
## glm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial, 
##     data = BS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0246  -0.7121  -0.4431   0.5256   2.7444  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             0.04216    0.48436   0.087 0.930642    
## OCOfficers/Workers     -0.31020    0.27507  -1.128 0.259441    
## OCHousewife/Unemployed -0.29222    0.52541  -0.556 0.578092    
## OCFree labor           -0.66363    0.28470  -2.331 0.019753 *  
## OCOthers                0.91856    0.39809   2.307 0.021032 *  
## MCYes                  -0.85258    0.31772  -2.683 0.007286 ** 
## TPPicking Children     -1.05006    0.38954  -2.696 0.007026 ** 
## TPEntertainment/Food   -0.80345    0.26544  -3.027 0.002471 ** 
## TPOthers               -1.11305    0.81779  -1.361 0.173497    
## SPYes                  -0.02669    0.41726  -0.064 0.948998    
## SPDon't know           -1.10368    0.55322  -1.995 0.046040 *  
## WIAt Bus Stops          0.60130    0.28481   2.111 0.034751 *  
## WIInternet/Facebook     1.18500    0.33140   3.576 0.000349 ***
## WIFrom Friends          0.98890    0.36275   2.726 0.006408 ** 
## WIOthers                0.60877    0.42965   1.417 0.156513    
## BCYes                  -0.81650    0.20905  -3.906 9.39e-05 ***
## CAYes                  -0.92689    0.21197  -4.373 1.23e-05 ***
## SBMNormal               0.18031    0.22877   0.788 0.430593    
## SBMAgree                2.13985    0.27266   7.848 4.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 656.17  on 705  degrees of freedom
## AIC: 694.17
## 
## Number of Fisher Scoring iterations: 5
## Calculate OR (odd ratio)
exp(mg2$coefficients)
##            (Intercept)     OCOfficers/Workers OCHousewife/Unemployed 
##              1.0430586              0.7332997              0.7466038 
##           OCFree labor               OCOthers                  MCYes 
##              0.5149797              2.5056898              0.4263137 
##     TPPicking Children   TPEntertainment/Food               TPOthers 
##              0.3499178              0.4477802              0.3285540 
##                  SPYes           SPDon't know         WIAt Bus Stops 
##              0.9736630              0.3316474              1.8244835 
##    WIInternet/Facebook         WIFrom Friends               WIOthers 
##              3.2706852              2.6882733              1.8381742 
##                  BCYes                  CAYes              SBMNormal 
##              0.4419760              0.3957832              1.1975866 
##               SBMAgree 
##              8.4981677
exp(coef(mg2))
##            (Intercept)     OCOfficers/Workers OCHousewife/Unemployed 
##              1.0430586              0.7332997              0.7466038 
##           OCFree labor               OCOthers                  MCYes 
##              0.5149797              2.5056898              0.4263137 
##     TPPicking Children   TPEntertainment/Food               TPOthers 
##              0.3499178              0.4477802              0.3285540 
##                  SPYes           SPDon't know         WIAt Bus Stops 
##              0.9736630              0.3316474              1.8244835 
##    WIInternet/Facebook         WIFrom Friends               WIOthers 
##              3.2706852              2.6882733              1.8381742 
##                  BCYes                  CAYes              SBMNormal 
##              0.4419760              0.3957832              1.1975866 
##               SBMAgree 
##              8.4981677
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: nnet
## 
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
## 
##     alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
## 
##     alpha
logistic.display(mg2)
## 
## Logistic regression predicting MSI : Yes vs No 
##  
##                          crude OR(95%CI)   adj. OR(95%CI)    P(Wald's test)
## OC: ref.=Pupils/Students                                                   
##    Officers/Workers      0.44 (0.28,0.67)  0.73 (0.43,1.26)  0.259         
##    Housewife/Unemployed  0.31 (0.13,0.78)  0.75 (0.27,2.09)  0.578         
##    Free labor            0.46 (0.3,0.73)   0.51 (0.29,0.9)   0.02          
##    Others                1.15 (0.62,2.13)  2.51 (1.15,5.47)  0.021         
##                                                                            
## MC: Yes vs No            0.44 (0.28,0.7)   0.43 (0.23,0.79)  0.007         
##                                                                            
## TP: ref.=Work/Study                                                        
##    Picking Children      0.42 (0.22,0.78)  0.35 (0.16,0.75)  0.007         
##    Entertainment/Food    0.68 (0.44,1.06)  0.45 (0.27,0.75)  0.002         
##    Others                0.53 (0.15,1.9)   0.33 (0.07,1.63)  0.173         
##                                                                            
## SP: ref.=No                                                                
##    Yes                   1.88 (0.98,3.6)   0.97 (0.43,2.21)  0.949         
##    Don't know            0.41 (0.15,1.11)  0.33 (0.11,0.98)  0.046         
##                                                                            
## WI: ref.=No                                                                
##    At Bus Stops          2.54 (1.66,3.86)  1.82 (1.04,3.19)  0.035         
##    Internet/Facebook     3.4 (2.02,5.72)   3.27 (1.71,6.26)  < 0.001       
##    From Friends          4.4 (2.45,7.89)   2.69 (1.32,5.47)  0.006         
##    Others                2.29 (1.16,4.5)   1.84 (0.79,4.27)  0.157         
##                                                                            
## BC: Yes vs No            0.52 (0.37,0.73)  0.44 (0.29,0.67)  < 0.001       
##                                                                            
## CA: Yes vs No            0.63 (0.45,0.88)  0.4 (0.26,0.6)    < 0.001       
##                                                                            
## SBM: ref.=Don't agree                                                      
##    Normal                1.19 (0.79,1.79)  1.2 (0.76,1.88)   0.431         
##    Agree                 6.34 (4.07,9.88)  8.5 (4.98,14.5)   < 0.001       
##                                                                            
##                          P(LR-test)
## OC: ref.=Pupils/Students 0.002     
##    Officers/Workers                
##    Housewife/Unemployed            
##    Free labor                      
##    Others                          
##                                    
## MC: Yes vs No            0.007     
##                                    
## TP: ref.=Work/Study      < 0.001   
##    Picking Children                
##    Entertainment/Food              
##    Others                          
##                                    
## SP: ref.=No              0.034     
##    Yes                             
##    Don't know                      
##                                    
## WI: ref.=No              0.004     
##    At Bus Stops                    
##    Internet/Facebook               
##    From Friends                    
##    Others                          
##                                    
## BC: Yes vs No            < 0.001   
##                                    
## CA: Yes vs No            < 0.001   
##                                    
## SBM: ref.=Don't agree    < 0.001   
##    Normal                          
##    Agree                           
##                                    
## Log-likelihood = -328.0847
## No. of observations = 724
## AIC value = 694.1694
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms: mg2-8 variables, mg1-19 variables, mg-23 variables
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
## 
##     dotplot
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## Registered S3 method overwritten by 'rms':
##   method       from      
##   print.lrtest epiDisplay
## 
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
## 
##     lrtest
## The following objects are masked from 'package:car':
## 
##     Predict, vif
mg2.c2 = lrm(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS)
mg2.c2
## Logistic Regression Model
##  
##  lrm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           724    LR chi2     185.43    R2       0.329    C       0.807    
##   No           530    d.f.            18    g        1.549    Dxy     0.614    
##   Yes          194    Pr(> chi2) <0.0001    gr       4.705    gamma   0.615    
##  max |deriv| 3e-09                          gp       0.239    tau-a   0.241    
##                                             Brier    0.146                     
##  
##                          Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept                0.0422 0.4844  0.09  0.9306  
##  OC=Officers/Workers     -0.3102 0.2751 -1.13  0.2594  
##  OC=Housewife/Unemployed -0.2922 0.5254 -0.56  0.5781  
##  OC=Free labor           -0.6636 0.2847 -2.33  0.0198  
##  OC=Others                0.9186 0.3981  2.31  0.0210  
##  MC=Yes                  -0.8526 0.3177 -2.68  0.0073  
##  TP=Picking Children     -1.0501 0.3895 -2.70  0.0070  
##  TP=Entertainment/Food   -0.8035 0.2654 -3.03  0.0025  
##  TP=Others               -1.1131 0.8178 -1.36  0.1735  
##  SP=Yes                  -0.0267 0.4173 -0.06  0.9490  
##  SP=Don't know           -1.1037 0.5532 -2.00  0.0460  
##  WI=At Bus Stops          0.6013 0.2848  2.11  0.0348  
##  WI=Internet/Facebook     1.1850 0.3314  3.58  0.0003  
##  WI=From Friends          0.9889 0.3627  2.73  0.0064  
##  WI=Others                0.6088 0.4297  1.42  0.1565  
##  BC=Yes                  -0.8165 0.2090 -3.91  <0.0001 
##  CA=Yes                  -0.9269 0.2120 -4.37  <0.0001 
##  SBM=Normal               0.1803 0.2288  0.79  0.4306  
##  SBM=Agree                2.1399 0.2727  7.85  <0.0001 
## 
mg1.c2 = lrm(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, data = BS)
mg1.c2
## Logistic Regression Model
##  
##  lrm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + 
##      BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, data = BS)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           724    LR chi2     223.21    R2       0.386    C       0.838    
##   No           530    d.f.            42    g        1.783    Dxy     0.676    
##   Yes          194    Pr(> chi2) <0.0001    gr       5.949    gamma   0.676    
##  max |deriv| 6e-07                          gp       0.261    tau-a   0.266    
##                                             Brier    0.136                     
##  
##                          Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept               -0.4609 1.0141 -0.45  0.6495  
##  AG=25-60                 0.5099 0.4083  1.25  0.2117  
##  AG=>60                   1.6002 0.8095  1.98  0.0481  
##  OC=Officers/Workers     -0.6016 0.4441 -1.35  0.1755  
##  OC=Housewife/Unemployed -0.6725 0.6706 -1.00  0.3159  
##  OC=Free labor           -1.0678 0.4356 -2.45  0.0142  
##  OC=Others                0.1452 0.5678  0.26  0.7981  
##  MC=Yes                  -0.8036 0.4241 -1.89  0.0581  
##  MO=Yes                  -0.1423 0.3816 -0.37  0.7092  
##  NM=1                     0.7977 0.8286  0.96  0.3357  
##  NM=2                     0.5844 0.7997  0.73  0.4649  
##  NM=>=3                   1.1535 0.8008  1.44  0.1498  
##  TM=Motorbike             0.0066 0.4391  0.01  0.9881  
##  TM=Car                   0.8102 0.5633  1.44  0.1503  
##  TM=Hichhiking/Taxi       0.5046 0.4978  1.01  0.3107  
##  TP=Picking Children     -1.4523 0.4314 -3.37  0.0008  
##  TP=Entertainment/Food   -0.8559 0.2841 -3.01  0.0026  
##  TP=Others               -1.3578 0.9183 -1.48  0.1393  
##  SP=Yes                  -0.0403 0.4460 -0.09  0.9280  
##  SP=Don't know           -1.1199 0.5983 -1.87  0.0612  
##  WI=At Bus Stops          0.6227 0.3041  2.05  0.0406  
##  WI=Internet/Facebook     1.2949 0.3580  3.62  0.0003  
##  WI=From Friends          0.9059 0.3942  2.30  0.0216  
##  WI=Others                0.6702 0.4635  1.45  0.1481  
##  BC=Yes                  -0.8890 0.2316 -3.84  0.0001  
##  CA=Yes                  -0.9157 0.2362 -3.88  0.0001  
##  BSC=Normal              -0.2274 0.4041 -0.56  0.5736  
##  BSC=Agree               -0.1941 0.3623 -0.54  0.5922  
##  BSA=Normal              -0.5962 0.4435 -1.34  0.1788  
##  BSA=Agree                0.0965 0.4089  0.24  0.8134  
##  BRC=Normal              -0.3440 0.4311 -0.80  0.4250  
##  BRC=Agree               -0.2640 0.3412 -0.77  0.4392  
##  MR=Comfortable           0.0527 0.3932  0.13  0.8935  
##  MR=Low price             0.9698 0.5564  1.74  0.0813  
##  MR=Accessibility         0.8040 0.4368  1.84  0.0657  
##  MR=Reliability          -0.0978 0.5206 -0.19  0.8509  
##  MR=others                0.2320 0.6564  0.35  0.7238  
##  BUB=Yes                 -0.0509 0.2359 -0.22  0.8291  
##  SBM=Normal               0.2521 0.2868  0.88  0.3793  
##  SBM=Agree                1.9634 0.3299  5.95  <0.0001 
##  SBS=Normal               0.0080 0.2739  0.03  0.9766  
##  SBS=Agree                0.7757 0.3844  2.02  0.0436  
##  DI                      -0.0324 0.0227 -1.43  0.1535  
## 
mg.c2 = lrm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI, data = BS)
mg.c2
## Logistic Regression Model
##  
##  lrm(formula = MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + 
##      TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + 
##      SBM + SBS + TI, data = BS)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           724    LR chi2     229.27    R2       0.395    C       0.841    
##   No           530    d.f.            46    g        1.803    Dxy     0.682    
##   Yes          194    Pr(> chi2) <0.0001    gr       6.069    gamma   0.682    
##  max |deriv| 1e-06                          gp       0.263    tau-a   0.268    
##                                             Brier    0.135                     
##  
##                          Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept               -0.5530 1.0410 -0.53  0.5953  
##  GD=Male                  0.3038 0.2333  1.30  0.1929  
##  AG=25-60                 0.4082 0.4177  0.98  0.3285  
##  AG=>60                   1.4890 0.8194  1.82  0.0692  
##  OC=Officers/Workers     -0.6459 0.4433 -1.46  0.1451  
##  OC=Housewife/Unemployed -0.4423 0.6863 -0.64  0.5193  
##  OC=Free labor           -0.9863 0.4365 -2.26  0.0238  
##  OC=Others               -0.0154 0.5777 -0.03  0.9787  
##  NC=Yes                   0.2117 0.2323  0.91  0.3622  
##  MC=Yes                  -0.8439 0.4270 -1.98  0.0481  
##  CC=Yes                   0.1336 0.3590  0.37  0.7097  
##  MO=Yes                  -0.2218 0.3776 -0.59  0.5569  
##  NM=1                     0.7581 0.8286  0.91  0.3603  
##  NM=2                     0.4766 0.8008  0.60  0.5518  
##  NM=>=3                   1.0529 0.8016  1.31  0.1890  
##  TM=Motorbike            -0.0973 0.4356 -0.22  0.8232  
##  TM=Car                   0.2328 0.5875  0.40  0.6919  
##  TM=Hichhiking/Taxi       0.3671 0.5009  0.73  0.4635  
##  TP=Picking Children     -1.2738 0.4312 -2.95  0.0031  
##  TP=Entertainment/Food   -0.8708 0.2909 -2.99  0.0028  
##  TP=Others               -1.2720 0.9125 -1.39  0.1633  
##  TS=Yes                  -0.2758 0.2137 -1.29  0.1968  
##  SP=Yes                  -0.1912 0.4559 -0.42  0.6750  
##  SP=Don't know           -1.1700 0.5989 -1.95  0.0507  
##  WI=At Bus Stops          0.7294 0.3083  2.37  0.0180  
##  WI=Internet/Facebook     1.4148 0.3662  3.86  0.0001  
##  WI=From Friends          0.9877 0.4007  2.47  0.0137  
##  WI=Others                0.8260 0.4597  1.80  0.0724  
##  BC=Yes                  -0.7979 0.2327 -3.43  0.0006  
##  CA=Yes                  -0.7818 0.2353 -3.32  0.0009  
##  BSC=Normal              -0.2792 0.4066 -0.69  0.4923  
##  BSC=Agree               -0.2173 0.3684 -0.59  0.5554  
##  BSA=Normal              -0.5720 0.4459 -1.28  0.1996  
##  BSA=Agree                0.0956 0.4132  0.23  0.8171  
##  BRC=Normal              -0.4355 0.4317 -1.01  0.3130  
##  BRC=Agree               -0.4091 0.3428 -1.19  0.2328  
##  MR=Comfortable          -0.0269 0.3959 -0.07  0.9459  
##  MR=Low price             0.8461 0.5582  1.52  0.1296  
##  MR=Accessibility         0.7670 0.4389  1.75  0.0806  
##  MR=Reliability          -0.1995 0.5264 -0.38  0.7047  
##  MR=others                0.1972 0.6711  0.29  0.7689  
##  BUB=Yes                 -0.0449 0.2361 -0.19  0.8491  
##  SBM=Normal               0.3480 0.2902  1.20  0.2305  
##  SBM=Agree                1.9672 0.3357  5.86  <0.0001 
##  SBS=Normal              -0.0234 0.2788 -0.08  0.9331  
##  SBS=Agree                0.7616 0.3877  1.96  0.0495  
##  TI                       0.0124 0.0074  1.67  0.0954  
## 

5. Test models

# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình): mg2 tốt nhất
deviance(mg)
## [1] 612.3312
deviance(mg1)
## [1] 618.3861
deviance(mg2)
## [1] 656.1694
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê - đều có ý nghĩa tk
library(rms)
lrtest(mg2, mg)
## 
## Model 1: MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM
## Model 2: MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + 
##     SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + 
##     TI
## 
##  L.R. Chisq        d.f.           P 
## 43.83826440 28.00000000  0.02882647
lrtest(mg,mg2)
## 
## Model 1: MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + 
##     SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + 
##     TI
## Model 2: MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM
## 
##  L.R. Chisq        d.f.           P 
## 43.83826440 28.00000000  0.02882647
lrtest(mg2, mg1)
## 
## Model 1: MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM
## Model 2: MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + 
##     BSC + BSA + BRC + MR + BUB + SBM + SBS + DI
## 
## L.R. Chisq       d.f.          P 
##  37.783318  24.000000   0.036485
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt - mg2 tốt nhất
AIC(mg)
## [1] 706.3312
AIC(mg1)
## [1] 704.3861
AIC(mg2)
## [1] 694.1694
## Calculate MacFadden's R2 voi packages "pscl" - mg tốt nhất
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -328.0847125 -420.7996718  185.4299185    0.2203304    0.2259499    0.3287621
pR2(mg1)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -309.1930534 -420.7996718  223.2132367    0.2652251    0.2653092    0.3860308
pR2(mg)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -306.1655803 -420.7996718  229.2681829    0.2724196    0.2714280    0.3949337
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(BS$MSI) # Qs là không chuyển đổi nhưng phân loại thành chuyển đổi thì gán giá trị là 0 và ngược lại
##     Yes
## No    0
## Yes   1
dim(BS)
## [1] 724  46
glm.probs <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##          1          2          3          4          5          6          7 
## 0.24692122 0.04561607 0.38494464 0.25878074 0.04561607 0.05414121 0.04561607 
##          8          9         10 
## 0.27849642 0.24084863 0.35468576
glm.pred <- rep("No", 724)
glm.pred[glm.probs >= 0.5] = "Yes"
table(glm.pred, BS$MSI)
##         
## glm.pred  No Yes
##      No  495 110
##      Yes  35  84
mean(glm.pred == BS$MSI) # Tỷ lệ dự báo chính xác của mô hình 80% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-80 = 20%
## [1] 0.7997238
round(prop.table(table(BS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi xe buýt 73.2% < 80% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##   No  Yes 
## 73.2 26.8
## Model mg
contrasts(BS$MSI)
##     Yes
## No    0
## Yes   1
glm.probs.mg1 <- predict(mg1, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg1[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##          1          2          3          4          5          6          7 
## 0.47252684 0.03151333 0.58623739 0.23954668 0.02832060 0.04888496 0.04486295 
##          8          9         10 
## 0.19156063 0.13865798 0.40021393
glm.pred.mg1 <- rep("No", 724)
glm.pred.mg1[glm.probs.mg1 >= 0.5] = "Yes"
table(glm.pred.mg1, BS$MSI)
##             
## glm.pred.mg1  No Yes
##          No  493 101
##          Yes  37  93
mean(glm.pred.mg1 == BS$MSI) # Tỷ lệ dự báo chính xác của mô hình 81% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-81 = 19%
## [1] 0.8093923
round(prop.table(table(BS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi sang xe buýt 73.2% < 81% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##   No  Yes 
## 73.2 26.8
# 4.5. Calculate Sensitivity - Độ nhạy
493/(493+101) # Đạt 82% --> Rất lý tưởng: Mô hình xác định tới 82% người không chuyển đổi sang xe buýt
## [1] 0.8299663
# 4.6. Calculate Specificity - Độ đặc hiệu
93/(93+37) # Đạt 71% --> Mô hình có khả năng dự báo 71% người chuyển đổi sang xe buýt
## [1] 0.7153846
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với library(MKmisc) và hàm HLgof.test(fit = fitted(mg2), obs = BSs$MSI) # trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
   ## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
## The following object is masked from 'package:purrr':
## 
##     lift
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mg1train = train(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, data = BS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg1train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6346  -0.6513  -0.3886   0.4517   3.0879  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -0.460888   1.014052  -0.455 0.649468    
## `AG25-60`                 0.509909   0.408317   1.249 0.211735    
## `AG>60`                   1.600156   0.809458   1.977 0.048062 *  
## `OCOfficers/Workers`     -0.601602   0.444097  -1.355 0.175525    
## `OCHousewife/Unemployed` -0.672492   0.670571  -1.003 0.315927    
## `OCFree labor`           -1.067818   0.435640  -2.451 0.014240 *  
## OCOthers                  0.145226   0.567793   0.256 0.798126    
## MCYes                    -0.803585   0.424113  -1.895 0.058127 .  
## MOYes                    -0.142332   0.381606  -0.373 0.709162    
## NM1                       0.797740   0.828571   0.963 0.335653    
## NM2                       0.584359   0.799689   0.731 0.464942    
## `NM>=3`                   1.153470   0.800814   1.440 0.149762    
## TMMotorbike               0.006570   0.439137   0.015 0.988063    
## TMCar                     0.810212   0.563283   1.438 0.150328    
## `TMHichhiking/Taxi`       0.504619   0.497789   1.014 0.310716    
## `TPPicking Children`     -1.452278   0.431442  -3.366 0.000762 ***
## `TPEntertainment/Food`   -0.855917   0.284101  -3.013 0.002589 ** 
## TPOthers                 -1.357762   0.918309  -1.479 0.139262    
## SPYes                    -0.040310   0.446021  -0.090 0.927988    
## `SPDon't know`           -1.119875   0.598265  -1.872 0.061224 .  
## `WIAt Bus Stops`          0.622671   0.304066   2.048 0.040578 *  
## `WIInternet/Facebook`     1.294873   0.357963   3.617 0.000298 ***
## `WIFrom Friends`          0.905878   0.394216   2.298 0.021566 *  
## WIOthers                  0.670218   0.463461   1.446 0.148145    
## BCYes                    -0.888957   0.231589  -3.839 0.000124 ***
## CAYes                    -0.915710   0.236190  -3.877 0.000106 ***
## BSCNormal                -0.227395   0.404108  -0.563 0.573633    
## BSCAgree                 -0.194065   0.362292  -0.536 0.592195    
## BSANormal                -0.596242   0.443520  -1.344 0.178838    
## BSAAgree                  0.096531   0.408890   0.236 0.813369    
## BRCNormal                -0.343956   0.431132  -0.798 0.424988    
## BRCAgree                 -0.263950   0.341203  -0.774 0.439175    
## MRComfortable             0.052656   0.393148   0.134 0.893455    
## `MRLow price`             0.969812   0.556347   1.743 0.081302 .  
## MRAccessibility           0.804047   0.436812   1.841 0.065663 .  
## MRReliability            -0.097818   0.520557  -0.188 0.850948    
## MRothers                  0.231989   0.656438   0.353 0.723784    
## BUBYes                   -0.050910   0.235854  -0.216 0.829101    
## SBMNormal                 0.252138   0.286773   0.879 0.379279    
## SBMAgree                  1.963412   0.329928   5.951 2.66e-09 ***
## SBSNormal                 0.008038   0.273902   0.029 0.976590    
## SBSAgree                  0.775681   0.384369   2.018 0.043585 *  
## DI                       -0.032412   0.022708  -1.427 0.153481    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 618.39  on 681  degrees of freedom
## AIC: 704.39
## 
## Number of Fisher Scoring iterations: 5
mg2train = train(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg2train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0246  -0.7121  -0.4431   0.5256   2.7444  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               0.04216    0.48436   0.087 0.930642    
## `OCOfficers/Workers`     -0.31020    0.27507  -1.128 0.259441    
## `OCHousewife/Unemployed` -0.29222    0.52541  -0.556 0.578092    
## `OCFree labor`           -0.66363    0.28470  -2.331 0.019753 *  
## OCOthers                  0.91856    0.39809   2.307 0.021032 *  
## MCYes                    -0.85258    0.31772  -2.683 0.007286 ** 
## `TPPicking Children`     -1.05006    0.38954  -2.696 0.007026 ** 
## `TPEntertainment/Food`   -0.80345    0.26544  -3.027 0.002471 ** 
## TPOthers                 -1.11305    0.81779  -1.361 0.173497    
## SPYes                    -0.02669    0.41726  -0.064 0.948998    
## `SPDon't know`           -1.10368    0.55322  -1.995 0.046040 *  
## `WIAt Bus Stops`          0.60130    0.28481   2.111 0.034751 *  
## `WIInternet/Facebook`     1.18500    0.33140   3.576 0.000349 ***
## `WIFrom Friends`          0.98890    0.36275   2.726 0.006408 ** 
## WIOthers                  0.60877    0.42965   1.417 0.156513    
## BCYes                    -0.81650    0.20905  -3.906 9.39e-05 ***
## CAYes                    -0.92689    0.21197  -4.373 1.23e-05 ***
## SBMNormal                 0.18031    0.22877   0.788 0.430593    
## SBMAgree                  2.13985    0.27266   7.848 4.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 656.17  on 705  degrees of freedom
## AIC: 694.17
## 
## Number of Fisher Scoring iterations: 5
  ## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg1 <- predict(mg1train, newdata = BS)
confusionMatrix(data = predmg1, BS$MSI)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  493 101
##        Yes  37  93
##                                           
##                Accuracy : 0.8094          
##                  95% CI : (0.7789, 0.8374)
##     No Information Rate : 0.732           
##     P-Value [Acc > NIR] : 7.289e-07       
##                                           
##                   Kappa : 0.4574          
##                                           
##  Mcnemar's Test P-Value : 8.189e-08       
##                                           
##             Sensitivity : 0.9302          
##             Specificity : 0.4794          
##          Pos Pred Value : 0.8300          
##          Neg Pred Value : 0.7154          
##              Prevalence : 0.7320          
##          Detection Rate : 0.6809          
##    Detection Prevalence : 0.8204          
##       Balanced Accuracy : 0.7048          
##                                           
##        'Positive' Class : No              
## 
predmg2 <- predict(mg2train, newdata = BS)
confusionMatrix(data = predmg2, BS$MSI)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  495 110
##        Yes  35  84
##                                           
##                Accuracy : 0.7997          
##                  95% CI : (0.7687, 0.8283)
##     No Information Rate : 0.732           
##     P-Value [Acc > NIR] : 1.412e-05       
##                                           
##                   Kappa : 0.4182          
##                                           
##  Mcnemar's Test P-Value : 7.978e-10       
##                                           
##             Sensitivity : 0.9340          
##             Specificity : 0.4330          
##          Pos Pred Value : 0.8182          
##          Neg Pred Value : 0.7059          
##              Prevalence : 0.7320          
##          Detection Rate : 0.6837          
##    Detection Prevalence : 0.8356          
##       Balanced Accuracy : 0.6835          
##                                           
##        'Positive' Class : No              
## 
  ## Lấy dataframe với thông tin Accuracy của mô hình
a = mg1train$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg1" # Thêm biến mô hình
a
##      Accuracy      Kappa   Mohinh
## 1   0.8055556 0.44309392 Logitmg1
## 2   0.7777778 0.25097529 Logitmg1
## 3   0.7945205 0.42032822 Logitmg1
## 4   0.7222222 0.26002055 Logitmg1
## 5   0.7083333 0.14864865 Logitmg1
## 6   0.8194444 0.49240781 Logitmg1
## 7   0.8082192 0.46826223 Logitmg1
## 8   0.7534247 0.33903421 Logitmg1
## 9   0.8082192 0.42905028 Logitmg1
## 10  0.7500000 0.33401850 Logitmg1
## 11  0.7534247 0.26592179 Logitmg1
## 12  0.7916667 0.34146341 Logitmg1
## 13  0.7671233 0.34303864 Logitmg1
## 14  0.8194444 0.49240781 Logitmg1
## 15  0.8611111 0.60220994 Logitmg1
## 16  0.7500000 0.25602755 Logitmg1
## 17  0.7777778 0.38658147 Logitmg1
## 18  0.7671233 0.34303864 Logitmg1
## 19  0.7638889 0.38181818 Logitmg1
## 20  0.7397260 0.33540968 Logitmg1
## 21  0.7083333 0.18004338 Logitmg1
## 22  0.8194444 0.47297297 Logitmg1
## 23  0.7945205 0.43989770 Logitmg1
## 24  0.8611111 0.60220994 Logitmg1
## 25  0.7916667 0.34146341 Logitmg1
## 26  0.7777778 0.36353591 Logitmg1
## 27  0.7945205 0.43989770 Logitmg1
## 28  0.7916667 0.41431670 Logitmg1
## 29  0.7671233 0.36521739 Logitmg1
## 30  0.7534247 0.31633715 Logitmg1
## 31  0.7083333 0.07804878 Logitmg1
## 32  0.7671233 0.31925398 Logitmg1
## 33  0.7808219 0.34748603 Logitmg1
## 34  0.7500000 0.28397790 Logitmg1
## 35  0.7500000 0.35650447 Logitmg1
## 36  0.7945205 0.42032822 Logitmg1
## 37  0.7945205 0.39934174 Logitmg1
## 38  0.8750000 0.66108787 Logitmg1
## 39  0.8194444 0.51046025 Logitmg1
## 40  0.7222222 0.23322684 Logitmg1
## 41  0.7083333 0.14864865 Logitmg1
## 42  0.8472222 0.58577406 Logitmg1
## 43  0.7534247 0.29202586 Logitmg1
## 44  0.7260274 0.21336207 Logitmg1
## 45  0.8194444 0.47297297 Logitmg1
## 46  0.7083333 0.20920502 Logitmg1
## 47  0.7808219 0.34748603 Logitmg1
## 48  0.8082192 0.44935345 Logitmg1
## 49  0.7777778 0.40801644 Logitmg1
## 50  0.8472222 0.57049892 Logitmg1
## 51  0.7083333 0.11475410 Logitmg1
## 52  0.7397260 0.35578263 Logitmg1
## 53  0.7260274 0.21336207 Logitmg1
## 54  0.7500000 0.35650447 Logitmg1
## 55  0.7916667 0.41431670 Logitmg1
## 56  0.8472222 0.58577406 Logitmg1
## 57  0.7397260 0.31370609 Logitmg1
## 58  0.7638889 0.22137405 Logitmg1
## 59  0.8082192 0.44935345 Logitmg1
## 60  0.7500000 0.15734720 Logitmg1
## 61  0.7397260 0.21058623 Logitmg1
## 62  0.7638889 0.28337237 Logitmg1
## 63  0.7808219 0.41247485 Logitmg1
## 64  0.8194444 0.45199063 Logitmg1
## 65  0.8055556 0.48201439 Logitmg1
## 66  0.7777778 0.38658147 Logitmg1
## 67  0.7945205 0.45818902 Logitmg1
## 68  0.7500000 0.25602755 Logitmg1
## 69  0.7638889 0.35983264 Logitmg1
## 70  0.7671233 0.34303864 Logitmg1
## 71  0.7916667 0.41431670 Logitmg1
## 72  0.7916667 0.45454545 Logitmg1
## 73  0.7671233 0.34303864 Logitmg1
## 74  0.8194444 0.45199063 Logitmg1
## 75  0.7945205 0.39934174 Logitmg1
## 76  0.7500000 0.28397790 Logitmg1
## 77  0.7397260 0.33540968 Logitmg1
## 78  0.7222222 0.23322684 Logitmg1
## 79  0.7671233 0.29368241 Logitmg1
## 80  0.7638889 0.35983264 Logitmg1
## 81  0.7534247 0.26592179 Logitmg1
## 82  0.7361111 0.28451883 Logitmg1
## 83  0.7260274 0.24037461 Logitmg1
## 84  0.7777778 0.40801644 Logitmg1
## 85  0.7945205 0.43989770 Logitmg1
## 86  0.8194444 0.49240781 Logitmg1
## 87  0.8219178 0.51457801 Logitmg1
## 88  0.7916667 0.45454545 Logitmg1
## 89  0.8055556 0.44309392 Logitmg1
## 90  0.8333333 0.46201743 Logitmg1
## 91  0.7397260 0.29053708 Logitmg1
## 92  0.7500000 0.33401850 Logitmg1
## 93  0.7808219 0.39229969 Logitmg1
## 94  0.8055556 0.37235367 Logitmg1
## 95  0.7916667 0.39189189 Logitmg1
## 96  0.7500000 0.22580645 Logitmg1
## 97  0.8055556 0.46325879 Logitmg1
## 98  0.8082192 0.46826223 Logitmg1
## 99  0.7916667 0.43514644 Logitmg1
## 100 0.7808219 0.32250580 Logitmg1
b = mg2train$resample
b = b[, -3]
b$Mohinh = "Logitmg2"
b
##      Accuracy     Kappa   Mohinh
## 1   0.7808219 0.2955368 Logitmg2
## 2   0.8055556 0.4632588 Logitmg2
## 3   0.7916667 0.3676815 Logitmg2
## 4   0.7222222 0.2044199 Logitmg2
## 5   0.8493151 0.6026719 Logitmg2
## 6   0.8194444 0.4292683 Logitmg2
## 7   0.7260274 0.1843575 Logitmg2
## 8   0.7777778 0.3635359 Logitmg2
## 9   0.8219178 0.4794295 Logitmg2
## 10  0.7500000 0.2258065 Logitmg2
## 11  0.8055556 0.4213548 Logitmg2
## 12  0.7945205 0.3993417 Logitmg2
## 13  0.7945205 0.3993417 Logitmg2
## 14  0.8194444 0.4729730 Logitmg2
## 15  0.7945205 0.4203282 Logitmg2
## 16  0.7777778 0.2826899 Logitmg2
## 17  0.8333333 0.5560123 Logitmg2
## 18  0.7397260 0.1797753 Logitmg2
## 19  0.8055556 0.4213548 Logitmg2
## 20  0.7222222 0.2332268 Logitmg2
## 21  0.7397260 0.2905371 Logitmg2
## 22  0.7397260 0.2657491 Logitmg2
## 23  0.7808219 0.3922997 Logitmg2
## 24  0.7808219 0.3225058 Logitmg2
## 25  0.7916667 0.3414634 Logitmg2
## 26  0.7916667 0.3414634 Logitmg2
## 27  0.7916667 0.3918919 Logitmg2
## 28  0.7916667 0.4143167 Logitmg2
## 29  0.8194444 0.4519906 Logitmg2
## 30  0.7500000 0.2258065 Logitmg2
## 31  0.7916667 0.3414634 Logitmg2
## 32  0.6986301 0.1644121 Logitmg2
## 33  0.8333333 0.5040184 Logitmg2
## 34  0.7671233 0.2936824 Logitmg2
## 35  0.7638889 0.2833724 Logitmg2
## 36  0.8219178 0.5145780 Logitmg2
## 37  0.7397260 0.1797753 Logitmg2
## 38  0.8333333 0.5399361 Logitmg2
## 39  0.7777778 0.3635359 Logitmg2
## 40  0.8194444 0.4924078 Logitmg2
## 41  0.7638889 0.2536585 Logitmg2
## 42  0.7777778 0.3386912 Logitmg2
## 43  0.7083333 0.1147541 Logitmg2
## 44  0.8082192 0.4290503 Logitmg2
## 45  0.7916667 0.3414634 Logitmg2
## 46  0.7222222 0.1733639 Logitmg2
## 47  0.7777778 0.4080164 Logitmg2
## 48  0.8082192 0.4493534 Logitmg2
## 49  0.7945205 0.4581890 Logitmg2
## 50  0.7808219 0.3225058 Logitmg2
## 51  0.8356164 0.5106145 Logitmg2
## 52  0.7500000 0.2258065 Logitmg2
## 53  0.7916667 0.4143167 Logitmg2
## 54  0.7671233 0.3859476 Logitmg2
## 55  0.7777778 0.4080164 Logitmg2
## 56  0.8356164 0.5442248 Logitmg2
## 57  0.7916667 0.2819149 Logitmg2
## 58  0.7916667 0.3676815 Logitmg2
## 59  0.7397260 0.2657491 Logitmg2
## 60  0.7638889 0.2536585 Logitmg2
## 61  0.7777778 0.3386912 Logitmg2
## 62  0.7500000 0.2560276 Logitmg2
## 63  0.8055556 0.4213548 Logitmg2
## 64  0.8055556 0.4213548 Logitmg2
## 65  0.7534247 0.2659218 Logitmg2
## 66  0.7916667 0.3676815 Logitmg2
## 67  0.7808219 0.3922997 Logitmg2
## 68  0.7945205 0.3767786 Logitmg2
## 69  0.7916667 0.3676815 Logitmg2
## 70  0.7397260 0.2657491 Logitmg2
## 71  0.7638889 0.3818182 Logitmg2
## 72  0.7945205 0.3524542 Logitmg2
## 73  0.7500000 0.2560276 Logitmg2
## 74  0.7945205 0.4581890 Logitmg2
## 75  0.7777778 0.3118280 Logitmg2
## 76  0.8333333 0.5040184 Logitmg2
## 77  0.7534247 0.2920259 Logitmg2
## 78  0.8493151 0.5749074 Logitmg2
## 79  0.7638889 0.2536585 Logitmg2
## 80  0.8194444 0.4292683 Logitmg2
## 81  0.7945205 0.3524542 Logitmg2
## 82  0.7671233 0.2936824 Logitmg2
## 83  0.7638889 0.3108108 Logitmg2
## 84  0.7500000 0.2560276 Logitmg2
## 85  0.8055556 0.4430939 Logitmg2
## 86  0.8082192 0.4859155 Logitmg2
## 87  0.7777778 0.3386912 Logitmg2
## 88  0.8055556 0.4430939 Logitmg2
## 89  0.7500000 0.2258065 Logitmg2
## 90  0.7808219 0.3474860 Logitmg2
## 91  0.8055556 0.4632588 Logitmg2
## 92  0.7916667 0.3918919 Logitmg2
## 93  0.7808219 0.3474860 Logitmg2
## 94  0.7123288 0.1590784 Logitmg2
## 95  0.8472222 0.5857741 Logitmg2
## 96  0.7534247 0.2920259 Logitmg2
## 97  0.8055556 0.3723537 Logitmg2
## 98  0.7638889 0.2833724 Logitmg2
## 99  0.8082192 0.4682622 Logitmg2
## 100 0.7777778 0.2509753 Logitmg2
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
##      Accuracy      Kappa   Mohinh
## 1   0.8055556 0.44309392 Logitmg1
## 2   0.7777778 0.25097529 Logitmg1
## 3   0.7945205 0.42032822 Logitmg1
## 4   0.7222222 0.26002055 Logitmg1
## 5   0.7083333 0.14864865 Logitmg1
## 6   0.8194444 0.49240781 Logitmg1
## 7   0.8082192 0.46826223 Logitmg1
## 8   0.7534247 0.33903421 Logitmg1
## 9   0.8082192 0.42905028 Logitmg1
## 10  0.7500000 0.33401850 Logitmg1
## 11  0.7534247 0.26592179 Logitmg1
## 12  0.7916667 0.34146341 Logitmg1
## 13  0.7671233 0.34303864 Logitmg1
## 14  0.8194444 0.49240781 Logitmg1
## 15  0.8611111 0.60220994 Logitmg1
## 16  0.7500000 0.25602755 Logitmg1
## 17  0.7777778 0.38658147 Logitmg1
## 18  0.7671233 0.34303864 Logitmg1
## 19  0.7638889 0.38181818 Logitmg1
## 20  0.7397260 0.33540968 Logitmg1
## 21  0.7083333 0.18004338 Logitmg1
## 22  0.8194444 0.47297297 Logitmg1
## 23  0.7945205 0.43989770 Logitmg1
## 24  0.8611111 0.60220994 Logitmg1
## 25  0.7916667 0.34146341 Logitmg1
## 26  0.7777778 0.36353591 Logitmg1
## 27  0.7945205 0.43989770 Logitmg1
## 28  0.7916667 0.41431670 Logitmg1
## 29  0.7671233 0.36521739 Logitmg1
## 30  0.7534247 0.31633715 Logitmg1
## 31  0.7083333 0.07804878 Logitmg1
## 32  0.7671233 0.31925398 Logitmg1
## 33  0.7808219 0.34748603 Logitmg1
## 34  0.7500000 0.28397790 Logitmg1
## 35  0.7500000 0.35650447 Logitmg1
## 36  0.7945205 0.42032822 Logitmg1
## 37  0.7945205 0.39934174 Logitmg1
## 38  0.8750000 0.66108787 Logitmg1
## 39  0.8194444 0.51046025 Logitmg1
## 40  0.7222222 0.23322684 Logitmg1
## 41  0.7083333 0.14864865 Logitmg1
## 42  0.8472222 0.58577406 Logitmg1
## 43  0.7534247 0.29202586 Logitmg1
## 44  0.7260274 0.21336207 Logitmg1
## 45  0.8194444 0.47297297 Logitmg1
## 46  0.7083333 0.20920502 Logitmg1
## 47  0.7808219 0.34748603 Logitmg1
## 48  0.8082192 0.44935345 Logitmg1
## 49  0.7777778 0.40801644 Logitmg1
## 50  0.8472222 0.57049892 Logitmg1
## 51  0.7083333 0.11475410 Logitmg1
## 52  0.7397260 0.35578263 Logitmg1
## 53  0.7260274 0.21336207 Logitmg1
## 54  0.7500000 0.35650447 Logitmg1
## 55  0.7916667 0.41431670 Logitmg1
## 56  0.8472222 0.58577406 Logitmg1
## 57  0.7397260 0.31370609 Logitmg1
## 58  0.7638889 0.22137405 Logitmg1
## 59  0.8082192 0.44935345 Logitmg1
## 60  0.7500000 0.15734720 Logitmg1
## 61  0.7397260 0.21058623 Logitmg1
## 62  0.7638889 0.28337237 Logitmg1
## 63  0.7808219 0.41247485 Logitmg1
## 64  0.8194444 0.45199063 Logitmg1
## 65  0.8055556 0.48201439 Logitmg1
## 66  0.7777778 0.38658147 Logitmg1
## 67  0.7945205 0.45818902 Logitmg1
## 68  0.7500000 0.25602755 Logitmg1
## 69  0.7638889 0.35983264 Logitmg1
## 70  0.7671233 0.34303864 Logitmg1
## 71  0.7916667 0.41431670 Logitmg1
## 72  0.7916667 0.45454545 Logitmg1
## 73  0.7671233 0.34303864 Logitmg1
## 74  0.8194444 0.45199063 Logitmg1
## 75  0.7945205 0.39934174 Logitmg1
## 76  0.7500000 0.28397790 Logitmg1
## 77  0.7397260 0.33540968 Logitmg1
## 78  0.7222222 0.23322684 Logitmg1
## 79  0.7671233 0.29368241 Logitmg1
## 80  0.7638889 0.35983264 Logitmg1
## 81  0.7534247 0.26592179 Logitmg1
## 82  0.7361111 0.28451883 Logitmg1
## 83  0.7260274 0.24037461 Logitmg1
## 84  0.7777778 0.40801644 Logitmg1
## 85  0.7945205 0.43989770 Logitmg1
## 86  0.8194444 0.49240781 Logitmg1
## 87  0.8219178 0.51457801 Logitmg1
## 88  0.7916667 0.45454545 Logitmg1
## 89  0.8055556 0.44309392 Logitmg1
## 90  0.8333333 0.46201743 Logitmg1
## 91  0.7397260 0.29053708 Logitmg1
## 92  0.7500000 0.33401850 Logitmg1
## 93  0.7808219 0.39229969 Logitmg1
## 94  0.8055556 0.37235367 Logitmg1
## 95  0.7916667 0.39189189 Logitmg1
## 96  0.7500000 0.22580645 Logitmg1
## 97  0.8055556 0.46325879 Logitmg1
## 98  0.8082192 0.46826223 Logitmg1
## 99  0.7916667 0.43514644 Logitmg1
## 100 0.7808219 0.32250580 Logitmg1
## 101 0.7808219 0.29553679 Logitmg2
## 102 0.8055556 0.46325879 Logitmg2
## 103 0.7916667 0.36768150 Logitmg2
## 104 0.7222222 0.20441989 Logitmg2
## 105 0.8493151 0.60267194 Logitmg2
## 106 0.8194444 0.42926829 Logitmg2
## 107 0.7260274 0.18435754 Logitmg2
## 108 0.7777778 0.36353591 Logitmg2
## 109 0.8219178 0.47942951 Logitmg2
## 110 0.7500000 0.22580645 Logitmg2
## 111 0.8055556 0.42135476 Logitmg2
## 112 0.7945205 0.39934174 Logitmg2
## 113 0.7945205 0.39934174 Logitmg2
## 114 0.8194444 0.47297297 Logitmg2
## 115 0.7945205 0.42032822 Logitmg2
## 116 0.7777778 0.28268991 Logitmg2
## 117 0.8333333 0.55601233 Logitmg2
## 118 0.7397260 0.17977528 Logitmg2
## 119 0.8055556 0.42135476 Logitmg2
## 120 0.7222222 0.23322684 Logitmg2
## 121 0.7397260 0.29053708 Logitmg2
## 122 0.7397260 0.26574907 Logitmg2
## 123 0.7808219 0.39229969 Logitmg2
## 124 0.7808219 0.32250580 Logitmg2
## 125 0.7916667 0.34146341 Logitmg2
## 126 0.7916667 0.34146341 Logitmg2
## 127 0.7916667 0.39189189 Logitmg2
## 128 0.7916667 0.41431670 Logitmg2
## 129 0.8194444 0.45199063 Logitmg2
## 130 0.7500000 0.22580645 Logitmg2
## 131 0.7916667 0.34146341 Logitmg2
## 132 0.6986301 0.16441207 Logitmg2
## 133 0.8333333 0.50401837 Logitmg2
## 134 0.7671233 0.29368241 Logitmg2
## 135 0.7638889 0.28337237 Logitmg2
## 136 0.8219178 0.51457801 Logitmg2
## 137 0.7397260 0.17977528 Logitmg2
## 138 0.8333333 0.53993610 Logitmg2
## 139 0.7777778 0.36353591 Logitmg2
## 140 0.8194444 0.49240781 Logitmg2
## 141 0.7638889 0.25365854 Logitmg2
## 142 0.7777778 0.33869116 Logitmg2
## 143 0.7083333 0.11475410 Logitmg2
## 144 0.8082192 0.42905028 Logitmg2
## 145 0.7916667 0.34146341 Logitmg2
## 146 0.7222222 0.17336395 Logitmg2
## 147 0.7777778 0.40801644 Logitmg2
## 148 0.8082192 0.44935345 Logitmg2
## 149 0.7945205 0.45818902 Logitmg2
## 150 0.7808219 0.32250580 Logitmg2
## 151 0.8356164 0.51061453 Logitmg2
## 152 0.7500000 0.22580645 Logitmg2
## 153 0.7916667 0.41431670 Logitmg2
## 154 0.7671233 0.38594755 Logitmg2
## 155 0.7777778 0.40801644 Logitmg2
## 156 0.8356164 0.54422477 Logitmg2
## 157 0.7916667 0.28191489 Logitmg2
## 158 0.7916667 0.36768150 Logitmg2
## 159 0.7397260 0.26574907 Logitmg2
## 160 0.7638889 0.25365854 Logitmg2
## 161 0.7777778 0.33869116 Logitmg2
## 162 0.7500000 0.25602755 Logitmg2
## 163 0.8055556 0.42135476 Logitmg2
## 164 0.8055556 0.42135476 Logitmg2
## 165 0.7534247 0.26592179 Logitmg2
## 166 0.7916667 0.36768150 Logitmg2
## 167 0.7808219 0.39229969 Logitmg2
## 168 0.7945205 0.37677860 Logitmg2
## 169 0.7916667 0.36768150 Logitmg2
## 170 0.7397260 0.26574907 Logitmg2
## 171 0.7638889 0.38181818 Logitmg2
## 172 0.7945205 0.35245417 Logitmg2
## 173 0.7500000 0.25602755 Logitmg2
## 174 0.7945205 0.45818902 Logitmg2
## 175 0.7777778 0.31182796 Logitmg2
## 176 0.8333333 0.50401837 Logitmg2
## 177 0.7534247 0.29202586 Logitmg2
## 178 0.8493151 0.57490736 Logitmg2
## 179 0.7638889 0.25365854 Logitmg2
## 180 0.8194444 0.42926829 Logitmg2
## 181 0.7945205 0.35245417 Logitmg2
## 182 0.7671233 0.29368241 Logitmg2
## 183 0.7638889 0.31081081 Logitmg2
## 184 0.7500000 0.25602755 Logitmg2
## 185 0.8055556 0.44309392 Logitmg2
## 186 0.8082192 0.48591549 Logitmg2
## 187 0.7777778 0.33869116 Logitmg2
## 188 0.8055556 0.44309392 Logitmg2
## 189 0.7500000 0.22580645 Logitmg2
## 190 0.7808219 0.34748603 Logitmg2
## 191 0.8055556 0.46325879 Logitmg2
## 192 0.7916667 0.39189189 Logitmg2
## 193 0.7808219 0.34748603 Logitmg2
## 194 0.7123288 0.15907844 Logitmg2
## 195 0.8472222 0.58577406 Logitmg2
## 196 0.7534247 0.29202586 Logitmg2
## 197 0.8055556 0.37235367 Logitmg2
## 198 0.7638889 0.28337237 Logitmg2
## 199 0.8082192 0.46826223 Logitmg2
## 200 0.7777778 0.25097529 Logitmg2
   ## Phân tích hình ảnh
# install.packages("ggplot2")
library(ggplot2)
library(ggthemes)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.7320, color = "blue") + theme_wsj() # Giá trị 0.7320 = 73,20% là tỷ lệ nhóm chiếm ưu thế (không chuyển đổi xe buýt) 
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 73.20% (đường màu xanh)

h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2

grid.arrange(h1, h2, ncol = 2, nrow = 1)

   ## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Accuracy by resamplemod$Mohinh
## t = -1.1687, df = 194.25, p-value = 0.244
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.015555007  0.003979665
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2 
##              0.7769616              0.7827492
summary(a)
##     Accuracy          Kappa            Mohinh         
##  Min.   :0.7083   Min.   :0.07805   Length:100        
##  1st Qu.:0.7500   1st Qu.:0.28903   Class :character  
##  Median :0.7778   Median :0.36168   Mode  :character  
##  Mean   :0.7770   Mean   :0.36674                     
##  3rd Qu.:0.8056   3rd Qu.:0.44466                     
##  Max.   :0.8750   Max.   :0.66109
summary(b)
##     Accuracy          Kappa           Mohinh         
##  Min.   :0.6986   Min.   :0.1148   Length:100        
##  1st Qu.:0.7639   1st Qu.:0.2825   Class :character  
##  Median :0.7862   Median :0.3635   Mode  :character  
##  Mean   :0.7827   Mean   :0.3586                     
##  3rd Qu.:0.8056   3rd Qu.:0.4291                     
##  Max.   :0.8493   Max.   :0.6027
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Kappa by resamplemod$Mohinh
## t = 0.52532, df = 197.18, p-value = 0.6
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02231013  0.03851178
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2 
##              0.3667391              0.3586382
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
## 
##     ci
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
   ## Mô hình mg1 
mgtrain1 <- train(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, method = "glm", family = "binomial", data = BS)
summary(mgtrain1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6346  -0.6513  -0.3886   0.4517   3.0879  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -0.460888   1.014052  -0.455 0.649468    
## `AG25-60`                 0.509909   0.408317   1.249 0.211735    
## `AG>60`                   1.600156   0.809458   1.977 0.048062 *  
## `OCOfficers/Workers`     -0.601602   0.444097  -1.355 0.175525    
## `OCHousewife/Unemployed` -0.672492   0.670571  -1.003 0.315927    
## `OCFree labor`           -1.067818   0.435640  -2.451 0.014240 *  
## OCOthers                  0.145226   0.567793   0.256 0.798126    
## MCYes                    -0.803585   0.424113  -1.895 0.058127 .  
## MOYes                    -0.142332   0.381606  -0.373 0.709162    
## NM1                       0.797740   0.828571   0.963 0.335653    
## NM2                       0.584359   0.799689   0.731 0.464942    
## `NM>=3`                   1.153470   0.800814   1.440 0.149762    
## TMMotorbike               0.006570   0.439137   0.015 0.988063    
## TMCar                     0.810212   0.563283   1.438 0.150328    
## `TMHichhiking/Taxi`       0.504619   0.497789   1.014 0.310716    
## `TPPicking Children`     -1.452278   0.431442  -3.366 0.000762 ***
## `TPEntertainment/Food`   -0.855917   0.284101  -3.013 0.002589 ** 
## TPOthers                 -1.357762   0.918309  -1.479 0.139262    
## SPYes                    -0.040310   0.446021  -0.090 0.927988    
## `SPDon't know`           -1.119875   0.598265  -1.872 0.061224 .  
## `WIAt Bus Stops`          0.622671   0.304066   2.048 0.040578 *  
## `WIInternet/Facebook`     1.294873   0.357963   3.617 0.000298 ***
## `WIFrom Friends`          0.905878   0.394216   2.298 0.021566 *  
## WIOthers                  0.670218   0.463461   1.446 0.148145    
## BCYes                    -0.888957   0.231589  -3.839 0.000124 ***
## CAYes                    -0.915710   0.236190  -3.877 0.000106 ***
## BSCNormal                -0.227395   0.404108  -0.563 0.573633    
## BSCAgree                 -0.194065   0.362292  -0.536 0.592195    
## BSANormal                -0.596242   0.443520  -1.344 0.178838    
## BSAAgree                  0.096531   0.408890   0.236 0.813369    
## BRCNormal                -0.343956   0.431132  -0.798 0.424988    
## BRCAgree                 -0.263950   0.341203  -0.774 0.439175    
## MRComfortable             0.052656   0.393148   0.134 0.893455    
## `MRLow price`             0.969812   0.556347   1.743 0.081302 .  
## MRAccessibility           0.804047   0.436812   1.841 0.065663 .  
## MRReliability            -0.097818   0.520557  -0.188 0.850948    
## MRothers                  0.231989   0.656438   0.353 0.723784    
## BUBYes                   -0.050910   0.235854  -0.216 0.829101    
## SBMNormal                 0.252138   0.286773   0.879 0.379279    
## SBMAgree                  1.963412   0.329928   5.951 2.66e-09 ***
## SBSNormal                 0.008038   0.273902   0.029 0.976590    
## SBSAgree                  0.775681   0.384369   2.018 0.043585 *  
## DI                       -0.032412   0.022708  -1.427 0.153481    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 618.39  on 681  degrees of freedom
## AIC: 704.39
## 
## Number of Fisher Scoring iterations: 5
pred1mg <- predict(mgtrain1, newdata = BS, type = "prob")
rmg <- roc(BS$MSI, pred1mg$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg$auc
## Area under the curve: 0.838
plot(rmg, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

plot(smooth(rmg), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

   ## Có thể dùng các lệnh đơn giản
auc(rmg)
## Area under the curve: 0.838
plot.roc(rmg)

ci(rmg)
## 95% CI: 0.8053-0.8708 (DeLong)
   ## Mô hình mg2 
mg2train1 <- train(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, data = BS, method = "glm", family = "binomial")
summary(mg2train1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0246  -0.7121  -0.4431   0.5256   2.7444  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               0.04216    0.48436   0.087 0.930642    
## `OCOfficers/Workers`     -0.31020    0.27507  -1.128 0.259441    
## `OCHousewife/Unemployed` -0.29222    0.52541  -0.556 0.578092    
## `OCFree labor`           -0.66363    0.28470  -2.331 0.019753 *  
## OCOthers                  0.91856    0.39809   2.307 0.021032 *  
## MCYes                    -0.85258    0.31772  -2.683 0.007286 ** 
## `TPPicking Children`     -1.05006    0.38954  -2.696 0.007026 ** 
## `TPEntertainment/Food`   -0.80345    0.26544  -3.027 0.002471 ** 
## TPOthers                 -1.11305    0.81779  -1.361 0.173497    
## SPYes                    -0.02669    0.41726  -0.064 0.948998    
## `SPDon't know`           -1.10368    0.55322  -1.995 0.046040 *  
## `WIAt Bus Stops`          0.60130    0.28481   2.111 0.034751 *  
## `WIInternet/Facebook`     1.18500    0.33140   3.576 0.000349 ***
## `WIFrom Friends`          0.98890    0.36275   2.726 0.006408 ** 
## WIOthers                  0.60877    0.42965   1.417 0.156513    
## BCYes                    -0.81650    0.20905  -3.906 9.39e-05 ***
## CAYes                    -0.92689    0.21197  -4.373 1.23e-05 ***
## SBMNormal                 0.18031    0.22877   0.788 0.430593    
## SBMAgree                  2.13985    0.27266   7.848 4.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 656.17  on 705  degrees of freedom
## AIC: 694.17
## 
## Number of Fisher Scoring iterations: 5
pred1mg2 <- predict(mg2train1, newdata = BS, type = "prob")
rmg2 <- roc(BS$MSI, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.8071
auc(rmg2)
## Area under the curve: 0.8071
plot.roc(rmg2)

ci(rmg2)
## 95% CI: 0.7711-0.8431 (DeLong)
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

   ## Tính hệ số Gini = 2AUC-1
Ginimg <- 2*(rmg$auc)-1
Ginimg
## [1] 0.6760941
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.6141509

6. Importance of variables in models

library(caret)
varImp(mg2)
##                          Overall
## OCOfficers/Workers     1.1277131
## OCHousewife/Unemployed 0.5561732
## OCFree labor           2.3310002
## OCOthers               2.3074161
## MCYes                  2.6834709
## TPPicking Children     2.6956221
## TPEntertainment/Food   3.0268242
## TPOthers               1.3610535
## SPYes                  0.0639657
## SPDon't know           1.9950247
## WIAt Bus Stops         2.1112429
## WIInternet/Facebook    3.5757098
## WIFrom Friends         2.7261485
## WIOthers               1.4168956
## BCYes                  3.9058054
## CAYes                  4.3728317
## SBMNormal              0.7881773
## SBMAgree               7.8481853
# plot(varImp(mg2))

7. Probility of bus use - Prohibit model

# Prohibit Models
library(aod)
## 
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
## 
##     rats
   ## Model mg1
prohibitmg1 <- glm(MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial(link = "probit"), data = BS)
summary(prohibitmg1)
## 
## Call:
## glm(formula = MSI ~ AG + OC + MC + MO + NM + TM + TP + SP + WI + 
##     BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + DI, family = binomial(link = "probit"), 
##     data = BS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6222  -0.6786  -0.3974   0.4737   3.1306  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.409066   0.576083  -0.710 0.477653    
## AG25-60                 0.327883   0.230042   1.425 0.154065    
## AG>60                   0.872810   0.453828   1.923 0.054453 .  
## OCOfficers/Workers     -0.385970   0.251363  -1.536 0.124659    
## OCHousewife/Unemployed -0.330146   0.365637  -0.903 0.366560    
## OCFree labor           -0.655956   0.247240  -2.653 0.007975 ** 
## OCOthers                0.022091   0.322865   0.068 0.945450    
## MCYes                  -0.470112   0.243446  -1.931 0.053473 .  
## MOYes                  -0.060611   0.219026  -0.277 0.781987    
## NM1                     0.497254   0.473525   1.050 0.293667    
## NM2                     0.366264   0.458343   0.799 0.424230    
## NM>=3                   0.686721   0.459089   1.496 0.134697    
## TMMotorbike            -0.025545   0.250540  -0.102 0.918789    
## TMCar                   0.377005   0.320117   1.178 0.238912    
## TMHichhiking/Taxi       0.250315   0.287445   0.871 0.383849    
## TPPicking Children     -0.726259   0.230270  -3.154 0.001611 ** 
## TPEntertainment/Food   -0.465732   0.159769  -2.915 0.003557 ** 
## TPOthers               -0.805282   0.503666  -1.599 0.109856    
## SPYes                   0.037535   0.246830   0.152 0.879134    
## SPDon't know           -0.592327   0.320357  -1.849 0.064464 .  
## WIAt Bus Stops          0.316128   0.169090   1.870 0.061541 .  
## WIInternet/Facebook     0.701176   0.202539   3.462 0.000536 ***
## WIFrom Friends          0.502028   0.226176   2.220 0.026444 *  
## WIOthers                0.330956   0.257032   1.288 0.197883    
## BCYes                  -0.486766   0.130179  -3.739 0.000185 ***
## CAYes                  -0.495117   0.132620  -3.733 0.000189 ***
## BSCNormal              -0.078621   0.228104  -0.345 0.730343    
## BSCAgree               -0.096139   0.206924  -0.465 0.642212    
## BSANormal              -0.336870   0.252355  -1.335 0.181908    
## BSAAgree                0.046502   0.236533   0.197 0.844144    
## BRCNormal              -0.162264   0.242802  -0.668 0.503945    
## BRCAgree               -0.140211   0.197327  -0.711 0.477361    
## MRComfortable           0.048848   0.220095   0.222 0.824360    
## MRLow price             0.553298   0.316626   1.747 0.080554 .  
## MRAccessibility         0.462671   0.245769   1.883 0.059762 .  
## MRReliability          -0.038754   0.291157  -0.133 0.894112    
## MRothers                0.180552   0.378096   0.478 0.632986    
## BUBYes                 -0.001546   0.133257  -0.012 0.990742    
## SBMNormal               0.145600   0.163637   0.890 0.373588    
## SBMAgree                1.126078   0.189087   5.955  2.6e-09 ***
## SBSNormal              -0.006763   0.156507  -0.043 0.965531    
## SBSAgree                0.438031   0.223619   1.959 0.050133 .  
## DI                     -0.016295   0.012740  -1.279 0.200879    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 621.07  on 681  degrees of freedom
## AIC: 707.07
## 
## Number of Fisher Scoring iterations: 6
   ## Model mg2
prohibitmg2 <- glm(MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial(link = "probit"), data = BS)
summary(prohibitmg2)
## 
## Call:
## glm(formula = MSI ~ OC + MC + TP + SP + WI + BC + CA + SBM, family = binomial(link = "probit"), 
##     data = BS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9872  -0.7276  -0.4385   0.5471   2.8378  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.02832    0.27459  -0.103 0.917853    
## OCOfficers/Workers     -0.19292    0.15721  -1.227 0.219764    
## OCHousewife/Unemployed -0.13386    0.28889  -0.463 0.643099    
## OCFree labor           -0.39711    0.16316  -2.434 0.014940 *  
## OCOthers                0.49647    0.23159   2.144 0.032054 *  
## MCYes                  -0.48138    0.18515  -2.600 0.009325 ** 
## TPPicking Children     -0.56673    0.21358  -2.654 0.007965 ** 
## TPEntertainment/Food   -0.45503    0.15033  -3.027 0.002470 ** 
## TPOthers               -0.67399    0.44839  -1.503 0.132808    
## SPYes                   0.02745    0.23117   0.119 0.905494    
## SPDon't know           -0.62736    0.29901  -2.098 0.035892 *  
## WIAt Bus Stops          0.30447    0.16024   1.900 0.057412 .  
## WIInternet/Facebook     0.64101    0.19027   3.369 0.000755 ***
## WIFrom Friends          0.53193    0.21178   2.512 0.012014 *  
## WIOthers                0.30776    0.24237   1.270 0.204152    
## BCYes                  -0.45796    0.11899  -3.849 0.000119 ***
## CAYes                  -0.51245    0.11987  -4.275 1.91e-05 ***
## SBMNormal               0.12153    0.13025   0.933 0.350802    
## SBMAgree                1.26007    0.15521   8.118 4.72e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 841.60  on 723  degrees of freedom
## Residual deviance: 657.32  on 705  degrees of freedom
## AIC: 695.32
## 
## Number of Fisher Scoring iterations: 6
# Đánh giá tác động tổng thể Purpose (Total effect of Travel Purpose) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với Purpose là từ 7:9)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 7:9) # Giá trị 15.2 ứng với p-value < 0.05 --> KL: tác động tổng thể về TP (mục đich chuyến đi) của người sử dụng lên khả năng chuyển đổi xe buýt là tồn tại và có ý nghĩa thống kê
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 15.2, df = 3, P(> X2) = 0.0016
# Đánh giá tác động tổng thể WI (Total effect of WI) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với WI là từ 12:15)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 12:15)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 13.4, df = 4, P(> X2) = 0.0093
# Đánh giá tác động tổng thể SBM (Total effect of SBM) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SBM là từ 18:19)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 18:19)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 70.2, df = 2, P(> X2) = 5.6e-16
# Đánh giá tác động tổng thể SP (Total effect of SP) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SP là từ 10:11)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 10:11)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 7.2, df = 2, P(> X2) = 0.028
# Đánh giá tác động tổng thể OC (Total effect of OC) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với OC là từ 2:5)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 2:5)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 16.4, df = 4, P(> X2) = 0.0025

8. Perception of bus in two groups people: Shift to bus and non ship to bus 8.1. Tạo dữ liệu biểu đồ likert tương ứng với 2 nhóm : Chuyển đổi BP_MS và không chuyển đổi BP_nMS

# Create data for Likert graph by BS (remove ID and TM =7-Bus) 
head(MS)
##   ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1  1  0  1  6  3  3  1  2 10  1  1  0  4  1  0   1 15   1   3   3   3   4   4
## 2  2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5
## 3  3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4
## 4  4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4
## 5  5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4
## 6  6  0  1  4  1  4  1 20 30  1  1  0  5  3  0   2 50   0   3   2   4   4   4
##   BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1   4   4   3   4   4   4   4  1  1  1   0  0  5  4  2  2  0  0  0  0  0  1  2
## 2   2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3
## 3   3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3
## 4   4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3
## 5   2   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2
## 6   3   3   3   2   4   4   4  2  0  0   0  1  4  3  4  2  1  1  1  1  1  1  2
##   NR
## 1  1
## 2  0
## 3  0
## 4  0
## 5  0
## 6  1
str(MS)
## 'data.frame':    810 obs. of  47 variables:
##  $ ID : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CA : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TM : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ TP : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ FR : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ DT : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ DI : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ TI : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ MR : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ WE : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ WK : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ NBR: int  1 1 2 4 2 2 4 2 1 4 ...
##  $ CO : num  15 5 3 10 5 50 40 5 7 7 ...
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: int  3 2 2 2 2 3 2 3 3 3 ...
##  $ SBS: int  3 2 2 2 2 2 2 2 3 2 ...
##  $ BST: int  3 2 2 3 3 4 4 5 4 2 ...
##  $ BSC: int  4 4 4 4 4 4 4 5 4 5 ...
##  $ BSA: int  4 5 4 4 4 4 4 5 4 5 ...
##  $ BRE: int  4 2 3 4 2 3 2 3 3 3 ...
##  $ BCO: int  4 2 3 4 2 3 2 3 3 2 ...
##  $ BIN: int  3 4 4 4 3 3 3 3 3 4 ...
##  $ BHE: int  4 2 2 3 2 2 2 3 3 2 ...
##  $ BEN: int  4 5 4 3 4 4 5 5 4 5 ...
##  $ BRC: int  4 5 4 3 4 4 5 5 4 5 ...
##  $ BWE: int  4 3 4 5 4 4 5 5 4 4 ...
##  $ SP : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ BI : int  1 0 1 1 0 0 0 1 1 1 ...
##  $ WI : int  1 0 2 1 0 0 0 2 3 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 1 0 0 ...
##  $ GD : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ AG : int  5 3 3 3 4 4 5 4 3 4 ...
##  $ OC : int  4 6 2 2 6 3 3 7 7 3 ...
##  $ IN : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ NC : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ MC : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ CC : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ BO : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ MO : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ RO : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ NB : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ NM : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ NR : int  1 0 0 0 0 1 1 0 0 0 ...
dim(MS)
## [1] 810  47
BS_likert = subset(MS,TM < 7)
head(BS_likert)
##   ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1  1  0  1  6  3  3  1  2 10  1  1  0  4  1  0   1 15   1   3   3   3   4   4
## 2  2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5
## 3  3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4
## 4  4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4
## 5  5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4
## 6  6  0  1  4  1  4  1 20 30  1  1  0  5  3  0   2 50   0   3   2   4   4   4
##   BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1   4   4   3   4   4   4   4  1  1  1   0  0  5  4  2  2  0  0  0  0  0  1  2
## 2   2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3
## 3   3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3
## 4   4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3
## 5   2   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2
## 6   3   3   3   2   4   4   4  2  0  0   0  1  4  3  4  2  1  1  1  1  1  1  2
##   NR
## 1  1
## 2  0
## 3  0
## 4  0
## 5  0
## 6  1
str(BS_likert)
## 'data.frame':    724 obs. of  47 variables:
##  $ ID : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CA : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TM : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ TP : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ FR : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ DT : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ DI : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ TI : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ MR : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ WE : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ WK : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ NBR: int  1 1 2 4 2 2 4 2 1 4 ...
##  $ CO : num  15 5 3 10 5 50 40 5 7 7 ...
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: int  3 2 2 2 2 3 2 3 3 3 ...
##  $ SBS: int  3 2 2 2 2 2 2 2 3 2 ...
##  $ BST: int  3 2 2 3 3 4 4 5 4 2 ...
##  $ BSC: int  4 4 4 4 4 4 4 5 4 5 ...
##  $ BSA: int  4 5 4 4 4 4 4 5 4 5 ...
##  $ BRE: int  4 2 3 4 2 3 2 3 3 3 ...
##  $ BCO: int  4 2 3 4 2 3 2 3 3 2 ...
##  $ BIN: int  3 4 4 4 3 3 3 3 3 4 ...
##  $ BHE: int  4 2 2 3 2 2 2 3 3 2 ...
##  $ BEN: int  4 5 4 3 4 4 5 5 4 5 ...
##  $ BRC: int  4 5 4 3 4 4 5 5 4 5 ...
##  $ BWE: int  4 3 4 5 4 4 5 5 4 4 ...
##  $ SP : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ BI : int  1 0 1 1 0 0 0 1 1 1 ...
##  $ WI : int  1 0 2 1 0 0 0 2 3 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 1 0 0 ...
##  $ GD : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ AG : int  5 3 3 3 4 4 5 4 3 4 ...
##  $ OC : int  4 6 2 2 6 3 3 7 7 3 ...
##  $ IN : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ NC : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ MC : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ CC : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ BO : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ MO : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ RO : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ NB : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ NM : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ NR : int  1 0 0 0 0 1 1 0 0 0 ...
dim(BS_likert)
## [1] 724  47
## Create dat_BST
dat_BST <- BS_likert[ , c(18,19,20,21,34)]
head(dat_BST)
##   BUB SBM SBS BST MSI
## 1   1   3   3   3   0
## 2   0   2   2   2   0
## 3   1   2   2   2   0
## 4   1   2   2   3   0
## 5   1   2   2   3   0
## 6   0   3   2   4   0
dim(dat_BST)
## [1] 724   5
size <- 724
dat_BST$BPE <- rep("BST", size = size) # Tạo biến mới BPE
names(dat_BST)[names(dat_BST) == "BST"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BST)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   3   0 BST
## 2   0   2   2   2   0 BST
## 3   1   2   2   2   0 BST
## 4   1   2   2   3   0 BST
## 5   1   2   2   3   0 BST
## 6   0   3   2   4   0 BST
## Create dat_BSC
dat_BSC <- BS_likert[ , c(18,19,20,22,34)]
head(dat_BSC)
##   BUB SBM SBS BSC MSI
## 1   1   3   3   4   0
## 2   0   2   2   4   0
## 3   1   2   2   4   0
## 4   1   2   2   4   0
## 5   1   2   2   4   0
## 6   0   3   2   4   0
dim(dat_BSC)
## [1] 724   5
str(dat_BSC)
## 'data.frame':    724 obs. of  5 variables:
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: int  3 2 2 2 2 3 2 3 3 3 ...
##  $ SBS: int  3 2 2 2 2 2 2 2 3 2 ...
##  $ BSC: int  4 4 4 4 4 4 4 5 4 5 ...
##  $ MSI: int  0 0 0 0 0 0 0 1 0 0 ...
dat_BSC$BPE <- rep("BSC", size = size) # Tạo biến mới BPE
names(dat_BSC)[names(dat_BSC) == "BSC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSC)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BSC
## 2   0   2   2   4   0 BSC
## 3   1   2   2   4   0 BSC
## 4   1   2   2   4   0 BSC
## 5   1   2   2   4   0 BSC
## 6   0   3   2   4   0 BSC
## Create dat_BSA
dat_BSA <- BS_likert[ , c(18,19,20,23,34)]
head(dat_BSA)
##   BUB SBM SBS BSA MSI
## 1   1   3   3   4   0
## 2   0   2   2   5   0
## 3   1   2   2   4   0
## 4   1   2   2   4   0
## 5   1   2   2   4   0
## 6   0   3   2   4   0
dim(dat_BSA)
## [1] 724   5
dat_BSA$BPE <- rep("BSA", size = size) # Tạo biến mới BPE
names(dat_BSA)[names(dat_BSA) == "BSA"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSA)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BSA
## 2   0   2   2   5   0 BSA
## 3   1   2   2   4   0 BSA
## 4   1   2   2   4   0 BSA
## 5   1   2   2   4   0 BSA
## 6   0   3   2   4   0 BSA
## Create dat_BRE
dat_BRE <- BS_likert[ , c(18,19,20,24,34)]
head(dat_BRE)
##   BUB SBM SBS BRE MSI
## 1   1   3   3   4   0
## 2   0   2   2   2   0
## 3   1   2   2   3   0
## 4   1   2   2   4   0
## 5   1   2   2   2   0
## 6   0   3   2   3   0
dim(dat_BRE)
## [1] 724   5
dat_BRE$BPE <- rep("BRE", size = size) # Tạo biến mới BPE
names(dat_BRE)[names(dat_BRE) == "BRE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRE)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BRE
## 2   0   2   2   2   0 BRE
## 3   1   2   2   3   0 BRE
## 4   1   2   2   4   0 BRE
## 5   1   2   2   2   0 BRE
## 6   0   3   2   3   0 BRE
## Create dat_BCO
dat_BCO <- BS_likert[ , c(18,19,20,25,34)]
head(dat_BCO)
##   BUB SBM SBS BCO MSI
## 1   1   3   3   4   0
## 2   0   2   2   2   0
## 3   1   2   2   3   0
## 4   1   2   2   4   0
## 5   1   2   2   2   0
## 6   0   3   2   3   0
dim(dat_BCO)
## [1] 724   5
dat_BCO$BPE <- rep("BCO", size = size) # Tạo biến mới BPE
names(dat_BCO)[names(dat_BCO) == "BCO"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BCO)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BCO
## 2   0   2   2   2   0 BCO
## 3   1   2   2   3   0 BCO
## 4   1   2   2   4   0 BCO
## 5   1   2   2   2   0 BCO
## 6   0   3   2   3   0 BCO
## Create dat_BIN
dat_BIN <- BS_likert[ , c(18,19,20,26,34)]
head(dat_BIN)
##   BUB SBM SBS BIN MSI
## 1   1   3   3   3   0
## 2   0   2   2   4   0
## 3   1   2   2   4   0
## 4   1   2   2   4   0
## 5   1   2   2   3   0
## 6   0   3   2   3   0
dim(dat_BIN)
## [1] 724   5
dat_BIN$BPE <- rep("BIN", size = size) # Tạo biến mới BPE
names(dat_BIN)[names(dat_BIN) == "BIN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BIN)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   3   0 BIN
## 2   0   2   2   4   0 BIN
## 3   1   2   2   4   0 BIN
## 4   1   2   2   4   0 BIN
## 5   1   2   2   3   0 BIN
## 6   0   3   2   3   0 BIN
## Create dat_BHE
dat_BHE <- BS_likert[ , c(18,19,20,27,34)]
head(dat_BHE)
##   BUB SBM SBS BHE MSI
## 1   1   3   3   4   0
## 2   0   2   2   2   0
## 3   1   2   2   2   0
## 4   1   2   2   3   0
## 5   1   2   2   2   0
## 6   0   3   2   2   0
dim(dat_BHE)
## [1] 724   5
dat_BHE$BPE <- rep("BHE", size = size) # Tạo biến mới BPE
names(dat_BHE)[names(dat_BHE) == "BHE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BHE)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BHE
## 2   0   2   2   2   0 BHE
## 3   1   2   2   2   0 BHE
## 4   1   2   2   3   0 BHE
## 5   1   2   2   2   0 BHE
## 6   0   3   2   2   0 BHE
## Create dat_BEN
dat_BEN <- BS_likert[ , c(18,19,20,28,34)]
head(dat_BEN)
##   BUB SBM SBS BEN MSI
## 1   1   3   3   4   0
## 2   0   2   2   5   0
## 3   1   2   2   4   0
## 4   1   2   2   3   0
## 5   1   2   2   4   0
## 6   0   3   2   4   0
dim(dat_BEN)
## [1] 724   5
dat_BEN$BPE <- rep("BEN", size = size) # Tạo biến mới BPE
names(dat_BEN)[names(dat_BEN) == "BEN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BEN)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BEN
## 2   0   2   2   5   0 BEN
## 3   1   2   2   4   0 BEN
## 4   1   2   2   3   0 BEN
## 5   1   2   2   4   0 BEN
## 6   0   3   2   4   0 BEN
## Create dat_BRC
dat_BRC <- BS_likert[ , c(18,19,20,29,34)]
head(dat_BRC)
##   BUB SBM SBS BRC MSI
## 1   1   3   3   4   0
## 2   0   2   2   5   0
## 3   1   2   2   4   0
## 4   1   2   2   3   0
## 5   1   2   2   4   0
## 6   0   3   2   4   0
dim(dat_BRC)
## [1] 724   5
dat_BRC$BPE <- rep("BRC", size = size) # Tạo biến mới BPE
names(dat_BRC)[names(dat_BRC) == "BRC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRC)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BRC
## 2   0   2   2   5   0 BRC
## 3   1   2   2   4   0 BRC
## 4   1   2   2   3   0 BRC
## 5   1   2   2   4   0 BRC
## 6   0   3   2   4   0 BRC
## Create dat_BWE
dat_BWE <- BS_likert[ , c(18,19,20,30,34)]
head(dat_BWE)
##   BUB SBM SBS BWE MSI
## 1   1   3   3   4   0
## 2   0   2   2   3   0
## 3   1   2   2   4   0
## 4   1   2   2   5   0
## 5   1   2   2   4   0
## 6   0   3   2   4   0
dim(dat_BWE)
## [1] 724   5
dat_BWE$BPE <- rep("BWE", size = size) # Tạo biến mới BPE
names(dat_BWE)[names(dat_BWE) == "BWE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BWE)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   4   0 BWE
## 2   0   2   2   3   0 BWE
## 3   1   2   2   4   0 BWE
## 4   1   2   2   5   0 BWE
## 5   1   2   2   4   0 BWE
## 6   0   3   2   4   0 BWE
## Tạo dữ liệu likert từ 10 dữ liệu: dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE
B.Per <- rbind(dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE)
head(B.Per)
##   BUB SBM SBS Res MSI BPE
## 1   1   3   3   3   0 BST
## 2   0   2   2   2   0 BST
## 3   1   2   2   2   0 BST
## 4   1   2   2   3   0 BST
## 5   1   2   2   3   0 BST
## 6   0   3   2   4   0 BST
str(B.Per)
## 'data.frame':    7240 obs. of  6 variables:
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: int  3 2 2 2 2 3 2 3 3 3 ...
##  $ SBS: int  3 2 2 2 2 2 2 2 3 2 ...
##  $ Res: int  3 2 2 3 3 4 4 5 4 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 1 0 0 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(B.Per)
## [1] 7240    6
# Cách 2: Tạo dữ liệu thủ công trên excel với các cột BPE, Res, BUB, MSI, SBM, SBS: t_likert = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\8. MS-Bieu do Likert.csv" ; BP_likert = read.csv(t_likert, header = T, sep = ";")

# Data Bus perception of people shift to bus
BP_MS = subset(B.Per,MSI == 1)
str(BP_MS)
## 'data.frame':    1940 obs. of  6 variables:
##  $ BUB: int  1 1 1 1 1 1 1 1 0 0 ...
##  $ SBM: int  3 1 4 4 5 5 4 4 3 4 ...
##  $ SBS: int  2 1 3 2 4 4 4 4 2 2 ...
##  $ Res: int  5 3 2 3 1 1 4 4 2 2 ...
##  $ MSI: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(BP_MS)
## [1] 1940    6
BP_MS = BP_MS[ , c(4, 6)]
dim(BP_MS)
## [1] 1940    2
head(BP_MS)
##    Res BPE
## 8    5 BST
## 12   3 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 19   1 BST
# Data Bus perception of people shift to bus (MSI = 1) and used to use bus before (BUB = 1) 
BP_MS_BU = subset(B.Per,BUB == 1 & MSI == 1)
str(BP_MS_BU)
## 'data.frame':    1400 obs. of  6 variables:
##  $ BUB: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  3 1 4 4 5 5 4 4 4 4 ...
##  $ SBS: int  2 1 3 2 4 4 4 4 2 3 ...
##  $ Res: int  5 3 2 3 1 1 4 4 4 4 ...
##  $ MSI: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(BP_MS_BU)
## [1] 1400    6
BP_MS_BU = BP_MS_BU[ , c(4, 6)]
dim(BP_MS_BU)
## [1] 1400    2
head(BP_MS_BU)
##    Res BPE
## 8    5 BST
## 12   3 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 19   1 BST
# Data Bus perception of people don't shift to bus
BP_nMS = subset(B.Per,MSI == 0)
str(BP_nMS)
## 'data.frame':    5300 obs. of  6 variables:
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: int  3 2 2 2 2 3 2 3 3 3 ...
##  $ SBS: int  3 2 2 2 2 2 2 3 2 2 ...
##  $ Res: int  3 2 2 3 3 4 4 4 2 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(BP_nMS)
## [1] 5300    6
BP_nMS = BP_nMS[ , c(4, 6)]
dim(BP_nMS)
## [1] 5300    2
head(BP_nMS)
##   Res BPE
## 1   3 BST
## 2   2 BST
## 3   2 BST
## 4   3 BST
## 5   3 BST
## 6   4 BST

8.2. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi - BP_MS

# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS)
##    Res BPE
## 8    5 BST
## 12   3 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 19   1 BST
attach(BP_MS)
BP_MS = within(BP_MS, {
  Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
  BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
  })
str(BP_MS)
## 'data.frame':    1940 obs. of  2 variables:
##  $ Res: Factor w/ 5 levels "Very Disagree",..: 5 3 2 3 1 1 4 4 2 2 ...
##  $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS)
##             Res           BPE     
##  Very Disagree:136   BST    :194  
##  Disagree     :334   BSC    :194  
##  Normal       :440   BSA    :194  
##  Agree        :827   BRE    :194  
##  Very Agree   :203   BCO    :194  
##                      BIN    :194  
##                      (Other):776
t = compareGroups(Res ~ BPE, data = BP_MS)
createTable(t)
## 
## --------Summary descriptives table by 'Res'---------
## 
## ____________________________________________________________________________ 
##         Very Disagree  Disagree    Normal      Agree    Very Agree p.overall 
##             N=136       N=334      N=440       N=827      N=203              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## BPE:                                                                <0.001   
##     BST  13 (9.56%)   53 (15.9%) 57 (13.0%) 62 (7.50%)  9 (4.43%)            
##     BSC  15 (11.0%)   7 (2.10%)  34 (7.73%) 93 (11.2%)  45 (22.2%)           
##     BSA  14 (10.3%)   63 (18.9%) 57 (13.0%) 54 (6.53%)  6 (2.96%)            
##     BRE   9 (6.62%)   57 (17.1%) 73 (16.6%) 49 (5.93%)  6 (2.96%)            
##     BCO  19 (14.0%)   10 (2.99%) 21 (4.77%) 96 (11.6%)  48 (23.6%)           
##     BIN  12 (8.82%)   29 (8.68%) 57 (13.0%) 83 (10.0%)  13 (6.40%)           
##     BHE  11 (8.09%)   12 (3.59%) 31 (7.05%) 122 (14.8%) 18 (8.87%)           
##     BEN  12 (8.82%)   18 (5.39%) 32 (7.27%) 116 (14.0%) 16 (7.88%)           
##     BRC  17 (12.5%)   75 (22.5%) 53 (12.0%) 46 (5.56%)  3 (1.48%)            
##     BWE  14 (10.3%)   10 (2.99%) 25 (5.68%) 106 (12.8%) 39 (19.2%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS %>% 
  group_by(BPE, Res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(BPE) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 2)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting
df_for_ploting %>% 
  filter(Res == Res[5]) %>% 
  arrange(percent) %>% 
  pull(BPE) -> order_x
order_x
##  [1] BRC BSA BRE BST BIN BEN BHE BWE BSC BCO
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot: 
my_colors <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font <- "Roboto Condensed"

df_for_ploting %>% 
  mutate(BPE = factor(BPE, levels = order_x), Res = factor(Res, levels = Res[5:1])) -> df_odered
df_odered
## # A tibble: 50 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    13    6.7  6.7%    
##  2 BST   Disagree         53   27.3  27.32%  
##  3 BST   Normal           57   29.4  29.38%  
##  4 BST   Agree            62   32.0  31.96%  
##  5 BST   Very Agree        9    4.64 4.64%   
##  6 BSC   Very Disagree    15    7.73 7.73%   
##  7 BSC   Disagree          7    3.61 3.61%   
##  8 BSC   Normal           34   17.5  17.53%  
##  9 BSC   Agree            93   47.9  47.94%  
## 10 BSC   Very Agree       45   23.2  23.2%   
## # ... with 40 more rows
library(extrafont)
## Registering fonts with R
theme_set(theme_minimal())
gg <- df_odered %>% 
  ggplot(aes(x = BPE, y = percent, fill = Res)) + 
  geom_col(width = 0.8) + 
  coord_flip() + 
  scale_fill_manual(values = my_colors[5:1], name = "") + 
  theme(legend.position = "top") + 
  theme(text = element_text(family = my_font)) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) + 
  theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) + 
  theme(axis.text = element_text(color = "grey20", size = 10.2)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) + 
  theme(legend.key.height = unit(0.15, "mm")) + 
  labs(x = NULL, y = NULL, 
       title = "Bus perception of people who have ability to shift to bus", 
       subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# For displaying percent of "Very Disagree": 
df_odered %>% 
  filter(Res == "Very Disagree") %>% 
  filter(percent >= 3) -> df_for_text1
df_for_text1
## # A tibble: 10 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    13    6.7  6.7%    
##  2 BSC   Very Disagree    15    7.73 7.73%   
##  3 BSA   Very Disagree    14    7.22 7.22%   
##  4 BRE   Very Disagree     9    4.64 4.64%   
##  5 BCO   Very Disagree    19    9.79 9.79%   
##  6 BIN   Very Disagree    12    6.19 6.19%   
##  7 BHE   Very Disagree    11    5.67 5.67%   
##  8 BEN   Very Disagree    12    6.19 6.19%   
##  9 BRC   Very Disagree    17    8.76 8.76%   
## 10 BWE   Very Disagree    14    7.22 7.22%
# For displaying percent of "Disagree": 
df_odered %>% 
  filter(Res == "Disagree") %>% 
  filter(percent >= 3) -> df_for_text2
df_for_text2
## # A tibble: 10 x 5
##    BPE   Res          n percent bar_text
##    <fct> <fct>    <int>   <dbl> <chr>   
##  1 BST   Disagree    53   27.3  27.32%  
##  2 BSC   Disagree     7    3.61 3.61%   
##  3 BSA   Disagree    63   32.5  32.47%  
##  4 BRE   Disagree    57   29.4  29.38%  
##  5 BCO   Disagree    10    5.15 5.15%   
##  6 BIN   Disagree    29   15.0  14.95%  
##  7 BHE   Disagree    12    6.19 6.19%   
##  8 BEN   Disagree    18    9.28 9.28%   
##  9 BRC   Disagree    75   38.7  38.66%  
## 10 BWE   Disagree    10    5.15 5.15%
# For displaying percent of "Normal": 
df_odered %>% 
  filter(Res == "Normal") %>% 
  filter(percent >= 3) -> df_for_text3
df_for_text3
## # A tibble: 10 x 5
##    BPE   Res        n percent bar_text
##    <fct> <fct>  <int>   <dbl> <chr>   
##  1 BST   Normal    57    29.4 29.38%  
##  2 BSC   Normal    34    17.5 17.53%  
##  3 BSA   Normal    57    29.4 29.38%  
##  4 BRE   Normal    73    37.6 37.63%  
##  5 BCO   Normal    21    10.8 10.82%  
##  6 BIN   Normal    57    29.4 29.38%  
##  7 BHE   Normal    31    16.0 15.98%  
##  8 BEN   Normal    32    16.5 16.49%  
##  9 BRC   Normal    53    27.3 27.32%  
## 10 BWE   Normal    25    12.9 12.89%
# For displaying percent of "Agree": 
df_odered %>% 
  filter(Res == "Agree") %>% 
  filter(percent >= 3) -> df_for_text4
df_for_text4
## # A tibble: 10 x 5
##    BPE   Res       n percent bar_text
##    <fct> <fct> <int>   <dbl> <chr>   
##  1 BST   Agree    62    32.0 31.96%  
##  2 BSC   Agree    93    47.9 47.94%  
##  3 BSA   Agree    54    27.8 27.84%  
##  4 BRE   Agree    49    25.3 25.26%  
##  5 BCO   Agree    96    49.5 49.48%  
##  6 BIN   Agree    83    42.8 42.78%  
##  7 BHE   Agree   122    62.9 62.89%  
##  8 BEN   Agree   116    59.8 59.79%  
##  9 BRC   Agree    46    23.7 23.71%  
## 10 BWE   Agree   106    54.6 54.64%
# For displaying percent of "Very Agree": 
df_odered %>% 
  filter(Res == "Very Agree") %>% 
  filter(percent >= 3) -> df_for_text5
df_for_text5
## # A tibble: 9 x 5
##   BPE   Res            n percent bar_text
##   <fct> <fct>      <int>   <dbl> <chr>   
## 1 BST   Very Agree     9    4.64 4.64%   
## 2 BSC   Very Agree    45   23.2  23.2%   
## 3 BSA   Very Agree     6    3.09 3.09%   
## 4 BRE   Very Agree     6    3.09 3.09%   
## 5 BCO   Very Agree    48   24.7  24.74%  
## 6 BIN   Very Agree    13    6.7  6.7%    
## 7 BHE   Very Agree    18    9.28 9.28%   
## 8 BEN   Very Agree    16    8.25 8.25%   
## 9 BWE   Very Agree    39   20.1  20.1%
# For displaying percent of "Very Agree": 
df_odered %>% 
  filter(Res == "Very Agree") -> df_for_text
df_for_text
## # A tibble: 10 x 5
##    BPE   Res            n percent bar_text
##    <fct> <fct>      <int>   <dbl> <chr>   
##  1 BST   Very Agree     9    4.64 4.64%   
##  2 BSC   Very Agree    45   23.2  23.2%   
##  3 BSA   Very Agree     6    3.09 3.09%   
##  4 BRE   Very Agree     6    3.09 3.09%   
##  5 BCO   Very Agree    48   24.7  24.74%  
##  6 BIN   Very Agree    13    6.7  6.7%    
##  7 BHE   Very Agree    18    9.28 9.28%   
##  8 BEN   Very Agree    16    8.25 8.25%   
##  9 BRC   Very Agree     3    1.55 1.55%   
## 10 BWE   Very Agree    39   20.1  20.1%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg + 
  geom_text(data = df_for_text1 %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) + 
  geom_text(data = df_for_text1 %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) +
  geom_text(data = df_for_text, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

8.3. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi và đã từng sử dụng xe buýt trước đó - BP_MS_BU

# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS_BU)
##    Res BPE
## 8    5 BST
## 12   3 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 19   1 BST
attach(BP_MS_BU)
## The following objects are masked from BP_MS:
## 
##     BPE, Res
BP_MS_BU = within(BP_MS_BU, {
  Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
  BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
  })
str(BP_MS_BU)
## 'data.frame':    1400 obs. of  2 variables:
##  $ Res: Factor w/ 5 levels "Very Disagree",..: 5 3 2 3 1 1 4 4 4 4 ...
##  $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS_BU)
##             Res           BPE     
##  Very Disagree:108   BST    :140  
##  Disagree     :202   BSC    :140  
##  Normal       :339   BSA    :140  
##  Agree        :603   BRE    :140  
##  Very Agree   :148   BCO    :140  
##                      BIN    :140  
##                      (Other):560
t_BU = compareGroups(Res ~ BPE, data = BP_MS_BU)
createTable(t_BU)
## 
## --------Summary descriptives table by 'Res'---------
## 
## ___________________________________________________________________________ 
##         Very Disagree  Disagree    Normal     Agree    Very Agree p.overall 
##             N=108       N=202      N=339      N=603      N=148              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## BPE:                                                               <0.001   
##     BST   9 (8.33%)   33 (16.3%) 45 (13.3%) 45 (7.46%) 8 (5.41%)            
##     BSC  14 (13.0%)   3 (1.49%)  28 (8.26%) 67 (11.1%) 28 (18.9%)           
##     BSA  10 (9.26%)   47 (23.3%) 42 (12.4%) 36 (5.97%) 5 (3.38%)            
##     BRE   8 (7.41%)   33 (16.3%) 56 (16.5%) 37 (6.14%) 6 (4.05%)            
##     BCO  14 (13.0%)   5 (2.48%)  18 (5.31%) 72 (11.9%) 31 (20.9%)           
##     BIN   9 (8.33%)   18 (8.91%) 42 (12.4%) 59 (9.78%) 12 (8.11%)           
##     BHE  10 (9.26%)   4 (1.98%)  23 (6.78%) 89 (14.8%) 14 (9.46%)           
##     BEN  12 (11.1%)   9 (4.46%)  25 (7.37%) 82 (13.6%) 12 (8.11%)           
##     BRC  12 (11.1%)   47 (23.3%) 42 (12.4%) 36 (5.97%) 3 (2.03%)            
##     BWE  10 (9.26%)   3 (1.49%)  18 (5.31%) 80 (13.3%) 29 (19.6%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS_BU %>% 
  group_by(BPE, Res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(BPE) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 2)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting_BU
df_for_ploting_BU %>% 
  filter(Res == Res[5]) %>% 
  arrange(percent) %>% 
  pull(BPE) -> order_x_BU
order_x_BU
##  [1] BRC BSA BRE BST BIN BEN BHE BSC BWE BCO
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot: 
my_colors_BU <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_BU <- "Roboto Condensed"

df_for_ploting_BU %>% 
  mutate(BPE = factor(BPE, levels = order_x_BU), Res = factor(Res, levels = Res[5:1])) -> df_odered_BU
df_odered_BU
## # A tibble: 50 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree     9    6.43 6.43%   
##  2 BST   Disagree         33   23.6  23.57%  
##  3 BST   Normal           45   32.1  32.14%  
##  4 BST   Agree            45   32.1  32.14%  
##  5 BST   Very Agree        8    5.71 5.71%   
##  6 BSC   Very Disagree    14   10    10%     
##  7 BSC   Disagree          3    2.14 2.14%   
##  8 BSC   Normal           28   20    20%     
##  9 BSC   Agree            67   47.9  47.86%  
## 10 BSC   Very Agree       28   20    20%     
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_BU <- df_odered_BU %>% 
  ggplot(aes(x = BPE, y = percent, fill = Res)) + 
  geom_col(width = 0.8) + 
  coord_flip() + 
  scale_fill_manual(values = my_colors[5:1], name = "") + 
  theme(legend.position = "top") + 
  theme(text = element_text(family = my_font)) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) + 
  theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) + 
  theme(axis.text = element_text(color = "grey20", size = 10.2)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) + 
  theme(legend.key.height = unit(0.15, "mm")) + 
  labs(x = NULL, y = NULL, 
       title = "Bus perception of people shifting to bus and being used to use bus before", 
       subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_BU
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# For displaying percent of "Very Disagree": 
df_odered_BU %>% 
  filter(Res == "Very Disagree") %>% 
  filter(percent >= 3) -> df_for_text1_BU
df_for_text1_BU
## # A tibble: 10 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree     9    6.43 6.43%   
##  2 BSC   Very Disagree    14   10    10%     
##  3 BSA   Very Disagree    10    7.14 7.14%   
##  4 BRE   Very Disagree     8    5.71 5.71%   
##  5 BCO   Very Disagree    14   10    10%     
##  6 BIN   Very Disagree     9    6.43 6.43%   
##  7 BHE   Very Disagree    10    7.14 7.14%   
##  8 BEN   Very Disagree    12    8.57 8.57%   
##  9 BRC   Very Disagree    12    8.57 8.57%   
## 10 BWE   Very Disagree    10    7.14 7.14%
# For displaying percent of "Disagree": 
df_odered_BU %>% 
  filter(Res == "Disagree") %>% 
  filter(percent >= 3) -> df_for_text2_BU
df_for_text2_BU
## # A tibble: 7 x 5
##   BPE   Res          n percent bar_text
##   <fct> <fct>    <int>   <dbl> <chr>   
## 1 BST   Disagree    33   23.6  23.57%  
## 2 BSA   Disagree    47   33.6  33.57%  
## 3 BRE   Disagree    33   23.6  23.57%  
## 4 BCO   Disagree     5    3.57 3.57%   
## 5 BIN   Disagree    18   12.9  12.86%  
## 6 BEN   Disagree     9    6.43 6.43%   
## 7 BRC   Disagree    47   33.6  33.57%
# For displaying percent of "Normal": 
df_odered_BU %>% 
  filter(Res == "Normal") %>% 
  filter(percent >= 3) -> df_for_text3_BU
df_for_text3_BU
## # A tibble: 10 x 5
##    BPE   Res        n percent bar_text
##    <fct> <fct>  <int>   <dbl> <chr>   
##  1 BST   Normal    45    32.1 32.14%  
##  2 BSC   Normal    28    20   20%     
##  3 BSA   Normal    42    30   30%     
##  4 BRE   Normal    56    40   40%     
##  5 BCO   Normal    18    12.9 12.86%  
##  6 BIN   Normal    42    30   30%     
##  7 BHE   Normal    23    16.4 16.43%  
##  8 BEN   Normal    25    17.9 17.86%  
##  9 BRC   Normal    42    30   30%     
## 10 BWE   Normal    18    12.9 12.86%
# For displaying percent of "Agree": 
df_odered_BU %>% 
  filter(Res == "Agree") %>% 
  filter(percent >= 3) -> df_for_text4_BU
df_for_text4_BU
## # A tibble: 10 x 5
##    BPE   Res       n percent bar_text
##    <fct> <fct> <int>   <dbl> <chr>   
##  1 BST   Agree    45    32.1 32.14%  
##  2 BSC   Agree    67    47.9 47.86%  
##  3 BSA   Agree    36    25.7 25.71%  
##  4 BRE   Agree    37    26.4 26.43%  
##  5 BCO   Agree    72    51.4 51.43%  
##  6 BIN   Agree    59    42.1 42.14%  
##  7 BHE   Agree    89    63.6 63.57%  
##  8 BEN   Agree    82    58.6 58.57%  
##  9 BRC   Agree    36    25.7 25.71%  
## 10 BWE   Agree    80    57.1 57.14%
# For displaying percent of "Very Agree": 
df_odered_BU %>% 
  filter(Res == "Very Agree") %>% 
  filter(percent >= 3) -> df_for_text5_BU
df_for_text5_BU
## # A tibble: 9 x 5
##   BPE   Res            n percent bar_text
##   <fct> <fct>      <int>   <dbl> <chr>   
## 1 BST   Very Agree     8    5.71 5.71%   
## 2 BSC   Very Agree    28   20    20%     
## 3 BSA   Very Agree     5    3.57 3.57%   
## 4 BRE   Very Agree     6    4.29 4.29%   
## 5 BCO   Very Agree    31   22.1  22.14%  
## 6 BIN   Very Agree    12    8.57 8.57%   
## 7 BHE   Very Agree    14   10    10%     
## 8 BEN   Very Agree    12    8.57 8.57%   
## 9 BWE   Very Agree    29   20.7  20.71%
# For displaying percent of "Very Agree": 
df_odered_BU %>% 
  filter(Res == "Very Agree") -> df_for_text_BU
df_for_text_BU
## # A tibble: 10 x 5
##    BPE   Res            n percent bar_text
##    <fct> <fct>      <int>   <dbl> <chr>   
##  1 BST   Very Agree     8    5.71 5.71%   
##  2 BSC   Very Agree    28   20    20%     
##  3 BSA   Very Agree     5    3.57 3.57%   
##  4 BRE   Very Agree     6    4.29 4.29%   
##  5 BCO   Very Agree    31   22.1  22.14%  
##  6 BIN   Very Agree    12    8.57 8.57%   
##  7 BHE   Very Agree    14   10    10%     
##  8 BEN   Very Agree    12    8.57 8.57%   
##  9 BRC   Very Agree     3    2.14 2.14%   
## 10 BWE   Very Agree    29   20.7  20.71%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg_BU + 
  geom_text(data = df_for_text1_BU %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) + 
  geom_text(data = df_for_text1_BU %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) +
  geom_text(data = df_for_text_BU, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_BU)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

8.4. Vẽ biểu đồ likert cho nhóm đối tượng không chuyển đổi - BP_nMS

# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_nMS)
##   Res BPE
## 1   3 BST
## 2   2 BST
## 3   2 BST
## 4   3 BST
## 5   3 BST
## 6   4 BST
attach(BP_nMS)
## The following objects are masked from BP_MS_BU:
## 
##     BPE, Res
## The following objects are masked from BP_MS:
## 
##     BPE, Res
BP_nMS = within(BP_nMS, {
  Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
  BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
  })
str(BP_nMS)
## 'data.frame':    5300 obs. of  2 variables:
##  $ Res: Factor w/ 5 levels "Very Disagree",..: 3 2 2 3 3 4 4 4 2 2 ...
##  $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_nMS)
##             Res            BPE      
##  Very Disagree: 207   BST    : 530  
##  Disagree     : 967   BSC    : 530  
##  Normal       :1435   BSA    : 530  
##  Agree        :2246   BRE    : 530  
##  Very Agree   : 445   BCO    : 530  
##                       BIN    : 530  
##                       (Other):2120
t_nMS = compareGroups(Res ~ BPE, data = BP_nMS)
createTable(t_nMS)
## 
## --------Summary descriptives table by 'Res'---------
## 
## _______________________________________________________________________________ 
##         Very Disagree  Disagree     Normal       Agree    Very Agree  p.overall 
##             N=207        N=967      N=1435      N=2246       N=445              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## BPE:                                                                   <0.001   
##     BST  15 (7.25%)   143 (14.8%) 166 (11.6%) 176 (7.84%) 30 (6.74%)            
##     BSC  18 (8.70%)   42 (4.34%)  119 (8.29%) 285 (12.7%) 66 (14.8%)            
##     BSA  18 (8.70%)   210 (21.7%) 177 (12.3%) 103 (4.59%) 22 (4.94%)            
##     BRE  22 (10.6%)   113 (11.7%) 223 (15.5%) 151 (6.72%) 21 (4.72%)            
##     BCO  17 (8.21%)   45 (4.65%)  95 (6.62%)  295 (13.1%) 78 (17.5%)            
##     BIN  12 (5.80%)   93 (9.62%)  158 (11.0%) 234 (10.4%) 33 (7.42%)            
##     BHE  20 (9.66%)   35 (3.62%)  147 (10.2%) 283 (12.6%) 45 (10.1%)            
##     BEN  25 (12.1%)   60 (6.20%)  136 (9.48%) 271 (12.1%) 38 (8.54%)            
##     BRC  47 (22.7%)   195 (20.2%) 161 (11.2%) 117 (5.21%) 10 (2.25%)            
##     BWE  13 (6.28%)   31 (3.21%)  53 (3.69%)  331 (14.7%) 102 (22.9%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_nMS %>% 
  group_by(BPE, Res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(BPE) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 2)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting_nMS
df_for_ploting_nMS %>% 
  filter(Res == Res[5]) %>% 
  arrange(percent) %>% 
  pull(BPE) -> order_x_nMS
order_x_nMS
##  [1] BRC BRE BSA BST BIN BEN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot: 
my_colors_nMS <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_nMS <- "Roboto Condensed"

df_for_ploting_nMS %>% 
  mutate(BPE = factor(BPE, levels = order_x_nMS), Res = factor(Res, levels = Res[5:1])) -> df_odered_nMS
df_odered_nMS
## # A tibble: 50 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    15    2.83 2.83%   
##  2 BST   Disagree        143   27.0  26.98%  
##  3 BST   Normal          166   31.3  31.32%  
##  4 BST   Agree           176   33.2  33.21%  
##  5 BST   Very Agree       30    5.66 5.66%   
##  6 BSC   Very Disagree    18    3.4  3.4%    
##  7 BSC   Disagree         42    7.92 7.92%   
##  8 BSC   Normal          119   22.4  22.45%  
##  9 BSC   Agree           285   53.8  53.77%  
## 10 BSC   Very Agree       66   12.4  12.45%  
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_nMS <- df_odered_nMS %>% 
  ggplot(aes(x = BPE, y = percent, fill = Res)) + 
  geom_col(width = 0.8) + 
  coord_flip() + 
  scale_fill_manual(values = my_colors[5:1], name = "") + 
  theme(legend.position = "top") + 
  theme(text = element_text(family = my_font)) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) + 
  theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) + 
  theme(axis.text = element_text(color = "grey20", size = 10.2)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) + 
  theme(legend.key.height = unit(0.15, "mm")) + 
  labs(x = NULL, y = NULL, 
       title = "Bus perception of people not shifting to bus", 
       subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_nMS
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# For displaying percent of "Very Disagree": 
df_odered_nMS %>% 
  filter(Res == "Very Disagree") -> df_for_text1_nMS
df_for_text1_nMS
## # A tibble: 10 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    15    2.83 2.83%   
##  2 BSC   Very Disagree    18    3.4  3.4%    
##  3 BSA   Very Disagree    18    3.4  3.4%    
##  4 BRE   Very Disagree    22    4.15 4.15%   
##  5 BCO   Very Disagree    17    3.21 3.21%   
##  6 BIN   Very Disagree    12    2.26 2.26%   
##  7 BHE   Very Disagree    20    3.77 3.77%   
##  8 BEN   Very Disagree    25    4.72 4.72%   
##  9 BRC   Very Disagree    47    8.87 8.87%   
## 10 BWE   Very Disagree    13    2.45 2.45%
# For displaying percent of "Disagree": 
df_odered_nMS %>% 
  filter(Res == "Disagree") %>% 
  filter(percent >= 3) -> df_for_text2_nMS
df_for_text2_nMS
## # A tibble: 10 x 5
##    BPE   Res          n percent bar_text
##    <fct> <fct>    <int>   <dbl> <chr>   
##  1 BST   Disagree   143   27.0  26.98%  
##  2 BSC   Disagree    42    7.92 7.92%   
##  3 BSA   Disagree   210   39.6  39.62%  
##  4 BRE   Disagree   113   21.3  21.32%  
##  5 BCO   Disagree    45    8.49 8.49%   
##  6 BIN   Disagree    93   17.6  17.55%  
##  7 BHE   Disagree    35    6.6  6.6%    
##  8 BEN   Disagree    60   11.3  11.32%  
##  9 BRC   Disagree   195   36.8  36.79%  
## 10 BWE   Disagree    31    5.85 5.85%
# For displaying percent of "Normal": 
df_odered_nMS %>% 
  filter(Res == "Normal") %>% 
  filter(percent >= 3) -> df_for_text3_nMS
df_for_text3_nMS
## # A tibble: 10 x 5
##    BPE   Res        n percent bar_text
##    <fct> <fct>  <int>   <dbl> <chr>   
##  1 BST   Normal   166    31.3 31.32%  
##  2 BSC   Normal   119    22.4 22.45%  
##  3 BSA   Normal   177    33.4 33.4%   
##  4 BRE   Normal   223    42.1 42.08%  
##  5 BCO   Normal    95    17.9 17.92%  
##  6 BIN   Normal   158    29.8 29.81%  
##  7 BHE   Normal   147    27.7 27.74%  
##  8 BEN   Normal   136    25.7 25.66%  
##  9 BRC   Normal   161    30.4 30.38%  
## 10 BWE   Normal    53    10   10%
# For displaying percent of "Agree": 
df_odered_nMS %>% 
  filter(Res == "Agree") %>% 
  filter(percent >= 3) -> df_for_text4_nMS
df_for_text4_nMS
## # A tibble: 10 x 5
##    BPE   Res       n percent bar_text
##    <fct> <fct> <int>   <dbl> <chr>   
##  1 BST   Agree   176    33.2 33.21%  
##  2 BSC   Agree   285    53.8 53.77%  
##  3 BSA   Agree   103    19.4 19.43%  
##  4 BRE   Agree   151    28.5 28.49%  
##  5 BCO   Agree   295    55.7 55.66%  
##  6 BIN   Agree   234    44.2 44.15%  
##  7 BHE   Agree   283    53.4 53.4%   
##  8 BEN   Agree   271    51.1 51.13%  
##  9 BRC   Agree   117    22.1 22.08%  
## 10 BWE   Agree   331    62.4 62.45%
# For displaying percent of "Very Agree": 
df_odered_nMS %>% 
  filter(Res == "Very Agree") %>% 
  filter(percent >= 3) -> df_for_text5_nMS
df_for_text5_nMS
## # A tibble: 9 x 5
##   BPE   Res            n percent bar_text
##   <fct> <fct>      <int>   <dbl> <chr>   
## 1 BST   Very Agree    30    5.66 5.66%   
## 2 BSC   Very Agree    66   12.4  12.45%  
## 3 BSA   Very Agree    22    4.15 4.15%   
## 4 BRE   Very Agree    21    3.96 3.96%   
## 5 BCO   Very Agree    78   14.7  14.72%  
## 6 BIN   Very Agree    33    6.23 6.23%   
## 7 BHE   Very Agree    45    8.49 8.49%   
## 8 BEN   Very Agree    38    7.17 7.17%   
## 9 BWE   Very Agree   102   19.2  19.25%
# For displaying percent of "Very Agree": 
df_odered_nMS %>% 
  filter(Res == "Very Agree") -> df_for_text_nMS
df_for_text_nMS
## # A tibble: 10 x 5
##    BPE   Res            n percent bar_text
##    <fct> <fct>      <int>   <dbl> <chr>   
##  1 BST   Very Agree    30    5.66 5.66%   
##  2 BSC   Very Agree    66   12.4  12.45%  
##  3 BSA   Very Agree    22    4.15 4.15%   
##  4 BRE   Very Agree    21    3.96 3.96%   
##  5 BCO   Very Agree    78   14.7  14.72%  
##  6 BIN   Very Agree    33    6.23 6.23%   
##  7 BHE   Very Agree    45    8.49 8.49%   
##  8 BEN   Very Agree    38    7.17 7.17%   
##  9 BRC   Very Agree    10    1.89 1.89%   
## 10 BWE   Very Agree   102   19.2  19.25%
# Ad text layers: 
gg_nMS + 
  geom_text(data = df_for_text1_nMS %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_nMS) + 
  geom_text(data = df_for_text_nMS, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_nMS)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database