1. Read and Create Data

t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\7. Data MSI - 810.csv"
MS = read.csv(t, header = T, sep = ";")
head(MS)
##   ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1  1  0  1  6  3  3  1  2 10  1  1  0  4  1  0   1 15   1   3   3   3   4   4
## 2  2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5
## 3  3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4
## 4  4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4
## 5  5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4
## 6  6  0  1  4  1  4  1 20 30  1  1  0  5  3  0   2 50   0   3   2   4   4   4
##   BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1   4   4   3   4   4   4   4  1  1  1   0  0  5  4  2  2  0  0  0  0  0  1  2
## 2   2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3
## 3   3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3
## 4   4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3
## 5   2   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2
## 6   3   3   3   2   4   4   4  2  0  0   0  1  4  3  4  2  1  1  1  1  1  1  2
##   NR
## 1  1
## 2  0
## 3  0
## 4  0
## 5  0
## 6  1
str(MS$MSI)
##  int [1:810] 0 0 0 0 0 0 0 1 0 0 ...
# View data (number of rows and colume)
dim(MS) 
## [1] 810  47
# Data non bus
MotorS = subset(MS,TM == 3)
MotorS = MotorS[, 2:47]
head(MotorS)
##   CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE
## 2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5   2
## 3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4   3
## 4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4   4
## 5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4   2
## 8  0  1  3  1  4  1 10 25  1  1  0  2  3  1   2  5   1   3   2   5   5   5   3
## 9  0  1  3  1  4  2 12 30  1  1  1  2  1  0   1  7   1   3   3   4   4   4   3
##   BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3  0
## 3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3  0
## 4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3  0
## 5   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2  0
## 8   3   3   3   5   5   5  1  1  2   1  0  4  7  2  0  1  0  0  1  0  0  2  0
## 9   3   3   3   4   4   4  1  1  3   0  0  3  7  3  1  1  0  1  1  0  1  3  0
dim(MotorS)
## [1] 516  46
# Sumerize Data
summary(MotorS) 
##        CA               BC               TM          TP              FR       
##  Min.   :0.0000   Min.   :0.0000   Min.   :3   Min.   :1.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:3   1st Qu.:1.000   1st Qu.:4.000  
##  Median :0.0000   Median :1.0000   Median :3   Median :1.000   Median :4.000  
##  Mean   :0.4516   Mean   :0.5116   Mean   :3   Mean   :1.527   Mean   :3.736  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:3   3rd Qu.:2.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :3   Max.   :4.000   Max.   :4.000  
##        DT              DI               TI               SC        
##  Min.   :1.000   Min.   : 0.200   Min.   :  0.00   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.: 3.000   1st Qu.: 10.00   1st Qu.:1.0000  
##  Median :1.000   Median : 5.000   Median : 15.00   Median :1.0000  
##  Mean   :1.804   Mean   : 6.289   Mean   : 15.84   Mean   :0.8469  
##  3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 20.00   3rd Qu.:1.0000  
##  Max.   :4.000   Max.   :30.000   Max.   :180.00   Max.   :1.0000  
##        LS               TS               MR              WE       
##  Min.   :0.0000   Min.   :0.0000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :1.0000   Median :1.0000   Median :2.000   Median :1.000  
##  Mean   :0.7868   Mean   :0.7403   Mean   :2.924   Mean   :1.616  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :1.0000   Max.   :3.0000   Max.   :6.000   Max.   :3.000  
##        WK              NBR              CO              BUB        
##  Min.   :0.0000   Min.   :1.000   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.: 5.000   1st Qu.:0.0000  
##  Median :0.0000   Median :2.000   Median :10.000   Median :1.0000  
##  Mean   :0.2229   Mean   :2.841   Mean   : 9.595   Mean   :0.6163  
##  3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.:12.000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :6.000   Max.   :50.000   Max.   :1.0000  
##       SBM            SBS             BST             BSC            BSA       
##  Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:2.00   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.00   1st Qu.:3.000  
##  Median :2.00   Median :2.000   Median :3.000   Median :4.00   Median :4.000  
##  Mean   :2.63   Mean   :2.471   Mean   :2.684   Mean   :3.45   Mean   :3.578  
##  3rd Qu.:3.00   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:4.00   3rd Qu.:4.000  
##  Max.   :5.00   Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000  
##       BRE             BCO             BIN             BHE       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :4.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   :3.351   Mean   :3.103   Mean   :3.025   Mean   :2.818  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.250  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##       BEN             BRC             BWE              SP       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:4.000   1st Qu.:1.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :1.000  
##  Mean   :3.715   Mean   :3.733   Mean   :3.895   Mean   :1.014  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:1.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :2.000  
##        BI               WI             MSI               GD        
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.000   Median :0.0000   Median :1.0000  
##  Mean   :0.5698   Mean   :1.163   Mean   :0.2422   Mean   :0.5717  
##  3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :6.000   Max.   :1.0000   Max.   :1.0000  
##        AG              OC              IN             NC        
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :0.0000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.00   1st Qu.:0.0000  
##  Median :4.000   Median :4.000   Median :2.00   Median :1.0000  
##  Mean   :3.669   Mean   :4.496   Mean   :2.18   Mean   :0.7209  
##  3rd Qu.:4.000   3rd Qu.:7.000   3rd Qu.:3.00   3rd Qu.:1.0000  
##  Max.   :6.000   Max.   :8.000   Max.   :4.00   Max.   :3.0000  
##        MC               CC               BO               MO        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :1.0000   Median :0.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.9516   Mean   :0.1376   Mean   :0.3508   Mean   :0.9186  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##        RO                NB              NM              NR        
##  Min.   :0.00000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:2.000   1st Qu.:0.0000  
##  Median :0.00000   Median :1.000   Median :2.000   Median :0.0000  
##  Mean   :0.07946   Mean   :0.655   Mean   :2.271   Mean   :0.1705  
##  3rd Qu.:0.00000   3rd Qu.:1.000   3rd Qu.:3.000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :4.000   Max.   :4.000   Max.   :2.0000
head(MotorS$TM)
## [1] 3 3 3 3 3 3
str(MotorS)
## 'data.frame':    516 obs. of  46 variables:
##  $ CA : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ BC : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ TM : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ TP : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FR : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ DT : int  1 3 1 3 1 2 1 1 1 1 ...
##  $ DI : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ TI : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : int  1 1 1 0 0 1 1 0 1 0 ...
##  $ MR : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ WE : int  3 1 1 3 3 1 3 1 3 1 ...
##  $ WK : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ NBR: int  1 2 4 2 2 1 4 4 4 2 ...
##  $ CO : num  5 3 10 5 5 7 7 3 3 10 ...
##  $ BUB: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  2 2 2 2 3 3 3 1 4 4 ...
##  $ SBS: int  2 2 2 2 2 3 2 1 3 2 ...
##  $ BST: int  2 2 3 3 5 4 2 3 2 3 ...
##  $ BSC: int  4 4 4 4 5 4 5 3 4 3 ...
##  $ BSA: int  5 4 4 4 5 4 5 4 4 4 ...
##  $ BRE: int  2 3 4 2 3 3 3 1 4 4 ...
##  $ BCO: int  2 3 4 2 3 3 2 4 4 2 ...
##  $ BIN: int  4 4 4 3 3 3 4 3 4 4 ...
##  $ BHE: int  2 2 3 2 3 3 2 4 3 2 ...
##  $ BEN: int  5 4 3 4 5 4 5 4 4 4 ...
##  $ BRC: int  5 4 3 4 5 4 5 4 5 4 ...
##  $ BWE: int  3 4 5 4 5 4 4 4 5 4 ...
##  $ SP : int  2 1 1 2 1 1 1 1 1 1 ...
##  $ BI : int  0 1 1 0 1 1 1 1 1 1 ...
##  $ WI : int  0 2 1 0 2 3 2 1 1 1 ...
##  $ MSI: int  0 0 0 0 1 0 0 0 1 1 ...
##  $ GD : int  0 1 1 0 0 0 1 1 1 1 ...
##  $ AG : int  3 3 3 4 4 3 4 2 4 3 ...
##  $ OC : int  6 2 2 6 7 7 3 1 7 7 ...
##  $ IN : int  3 4 3 3 2 3 3 2 2 1 ...
##  $ NC : int  1 0 0 1 0 1 0 0 1 0 ...
##  $ MC : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ CC : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BO : int  1 0 1 0 0 1 0 1 0 0 ...
##  $ MO : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ RO : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NB : int  1 0 1 0 0 1 0 1 2 0 ...
##  $ NM : int  3 3 3 2 2 3 3 3 2 1 ...
##  $ NR : int  0 0 0 0 0 0 0 0 0 0 ...
MotorS = MotorS [ , -3] # Bỏ biến TM
dim(MotorS)
## [1] 516  45
head(MotorS)
##   CA BC TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE BCO
## 2  0  1  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5   2   2
## 3  0  1  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4   3   3
## 4  0  1  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4   4   4
## 5  0  1  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4   2   2
## 8  0  1  1  4  1 10 25  1  1  0  2  3  1   2  5   1   3   2   5   5   5   3   3
## 9  0  1  1  4  2 12 30  1  1  1  2  1  0   1  7   1   3   3   4   4   4   3   3
##   BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3  0
## 3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3  0
## 4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3  0
## 5   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2  0
## 8   3   3   5   5   5  1  1  2   1  0  4  7  2  0  1  0  0  1  0  0  2  0
## 9   3   3   4   4   4  1  1  3   0  0  3  7  3  1  1  0  1  1  0  1  3  0
# Đổi biến AG (1-<=24,2-24-60,3->60)
MotorS$AG[MotorS$AG == 2] <- 1
MotorS$AG[MotorS$AG == 3] <- 1
MotorS$AG[MotorS$AG == 4] <- 2
MotorS$AG[MotorS$AG == 5] <- 2
MotorS$AG[MotorS$AG == 6] <- 3
head(MotorS$AG)
## [1] 1 1 1 2 2 1
# Đổi biến OC (1-HS/SV,2-CNVC,3-Nội trợ,4-LĐ tự do,5-Khác)
MotorS$OC[MotorS$OC == 2] <- 1
MotorS$OC[MotorS$OC == 3] <- 2
MotorS$OC[MotorS$OC == 6] <- 2
MotorS$OC[MotorS$OC == 4] <- 3
MotorS$OC[MotorS$OC == 5] <- 3
MotorS$OC[MotorS$OC == 7] <- 4
MotorS$OC[MotorS$OC == 8] <- 5
head(MotorS$OC)
## [1] 2 1 1 2 4 4
# Đổi biến FR (1-1l/t,2-2l/t,3->=3l/t)
MotorS$FR[MotorS$FR == 4] <- 3
head(MotorS$FR)
## [1] 3 3 3 3 3 3
# Đổi biến DT (1-CĐ/6-8h/16-18h,0-Ngoài giờ CĐ)
MotorS$DT[MotorS$DT == 3] <- 1
MotorS$DT[MotorS$DT == 2] <- 0
MotorS$DT[MotorS$DT == 4] <- 0
head(MotorS$DT)
## [1] 1 1 1 1 1 0
# Đổi biến TS (1-có, 2-không)
MotorS$TS[MotorS$TS == 2] <- 1
MotorS$TS[MotorS$TS == 3] <- 1
head(MotorS$TS)
## [1] 1 1 1 0 0 1
# Đổi biến WE (1-có mưa, 2-không)
MotorS$WE[MotorS$WE == 1] <- 0
MotorS$WE[MotorS$WE == 2] <- 1
MotorS$WE[MotorS$WE == 3] <- 0
head(MotorS$WE)
## [1] 0 0 0 0 0 0
# Đổi biến SBM (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$SBM[MotorS$SBM == 2] <- 1
MotorS$SBM[MotorS$SBM == 3] <- 2
MotorS$SBM[MotorS$SBM == 4] <- 3
MotorS$SBM[MotorS$SBM == 5] <- 3
head(MotorS$SBM)
## [1] 1 1 1 1 2 2
# Đổi biến SBS (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$SBS[MotorS$SBS == 2] <- 1
MotorS$SBS[MotorS$SBS == 3] <- 2
MotorS$SBS[MotorS$SBS == 4] <- 3
MotorS$SBS[MotorS$SBS == 5] <- 3
head(MotorS$SBS)
## [1] 1 1 1 1 1 2
# Đổi biến BST (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BST[MotorS$BST == 2] <- 1
MotorS$BST[MotorS$BST == 3] <- 2
MotorS$BST[MotorS$BST == 4] <- 3
MotorS$BST[MotorS$BST == 5] <- 3
head(MotorS$BST)
## [1] 1 1 2 2 3 3
# Đổi biến BSC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BSC[MotorS$BSC == 2] <- 1
MotorS$BSC[MotorS$BSC == 3] <- 2
MotorS$BSC[MotorS$BSC == 4] <- 3
MotorS$BSC[MotorS$BSC == 5] <- 3
head(MotorS$BSC)
## [1] 3 3 3 3 3 3
# Đổi biến BSA (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BSA[MotorS$BSA == 2] <- 1
MotorS$BSA[MotorS$BSA == 3] <- 2
MotorS$BSA[MotorS$BSA == 4] <- 3
MotorS$BSA[MotorS$BSA == 5] <- 3
head(MotorS$BSA)
## [1] 3 3 3 3 3 3
# Đổi biến BRE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BRE[MotorS$BRE == 2] <- 1
MotorS$BRE[MotorS$BRE == 3] <- 2
MotorS$BRE[MotorS$BRE == 4] <- 3
MotorS$BRE[MotorS$BRE == 5] <- 3
head(MotorS$BRE)
## [1] 1 2 3 1 2 2
# Đổi biến BCO (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BCO[MotorS$BCO == 2] <- 1
MotorS$BCO[MotorS$BCO == 3] <- 2
MotorS$BCO[MotorS$BCO == 4] <- 3
MotorS$BCO[MotorS$BCO == 5] <- 3
head(MotorS$BCO)
## [1] 1 2 3 1 2 2
# Đổi biến BIN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BIN[MotorS$BIN == 2] <- 1
MotorS$BIN[MotorS$BIN == 3] <- 2
MotorS$BIN[MotorS$BIN == 4] <- 3
MotorS$BIN[MotorS$BIN == 5] <- 3
head(MotorS$BIN)
## [1] 3 3 3 2 2 2
# Đổi biến BHE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BHE[MotorS$BHE == 2] <- 1
MotorS$BHE[MotorS$BHE == 3] <- 2
MotorS$BHE[MotorS$BHE == 4] <- 3
MotorS$BHE[MotorS$BHE == 5] <- 3
head(MotorS$BHE)
## [1] 1 1 2 1 2 2
# Đổi biến BEN (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BEN[MotorS$BEN == 2] <- 1
MotorS$BEN[MotorS$BEN == 3] <- 2
MotorS$BEN[MotorS$BEN == 4] <- 3
MotorS$BEN[MotorS$BEN == 5] <- 3
head(MotorS$BEN)
## [1] 3 3 2 3 3 3
# Đổi biến BRC (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BRC[MotorS$BRC == 2] <- 1
MotorS$BRC[MotorS$BRC == 3] <- 2
MotorS$BRC[MotorS$BRC == 4] <- 3
MotorS$BRC[MotorS$BRC == 5] <- 3
head(MotorS$BRC)
## [1] 3 3 2 3 3 3
# Đổi biến BWE (1-Không thích/không đồng ý, 2-Bình thường/trung tính, 3-Thích/Đồng ý)
MotorS$BWE[MotorS$BWE == 2] <- 1
MotorS$BWE[MotorS$BWE == 3] <- 2
MotorS$BWE[MotorS$BWE == 4] <- 3
MotorS$BWE[MotorS$BWE == 5] <- 3
head(MotorS$BWE)
## [1] 2 3 3 3 3 3
# Đổi biến NB (0-0xe,1-1xe,2->=2xe)
MotorS$NB[MotorS$NB == 3] <- 2
MotorS$NB[MotorS$NB == 4] <- 2
head(MotorS$NB)
## [1] 1 0 1 0 0 1
# Đổi biến NM (0-0xe,1-1xe,2-2xe,3->=3xe)
MotorS$NM[MotorS$NM == 3] <- 2
MotorS$NM[MotorS$NM == 4] <- 2
head(MotorS$NM)
## [1] 2 2 2 2 2 2
# Đổi biến NR (0-0xe,1-Có xe)
MotorS$NR[MotorS$NR == 2] <- 1  
MotorS$NR[MotorS$NR == 3] <- 1
head(MotorS$NR)
## [1] 0 0 0 0 0 0
# Đổi biến NC (0-No,1-Yes)
MotorS$NC[MotorS$NC == 2] <- 1
MotorS$NC[MotorS$NC == 3] <- 1
head(MotorS$NC)
## [1] 1 0 0 1 0 1
# Đổi biến WI (0-No,1-Yes)
MotorS$WI[MotorS$WI >= 5] <- 4
MotorS$WI[MotorS$WI >= 6] <- 4
head(MotorS$WI)
## [1] 0 2 1 0 2 3
head(MotorS$BUB)
## [1] 0 1 1 1 1 1
head(MotorS)
##   CA BC TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA BRE BCO
## 2  0  1  1  3  1  8 15  1  1  1  2  0  0   1  5   0   1   1   1   3   3   1   1
## 3  0  1  1  3  1  5 15  1  1  1  4  0  0   2  3   1   1   1   1   3   3   2   2
## 4  0  1  1  3  1  5 10  1  1  1  2  0  0   4 10   1   1   1   2   3   3   3   3
## 5  0  1  1  3  1  8 15  1  1  0  2  0  0   2  5   1   1   1   2   3   3   1   1
## 8  0  1  1  3  1 10 25  1  1  0  2  0  1   2  5   1   2   1   3   3   3   2   2
## 9  0  1  1  3  0 12 30  1  1  1  2  0  0   1  7   1   2   2   3   3   3   2   2
##   BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM NR
## 2   3   1   3   3   2  2  0  0   0  0  1  2  3  1  1  0  1  1  0  1  2  0
## 3   3   1   3   3   3  1  1  2   0  1  1  1  4  0  1  0  0  1  0  0  2  0
## 4   3   2   2   2   3  1  1  1   0  1  1  1  3  0  1  0  1  1  0  1  2  0
## 5   2   1   3   3   3  2  0  0   0  0  2  2  3  1  1  0  0  1  0  0  2  0
## 8   2   2   3   3   3  1  1  2   1  0  2  4  2  0  1  0  0  1  0  0  2  0
## 9   2   2   3   3   3  1  1  3   0  0  1  4  3  1  1  0  1  1  0  1  2  0
dim(MotorS)
## [1] 516  45
str(MotorS)
## 'data.frame':    516 obs. of  45 variables:
##  $ CA : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ BC : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ TP : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FR : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ DT : num  1 1 1 1 1 0 1 1 1 1 ...
##  $ DI : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ TI : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : num  1 1 1 0 0 1 1 0 1 0 ...
##  $ MR : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ WE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ WK : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ NBR: int  1 2 4 2 2 1 4 4 4 2 ...
##  $ CO : num  5 3 10 5 5 7 7 3 3 10 ...
##  $ BUB: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: num  1 1 1 1 2 2 2 1 3 3 ...
##  $ SBS: num  1 1 1 1 1 2 1 1 2 1 ...
##  $ BST: num  1 1 2 2 3 3 1 2 1 2 ...
##  $ BSC: num  3 3 3 3 3 3 3 2 3 2 ...
##  $ BSA: num  3 3 3 3 3 3 3 3 3 3 ...
##  $ BRE: num  1 2 3 1 2 2 2 1 3 3 ...
##  $ BCO: num  1 2 3 1 2 2 1 3 3 1 ...
##  $ BIN: num  3 3 3 2 2 2 3 2 3 3 ...
##  $ BHE: num  1 1 2 1 2 2 1 3 2 1 ...
##  $ BEN: num  3 3 2 3 3 3 3 3 3 3 ...
##  $ BRC: num  3 3 2 3 3 3 3 3 3 3 ...
##  $ BWE: num  2 3 3 3 3 3 3 3 3 3 ...
##  $ SP : int  2 1 1 2 1 1 1 1 1 1 ...
##  $ BI : int  0 1 1 0 1 1 1 1 1 1 ...
##  $ WI : num  0 2 1 0 2 3 2 1 1 1 ...
##  $ MSI: int  0 0 0 0 1 0 0 0 1 1 ...
##  $ GD : int  0 1 1 0 0 0 1 1 1 1 ...
##  $ AG : num  1 1 1 2 2 1 2 1 2 1 ...
##  $ OC : num  2 1 1 2 4 4 2 1 4 4 ...
##  $ IN : int  3 4 3 3 2 3 3 2 2 1 ...
##  $ NC : num  1 0 0 1 0 1 0 0 1 0 ...
##  $ MC : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ CC : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BO : int  1 0 1 0 0 1 0 1 0 0 ...
##  $ MO : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ RO : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NB : num  1 0 1 0 0 1 0 1 2 0 ...
##  $ NM : num  2 2 2 2 2 2 2 2 2 1 ...
##  $ NR : num  0 0 0 0 0 0 0 0 0 0 ...
# Reformat variables in factor variables - Đổi định dạng biến
attach(MotorS)
MotorS = within(MotorS, {
  CA = factor(CA, labels = c("No", "Yes"))
  WE = factor(WE, labels = c("No", "Yes"))
  WK = factor(WK, labels = c("No", "Yes"))
  TP = factor(TP, labels = c("Work/Study", "Picking Children", "Entertainment/Food", "Others"))
  FR = factor(FR, labels = c("Once", "twice", ">=3 times"))
  DT = factor(DT, labels = c("Normal", "Rush hour"))
  TS = factor(TS, labels = c("No", "Yes"))
  SC = factor(SC, labels = c("No", "Yes"))
  LS = factor(LS, labels = c("No", "Yes"))
  BC = factor(BC,labels = c("No", "Yes"))
  SP = factor(SP, labels = c("No", "Yes", "Don't know"))
  BI = factor(BI, labels = c("No", "Yes"))
  WI = factor(WI, labels = c("No", "At Bus Stops", "Internet/Facebook", "From Friends", "Others"))
  MSI = factor(MSI, labels = c("No", "Yes"))
  BST = factor(BST, labels = c("Don't agree", "Normal", "Agree"))
  BSC = factor(BSC, labels = c("Don't agree", "Normal", "Agree"))
  BSA = factor(BSA, labels = c("Don't agree", "Normal", "Agree"))
  BRE = factor(BRE, labels = c("Don't agree", "Normal", "Agree"))
  BCO = factor(BCO, labels = c("Don't agree", "Normal", "Agree"))
  BIN = factor(BIN, labels = c("Don't agree", "Normal", "Agree"))
  BHE = factor(BHE, labels = c("Don't agree", "Normal", "Agree"))
  BEN = factor(BEN, labels = c("Don't agree", "Normal", "Agree"))
  BRC = factor(BRC, labels = c("Don't agree", "Normal", "Agree"))
  BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
  SBM = factor(SBM, labels = c("Don't agree", "Normal", "Agree"))
  SBS = factor(SBS, labels = c("Don't agree", "Normal", "Agree"))
  BWE = factor(BWE, labels = c("Don't agree", "Normal", "Agree"))
  MR = factor(MR, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
  NBR = factor(NBR, labels = c("No Route", "Inconvenience", "Unsafety", "Long waiting time", "Unreliability", "others"))
  BUB = factor(BUB, labels = c("No", "Yes"))
  GD = factor(GD, labels = c("Female", "Male"))
  AG = factor(AG, labels = c("<=24", "25-60", ">60"))
  OC = factor(OC, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
  IN = factor(IN, labels = c("<8 millions", "(8-15) millions", "(15-25) millions",  ">25 millions"))
  NC = factor(NC, labels = c("No", "Yes"))
  MC = factor(MC, labels = c("No", "Yes"))
  CC = factor(CC, labels = c("No", "Yes"))
  BO = factor(BO, labels = c("No", "Yes"))
  MO = factor(MO, labels = c("No", "Yes"))
  RO = factor(RO, labels = c("No", "Yes"))
  NB = factor(NB, labels = c("None", "1", ">=2"))
  NM = factor(NM, labels = c("None", "1", ">=2"))
  NR = factor(NR, labels = c("No", "Yes"))
  DI = as.numeric(DI)
  CO = as.numeric(CO)
  TI = as.numeric(TI)
    } )
head(MotorS)
##   CA  BC         TP        FR        DT DI TI  SC  LS  TS            MR WE  WK
## 2 No Yes Work/Study >=3 times Rush hour  8 15 Yes Yes Yes   Comfortable No  No
## 3 No Yes Work/Study >=3 times Rush hour  5 15 Yes Yes Yes Accessibility No  No
## 4 No Yes Work/Study >=3 times Rush hour  5 10 Yes Yes Yes   Comfortable No  No
## 5 No Yes Work/Study >=3 times Rush hour  8 15 Yes Yes  No   Comfortable No  No
## 8 No Yes Work/Study >=3 times Rush hour 10 25 Yes Yes  No   Comfortable No Yes
## 9 No Yes Work/Study >=3 times    Normal 12 30 Yes Yes Yes   Comfortable No  No
##                 NBR CO BUB         SBM         SBS         BST   BSC   BSA
## 2          No Route  5  No Don't agree Don't agree Don't agree Agree Agree
## 3     Inconvenience  3 Yes Don't agree Don't agree Don't agree Agree Agree
## 4 Long waiting time 10 Yes Don't agree Don't agree      Normal Agree Agree
## 5     Inconvenience  5 Yes Don't agree Don't agree      Normal Agree Agree
## 8     Inconvenience  5 Yes      Normal Don't agree       Agree Agree Agree
## 9          No Route  7 Yes      Normal      Normal       Agree Agree Agree
##           BRE         BCO    BIN         BHE    BEN    BRC    BWE         SP
## 2 Don't agree Don't agree  Agree Don't agree  Agree  Agree Normal Don't know
## 3      Normal      Normal  Agree Don't agree  Agree  Agree  Agree        Yes
## 4       Agree       Agree  Agree      Normal Normal Normal  Agree        Yes
## 5 Don't agree Don't agree Normal Don't agree  Agree  Agree  Agree Don't know
## 8      Normal      Normal Normal      Normal  Agree  Agree  Agree        Yes
## 9      Normal      Normal Normal      Normal  Agree  Agree  Agree        Yes
##    BI                WI MSI     GD    AG               OC               IN  NC
## 2  No                No  No Female  <=24 Officers/Workers (15-25) millions Yes
## 3 Yes Internet/Facebook  No   Male  <=24  Pupils/Students     >25 millions  No
## 4 Yes      At Bus Stops  No   Male  <=24  Pupils/Students (15-25) millions  No
## 5  No                No  No Female 25-60 Officers/Workers (15-25) millions Yes
## 8 Yes Internet/Facebook Yes Female 25-60       Free labor  (8-15) millions  No
## 9 Yes      From Friends  No Female  <=24       Free labor (15-25) millions Yes
##    MC CC  BO  MO RO   NB  NM NR
## 2 Yes No Yes Yes No    1 >=2 No
## 3 Yes No  No Yes No None >=2 No
## 4 Yes No Yes Yes No    1 >=2 No
## 5 Yes No  No Yes No None >=2 No
## 8 Yes No  No Yes No None >=2 No
## 9 Yes No Yes Yes No    1 >=2 No
str(MotorS)
## 'data.frame':    516 obs. of  45 variables:
##  $ CA : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 2 ...
##  $ BC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 1 1 ...
##  $ TP : Factor w/ 4 levels "Work/Study","Picking Children",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ FR : Factor w/ 3 levels "Once","twice",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ DT : Factor w/ 2 levels "Normal","Rush hour": 2 2 2 2 2 1 2 2 2 2 ...
##  $ DI : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ TI : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ SC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ LS : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ TS : Factor w/ 2 levels "No","Yes": 2 2 2 1 1 2 2 1 2 1 ...
##  $ MR : Factor w/ 6 levels "Safety","Comfortable",..: 2 4 2 2 2 2 2 2 4 3 ...
##  $ WE : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ WK : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
##  $ NBR: Factor w/ 6 levels "No Route","Inconvenience",..: 1 2 4 2 2 1 4 4 4 2 ...
##  $ CO : num  5 3 10 5 5 7 7 3 3 10 ...
##  $ BUB: Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ SBM: Factor w/ 3 levels "Don't agree",..: 1 1 1 1 2 2 2 1 3 3 ...
##  $ SBS: Factor w/ 3 levels "Don't agree",..: 1 1 1 1 1 2 1 1 2 1 ...
##  $ BST: Factor w/ 3 levels "Don't agree",..: 1 1 2 2 3 3 1 2 1 2 ...
##  $ BSC: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 2 3 2 ...
##  $ BSA: Factor w/ 3 levels "Don't agree",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ BRE: Factor w/ 3 levels "Don't agree",..: 1 2 3 1 2 2 2 1 3 3 ...
##  $ BCO: Factor w/ 3 levels "Don't agree",..: 1 2 3 1 2 2 1 3 3 1 ...
##  $ BIN: Factor w/ 3 levels "Don't agree",..: 3 3 3 2 2 2 3 2 3 3 ...
##  $ BHE: Factor w/ 3 levels "Don't agree",..: 1 1 2 1 2 2 1 3 2 1 ...
##  $ BEN: Factor w/ 3 levels "Don't agree",..: 3 3 2 3 3 3 3 3 3 3 ...
##  $ BRC: Factor w/ 3 levels "Don't agree",..: 3 3 2 3 3 3 3 3 3 3 ...
##  $ BWE: Factor w/ 3 levels "Don't agree",..: 2 3 3 3 3 3 3 3 3 3 ...
##  $ SP : Factor w/ 3 levels "No","Yes","Don't know": 3 2 2 3 2 2 2 2 2 2 ...
##  $ BI : Factor w/ 2 levels "No","Yes": 1 2 2 1 2 2 2 2 2 2 ...
##  $ WI : Factor w/ 5 levels "No","At Bus Stops",..: 1 3 2 1 3 4 3 2 2 2 ...
##  $ MSI: Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 2 2 ...
##  $ GD : Factor w/ 2 levels "Female","Male": 1 2 2 1 1 1 2 2 2 2 ...
##  $ AG : Factor w/ 3 levels "<=24","25-60",..: 1 1 1 2 2 1 2 1 2 1 ...
##  $ OC : Factor w/ 5 levels "Pupils/Students",..: 2 1 1 2 4 4 2 1 4 4 ...
##  $ IN : Factor w/ 4 levels "<8 millions",..: 3 4 3 3 2 3 3 2 2 1 ...
##  $ NC : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 2 1 1 2 1 ...
##  $ MC : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
##  $ CC : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ BO : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 2 1 2 1 1 ...
##  $ MO : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
##  $ RO : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ NB : Factor w/ 3 levels "None","1",">=2": 2 1 2 1 1 2 1 2 3 1 ...
##  $ NM : Factor w/ 3 levels "None","1",">=2": 3 3 3 3 3 3 3 3 3 2 ...
##  $ NR : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

2. Descriptive analyse

# Descriptive analyse with package "table 1" - sort by MSI
  ## install.packages("table1", dependencies = T)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1 (~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI| MSI , data = MotorS)
No
(N=391)
Yes
(N=125)
Overall
(N=516)
GD
Female 172 (44.0%) 49 (39.2%) 221 (42.8%)
Male 219 (56.0%) 76 (60.8%) 295 (57.2%)
AG
<=24 155 (39.6%) 59 (47.2%) 214 (41.5%)
25-60 231 (59.1%) 62 (49.6%) 293 (56.8%)
>60 5 (1.3%) 4 (3.2%) 9 (1.7%)
OC
Pupils/Students 117 (29.9%) 47 (37.6%) 164 (31.8%)
Officers/Workers 124 (31.7%) 29 (23.2%) 153 (29.7%)
Housewife/Unemployed 24 (6.1%) 6 (4.8%) 30 (5.8%)
Free labor 108 (27.6%) 31 (24.8%) 139 (26.9%)
Others 18 (4.6%) 12 (9.6%) 30 (5.8%)
IN
<8 millions 90 (23.0%) 23 (18.4%) 113 (21.9%)
(8-15) millions 173 (44.2%) 63 (50.4%) 236 (45.7%)
(15-25) millions 99 (25.3%) 29 (23.2%) 128 (24.8%)
>25 millions 29 (7.4%) 10 (8.0%) 39 (7.6%)
NC
No 171 (43.7%) 52 (41.6%) 223 (43.2%)
Yes 220 (56.3%) 73 (58.4%) 293 (56.8%)
MC
No 17 (4.3%) 8 (6.4%) 25 (4.8%)
Yes 374 (95.7%) 117 (93.6%) 491 (95.2%)
CC
No 335 (85.7%) 110 (88.0%) 445 (86.2%)
Yes 56 (14.3%) 15 (12.0%) 71 (13.8%)
BO
No 245 (62.7%) 90 (72.0%) 335 (64.9%)
Yes 146 (37.3%) 35 (28.0%) 181 (35.1%)
MO
No 32 (8.2%) 10 (8.0%) 42 (8.1%)
Yes 359 (91.8%) 115 (92.0%) 474 (91.9%)
RO
No 362 (92.6%) 113 (90.4%) 475 (92.1%)
Yes 29 (7.4%) 12 (9.6%) 41 (7.9%)
NB
None 187 (47.8%) 66 (52.8%) 253 (49.0%)
1 151 (38.6%) 46 (36.8%) 197 (38.2%)
>=2 53 (13.6%) 13 (10.4%) 66 (12.8%)
NM
None 5 (1.3%) 0 (0%) 5 (1.0%)
1 54 (13.8%) 14 (11.2%) 68 (13.2%)
>=2 332 (84.9%) 111 (88.8%) 443 (85.9%)
NR
No 332 (84.9%) 103 (82.4%) 435 (84.3%)
Yes 59 (15.1%) 22 (17.6%) 81 (15.7%)
TP
Work/Study 255 (65.2%) 95 (76.0%) 350 (67.8%)
Picking Children 57 (14.6%) 12 (9.6%) 69 (13.4%)
Entertainment/Food 72 (18.4%) 16 (12.8%) 88 (17.1%)
Others 7 (1.8%) 2 (1.6%) 9 (1.7%)
FR
Once 21 (5.4%) 2 (1.6%) 23 (4.5%)
twice 15 (3.8%) 2 (1.6%) 17 (3.3%)
>=3 times 355 (90.8%) 121 (96.8%) 476 (92.2%)
DT
Normal 85 (21.7%) 20 (16.0%) 105 (20.3%)
Rush hour 306 (78.3%) 105 (84.0%) 411 (79.7%)
TS
No 178 (45.5%) 69 (55.2%) 247 (47.9%)
Yes 213 (54.5%) 56 (44.8%) 269 (52.1%)
SP
No 40 (10.2%) 8 (6.4%) 48 (9.3%)
Yes 300 (76.7%) 113 (90.4%) 413 (80.0%)
Don't know 51 (13.0%) 4 (3.2%) 55 (10.7%)
BI
No 194 (49.6%) 28 (22.4%) 222 (43.0%)
Yes 197 (50.4%) 97 (77.6%) 294 (57.0%)
WI
No 194 (49.6%) 28 (22.4%) 222 (43.0%)
At Bus Stops 105 (26.9%) 39 (31.2%) 144 (27.9%)
Internet/Facebook 43 (11.0%) 30 (24.0%) 73 (14.1%)
From Friends 24 (6.1%) 18 (14.4%) 42 (8.1%)
Others 25 (6.4%) 10 (8.0%) 35 (6.8%)
LS
No 76 (19.4%) 34 (27.2%) 110 (21.3%)
Yes 315 (80.6%) 91 (72.8%) 406 (78.7%)
SC
No 66 (16.9%) 13 (10.4%) 79 (15.3%)
Yes 325 (83.1%) 112 (89.6%) 437 (84.7%)
BC
No 176 (45.0%) 76 (60.8%) 252 (48.8%)
Yes 215 (55.0%) 49 (39.2%) 264 (51.2%)
WE
No 389 (99.5%) 125 (100%) 514 (99.6%)
Yes 2 (0.5%) 0 (0%) 2 (0.4%)
WK
No 302 (77.2%) 99 (79.2%) 401 (77.7%)
Yes 89 (22.8%) 26 (20.8%) 115 (22.3%)
CA
No 209 (53.5%) 74 (59.2%) 283 (54.8%)
Yes 182 (46.5%) 51 (40.8%) 233 (45.2%)
BST
Don't agree 182 (46.5%) 66 (52.8%) 248 (48.1%)
Normal 121 (30.9%) 23 (18.4%) 144 (27.9%)
Agree 88 (22.5%) 36 (28.8%) 124 (24.0%)
BSC
Don't agree 61 (15.6%) 20 (16.0%) 81 (15.7%)
Normal 108 (27.6%) 21 (16.8%) 129 (25.0%)
Agree 222 (56.8%) 84 (67.2%) 306 (59.3%)
BSA
Don't agree 40 (10.2%) 12 (9.6%) 52 (10.1%)
Normal 113 (28.9%) 20 (16.0%) 133 (25.8%)
Agree 238 (60.9%) 93 (74.4%) 331 (64.1%)
BRE
Don't agree 81 (20.7%) 20 (16.0%) 101 (19.6%)
Normal 117 (29.9%) 33 (26.4%) 150 (29.1%)
Agree 193 (49.4%) 72 (57.6%) 265 (51.4%)
BCO
Don't agree 119 (30.4%) 39 (31.2%) 158 (30.6%)
Normal 117 (29.9%) 32 (25.6%) 149 (28.9%)
Agree 155 (39.6%) 54 (43.2%) 209 (40.5%)
BIN
Don't agree 104 (26.6%) 37 (29.6%) 141 (27.3%)
Normal 167 (42.7%) 46 (36.8%) 213 (41.3%)
Agree 120 (30.7%) 42 (33.6%) 162 (31.4%)
BHE
Don't agree 182 (46.5%) 45 (36.0%) 227 (44.0%)
Normal 123 (31.5%) 37 (29.6%) 160 (31.0%)
Agree 86 (22.0%) 43 (34.4%) 129 (25.0%)
BEN
Don't agree 37 (9.5%) 11 (8.8%) 48 (9.3%)
Normal 87 (22.3%) 21 (16.8%) 108 (20.9%)
Agree 267 (68.3%) 93 (74.4%) 360 (69.8%)
BRC
Don't agree 43 (11.0%) 14 (11.2%) 57 (11.0%)
Normal 70 (17.9%) 17 (13.6%) 87 (16.9%)
Agree 278 (71.1%) 94 (75.2%) 372 (72.1%)
BWE
Don't agree 32 (8.2%) 11 (8.8%) 43 (8.3%)
Normal 37 (9.5%) 13 (10.4%) 50 (9.7%)
Agree 322 (82.4%) 101 (80.8%) 423 (82.0%)
MR
Safety 21 (5.4%) 6 (4.8%) 27 (5.2%)
Comfortable 200 (51.2%) 54 (43.2%) 254 (49.2%)
Low price 19 (4.9%) 5 (4.0%) 24 (4.7%)
Accessibility 109 (27.9%) 52 (41.6%) 161 (31.2%)
Reliability 35 (9.0%) 7 (5.6%) 42 (8.1%)
others 7 (1.8%) 1 (0.8%) 8 (1.6%)
NBR
No Route 86 (22.0%) 36 (28.8%) 122 (23.6%)
Inconvenience 123 (31.5%) 36 (28.8%) 159 (30.8%)
Unsafety 6 (1.5%) 1 (0.8%) 7 (1.4%)
Long waiting time 120 (30.7%) 38 (30.4%) 158 (30.6%)
Unreliability 39 (10.0%) 8 (6.4%) 47 (9.1%)
others 17 (4.3%) 6 (4.8%) 23 (4.5%)
BUB
No 168 (43.0%) 30 (24.0%) 198 (38.4%)
Yes 223 (57.0%) 95 (76.0%) 318 (61.6%)
SBM
Don't agree 218 (55.8%) 45 (36.0%) 263 (51.0%)
Normal 136 (34.8%) 33 (26.4%) 169 (32.8%)
Agree 37 (9.5%) 47 (37.6%) 84 (16.3%)
SBS
Don't agree 235 (60.1%) 64 (51.2%) 299 (57.9%)
Normal 138 (35.3%) 35 (28.0%) 173 (33.5%)
Agree 18 (4.6%) 26 (20.8%) 44 (8.5%)
CO
Mean (SD) 9.42 (7.27) 10.1 (6.33) 9.60 (7.06)
Median [Min, Max] 8.00 [0, 50.0] 10.0 [0.800, 30.0] 10.0 [0, 50.0]
DI
Mean (SD) 6.19 (4.79) 6.58 (5.45) 6.29 (4.95)
Median [Min, Max] 5.00 [0.200, 30.0] 5.00 [0.200, 30.0] 5.00 [0.200, 30.0]
TI
Mean (SD) 16.0 (13.1) 15.3 (11.8) 15.8 (12.8)
Median [Min, Max] 15.0 [0, 180] 10.0 [2.00, 60.0] 15.0 [0, 180]
# Descriptive analyse with package "compareGroups" - sort by MSI (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
library(compareGroups)
t = compareGroups(MSI ~ GD + AG + OC + IN + NC + MC + CC + BO + MO + RO + NB + NM + NR + TP + FR + DT + TS + SP + BI + WI + LS + SC + BC + WE + WK + CA + BST + BSC + BSA + BRE + BCO + BIN + BHE + BEN + BRC + BWE + MR + NBR + BUB + SBM + SBS + CO + DI + TI, data = MotorS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t)
## 
## --------Summary descriptives table by 'MSI'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=391       N=125              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## GD:                                                0.402   
##     Female               172 (44.0%) 49 (39.2%)            
##     Male                 219 (56.0%) 76 (60.8%)            
## AG:                                                0.075   
##     <=24                 155 (39.6%) 59 (47.2%)            
##     25-60                231 (59.1%) 62 (49.6%)            
##     >60                   5 (1.28%)   4 (3.20%)            
## OC:                                                0.069   
##     Pupils/Students      117 (29.9%) 47 (37.6%)            
##     Officers/Workers     124 (31.7%) 29 (23.2%)            
##     Housewife/Unemployed 24 (6.14%)   6 (4.80%)            
##     Free labor           108 (27.6%) 31 (24.8%)            
##     Others               18 (4.60%)  12 (9.60%)            
## IN:                                                0.589   
##     <8 millions          90 (23.0%)  23 (18.4%)            
##     (8-15) millions      173 (44.2%) 63 (50.4%)            
##     (15-25) millions     99 (25.3%)  29 (23.2%)            
##     >25 millions         29 (7.42%)  10 (8.00%)            
## NC:                                                0.752   
##     No                   171 (43.7%) 52 (41.6%)            
##     Yes                  220 (56.3%) 73 (58.4%)            
## MC:                                                0.490   
##     No                   17 (4.35%)   8 (6.40%)            
##     Yes                  374 (95.7%) 117 (93.6%)           
## CC:                                                0.612   
##     No                   335 (85.7%) 110 (88.0%)           
##     Yes                  56 (14.3%)  15 (12.0%)            
## BO:                                                0.072   
##     No                   245 (62.7%) 90 (72.0%)            
##     Yes                  146 (37.3%) 35 (28.0%)            
## MO:                                                1.000   
##     No                   32 (8.18%)  10 (8.00%)            
##     Yes                  359 (91.8%) 115 (92.0%)           
## RO:                                                0.551   
##     No                   362 (92.6%) 113 (90.4%)           
##     Yes                  29 (7.42%)  12 (9.60%)            
## NB:                                                0.523   
##     None                 187 (47.8%) 66 (52.8%)            
##     1                    151 (38.6%) 46 (36.8%)            
##     >=2                  53 (13.6%)  13 (10.4%)            
## NM:                                                0.434   
##     None                  5 (1.28%)   0 (0.00%)            
##     1                    54 (13.8%)  14 (11.2%)            
##     >=2                  332 (84.9%) 111 (88.8%)           
## NR:                                                0.596   
##     No                   332 (84.9%) 103 (82.4%)           
##     Yes                  59 (15.1%)  22 (17.6%)            
## TP:                                                0.158   
##     Work/Study           255 (65.2%) 95 (76.0%)            
##     Picking Children     57 (14.6%)  12 (9.60%)            
##     Entertainment/Food   72 (18.4%)  16 (12.8%)            
##     Others                7 (1.79%)   2 (1.60%)            
## FR:                                                0.103   
##     Once                 21 (5.37%)   2 (1.60%)            
##     twice                15 (3.84%)   2 (1.60%)            
##     >=3 times            355 (90.8%) 121 (96.8%)           
## DT:                                                0.208   
##     Normal               85 (21.7%)  20 (16.0%)            
##     Rush hour            306 (78.3%) 105 (84.0%)           
## TS:                                                0.075   
##     No                   178 (45.5%) 69 (55.2%)            
##     Yes                  213 (54.5%) 56 (44.8%)            
## SP:                                                0.002   
##     No                   40 (10.2%)   8 (6.40%)            
##     Yes                  300 (76.7%) 113 (90.4%)           
##     Don't know           51 (13.0%)   4 (3.20%)            
## BI:                                               <0.001   
##     No                   194 (49.6%) 28 (22.4%)            
##     Yes                  197 (50.4%) 97 (77.6%)            
## WI:                                               <0.001   
##     No                   194 (49.6%) 28 (22.4%)            
##     At Bus Stops         105 (26.9%) 39 (31.2%)            
##     Internet/Facebook    43 (11.0%)  30 (24.0%)            
##     From Friends         24 (6.14%)  18 (14.4%)            
##     Others               25 (6.39%)  10 (8.00%)            
## LS:                                                0.086   
##     No                   76 (19.4%)  34 (27.2%)            
##     Yes                  315 (80.6%) 91 (72.8%)            
## SC:                                                0.108   
##     No                   66 (16.9%)  13 (10.4%)            
##     Yes                  325 (83.1%) 112 (89.6%)           
## BC:                                                0.003   
##     No                   176 (45.0%) 76 (60.8%)            
##     Yes                  215 (55.0%) 49 (39.2%)            
## WE:                                                1.000   
##     No                   389 (99.5%) 125 (100%)            
##     Yes                   2 (0.51%)   0 (0.00%)            
## WK:                                                0.737   
##     No                   302 (77.2%) 99 (79.2%)            
##     Yes                  89 (22.8%)  26 (20.8%)            
## CA:                                                0.307   
##     No                   209 (53.5%) 74 (59.2%)            
##     Yes                  182 (46.5%) 51 (40.8%)            
## BST:                                               0.022   
##     Don't agree          182 (46.5%) 66 (52.8%)            
##     Normal               121 (30.9%) 23 (18.4%)            
##     Agree                88 (22.5%)  36 (28.8%)            
## BSC:                                               0.045   
##     Don't agree          61 (15.6%)  20 (16.0%)            
##     Normal               108 (27.6%) 21 (16.8%)            
##     Agree                222 (56.8%) 84 (67.2%)            
## BSA:                                               0.012   
##     Don't agree          40 (10.2%)  12 (9.60%)            
##     Normal               113 (28.9%) 20 (16.0%)            
##     Agree                238 (60.9%) 93 (74.4%)            
## BRE:                                               0.255   
##     Don't agree          81 (20.7%)  20 (16.0%)            
##     Normal               117 (29.9%) 33 (26.4%)            
##     Agree                193 (49.4%) 72 (57.6%)            
## BCO:                                               0.629   
##     Don't agree          119 (30.4%) 39 (31.2%)            
##     Normal               117 (29.9%) 32 (25.6%)            
##     Agree                155 (39.6%) 54 (43.2%)            
## BIN:                                               0.504   
##     Don't agree          104 (26.6%) 37 (29.6%)            
##     Normal               167 (42.7%) 46 (36.8%)            
##     Agree                120 (30.7%) 42 (33.6%)            
## BHE:                                               0.016   
##     Don't agree          182 (46.5%) 45 (36.0%)            
##     Normal               123 (31.5%) 37 (29.6%)            
##     Agree                86 (22.0%)  43 (34.4%)            
## BEN:                                               0.387   
##     Don't agree          37 (9.46%)  11 (8.80%)            
##     Normal               87 (22.3%)  21 (16.8%)            
##     Agree                267 (68.3%) 93 (74.4%)            
## BRC:                                               0.531   
##     Don't agree          43 (11.0%)  14 (11.2%)            
##     Normal               70 (17.9%)  17 (13.6%)            
##     Agree                278 (71.1%) 94 (75.2%)            
## BWE:                                               0.925   
##     Don't agree          32 (8.18%)  11 (8.80%)            
##     Normal               37 (9.46%)  13 (10.4%)            
##     Agree                322 (82.4%) 101 (80.8%)           
## MR:                                                0.129   
##     Safety               21 (5.37%)   6 (4.80%)            
##     Comfortable          200 (51.2%) 54 (43.2%)            
##     Low price            19 (4.86%)   5 (4.00%)            
##     Accessibility        109 (27.9%) 52 (41.6%)            
##     Reliability          35 (8.95%)   7 (5.60%)            
##     others                7 (1.79%)   1 (0.80%)            
## NBR:                                               0.611   
##     No Route             86 (22.0%)  36 (28.8%)            
##     Inconvenience        123 (31.5%) 36 (28.8%)            
##     Unsafety              6 (1.53%)   1 (0.80%)            
##     Long waiting time    120 (30.7%) 38 (30.4%)            
##     Unreliability        39 (9.97%)   8 (6.40%)            
##     others               17 (4.35%)   6 (4.80%)            
## BUB:                                              <0.001   
##     No                   168 (43.0%) 30 (24.0%)            
##     Yes                  223 (57.0%) 95 (76.0%)            
## SBM:                                              <0.001   
##     Don't agree          218 (55.8%) 45 (36.0%)            
##     Normal               136 (34.8%) 33 (26.4%)            
##     Agree                37 (9.46%)  47 (37.6%)            
## SBS:                                              <0.001   
##     Don't agree          235 (60.1%) 64 (51.2%)            
##     Normal               138 (35.3%) 35 (28.0%)            
##     Agree                18 (4.60%)  26 (20.8%)            
## CO                       9.42 (7.27) 10.1 (6.33)   0.283   
## DI                       6.19 (4.79) 6.58 (5.45)   0.475   
## TI                       16.0 (13.1) 15.3 (11.8)   0.586   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (32 variables): GD, IN, NC, MC, CC, BO, RO, NB, NR, DT, SC, WE, WK, BRE, BCO, BIN, BEN, BRC, BWE, NBR, CO, DI
t1 = compareGroups(MSI ~ NM + SP + BI + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, data = MotorS)
createTable(t1)
## 
## --------Summary descriptives table by 'MSI'---------
## 
## _______________________________________________________ 
##                           No          Yes     p.overall 
##                          N=391       N=125              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## NM:                                             0.434   
##     None               5 (1.28%)   0 (0.00%)            
##     1                 54 (13.8%)  14 (11.2%)            
##     >=2               332 (84.9%) 111 (88.8%)           
## SP:                                             0.002   
##     No                40 (10.2%)   8 (6.40%)            
##     Yes               300 (76.7%) 113 (90.4%)           
##     Don't know        51 (13.0%)   4 (3.20%)            
## BI:                                            <0.001   
##     No                194 (49.6%) 28 (22.4%)            
##     Yes               197 (50.4%) 97 (77.6%)            
## WI:                                            <0.001   
##     No                194 (49.6%) 28 (22.4%)            
##     At Bus Stops      105 (26.9%) 39 (31.2%)            
##     Internet/Facebook 43 (11.0%)  30 (24.0%)            
##     From Friends      24 (6.14%)  18 (14.4%)            
##     Others            25 (6.39%)  10 (8.00%)            
## BC:                                             0.003   
##     No                176 (45.0%) 76 (60.8%)            
##     Yes               215 (55.0%) 49 (39.2%)            
## BST:                                            0.022   
##     Don't agree       182 (46.5%) 66 (52.8%)            
##     Normal            121 (30.9%) 23 (18.4%)            
##     Agree             88 (22.5%)  36 (28.8%)            
## BSC:                                            0.045   
##     Don't agree       61 (15.6%)  20 (16.0%)            
##     Normal            108 (27.6%) 21 (16.8%)            
##     Agree             222 (56.8%) 84 (67.2%)            
## BSA:                                            0.012   
##     Don't agree       40 (10.2%)  12 (9.60%)            
##     Normal            113 (28.9%) 20 (16.0%)            
##     Agree             238 (60.9%) 93 (74.4%)            
## BHE:                                            0.016   
##     Don't agree       182 (46.5%) 45 (36.0%)            
##     Normal            123 (31.5%) 37 (29.6%)            
##     Agree             86 (22.0%)  43 (34.4%)            
## BUB:                                           <0.001   
##     No                168 (43.0%) 30 (24.0%)            
##     Yes               223 (57.0%) 95 (76.0%)            
## SBM:                                           <0.001   
##     Don't agree       218 (55.8%) 45 (36.0%)            
##     Normal            136 (34.8%) 33 (26.4%)            
##     Agree             37 (9.46%)  47 (37.6%)            
## SBS:                                           <0.001   
##     Don't agree       235 (60.1%) 64 (51.2%)            
##     Normal            138 (35.3%) 35 (28.0%)            
##     Agree             18 (4.60%)  26 (20.8%)            
## TI                    16.0 (13.1) 15.3 (11.8)   0.586   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05 (22/32 variables): GD, IN, NC, MC, CC, BO, RO, NB, NR, DT, SC, WE, WK, BRE, BCO, BIN, BEN, BRC, BWE, NBR, CO, DI; remains 10/32 variables p-value to test (AG, OC, MO, TP, FR, TS, LS, CA, MR, TI); 1 Cor>0.7 (BI) --> 21 variables
t.c = compareGroups(MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI, data = MotorS)
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t.c)
## 
## --------Summary descriptives table by 'MSI'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=391       N=125              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## AG:                                                0.075   
##     <=24                 155 (39.6%) 59 (47.2%)            
##     25-60                231 (59.1%) 62 (49.6%)            
##     >60                   5 (1.28%)   4 (3.20%)            
## OC:                                                0.069   
##     Pupils/Students      117 (29.9%) 47 (37.6%)            
##     Officers/Workers     124 (31.7%) 29 (23.2%)            
##     Housewife/Unemployed 24 (6.14%)   6 (4.80%)            
##     Free labor           108 (27.6%) 31 (24.8%)            
##     Others               18 (4.60%)  12 (9.60%)            
## MO:                                                1.000   
##     No                   32 (8.18%)  10 (8.00%)            
##     Yes                  359 (91.8%) 115 (92.0%)           
## NM:                                                0.434   
##     None                  5 (1.28%)   0 (0.00%)            
##     1                    54 (13.8%)  14 (11.2%)            
##     >=2                  332 (84.9%) 111 (88.8%)           
## TP:                                                0.158   
##     Work/Study           255 (65.2%) 95 (76.0%)            
##     Picking Children     57 (14.6%)  12 (9.60%)            
##     Entertainment/Food   72 (18.4%)  16 (12.8%)            
##     Others                7 (1.79%)   2 (1.60%)            
## FR:                                                0.103   
##     Once                 21 (5.37%)   2 (1.60%)            
##     twice                15 (3.84%)   2 (1.60%)            
##     >=3 times            355 (90.8%) 121 (96.8%)           
## TS:                                                0.075   
##     No                   178 (45.5%) 69 (55.2%)            
##     Yes                  213 (54.5%) 56 (44.8%)            
## SP:                                                0.002   
##     No                   40 (10.2%)   8 (6.40%)            
##     Yes                  300 (76.7%) 113 (90.4%)           
##     Don't know           51 (13.0%)   4 (3.20%)            
## WI:                                               <0.001   
##     No                   194 (49.6%) 28 (22.4%)            
##     At Bus Stops         105 (26.9%) 39 (31.2%)            
##     Internet/Facebook    43 (11.0%)  30 (24.0%)            
##     From Friends         24 (6.14%)  18 (14.4%)            
##     Others               25 (6.39%)  10 (8.00%)            
## LS:                                                0.086   
##     No                   76 (19.4%)  34 (27.2%)            
##     Yes                  315 (80.6%) 91 (72.8%)            
## BC:                                                0.003   
##     No                   176 (45.0%) 76 (60.8%)            
##     Yes                  215 (55.0%) 49 (39.2%)            
## CA:                                                0.307   
##     No                   209 (53.5%) 74 (59.2%)            
##     Yes                  182 (46.5%) 51 (40.8%)            
## BST:                                               0.022   
##     Don't agree          182 (46.5%) 66 (52.8%)            
##     Normal               121 (30.9%) 23 (18.4%)            
##     Agree                88 (22.5%)  36 (28.8%)            
## BSC:                                               0.045   
##     Don't agree          61 (15.6%)  20 (16.0%)            
##     Normal               108 (27.6%) 21 (16.8%)            
##     Agree                222 (56.8%) 84 (67.2%)            
## BSA:                                               0.012   
##     Don't agree          40 (10.2%)  12 (9.60%)            
##     Normal               113 (28.9%) 20 (16.0%)            
##     Agree                238 (60.9%) 93 (74.4%)            
## BHE:                                               0.016   
##     Don't agree          182 (46.5%) 45 (36.0%)            
##     Normal               123 (31.5%) 37 (29.6%)            
##     Agree                86 (22.0%)  43 (34.4%)            
## MR:                                                0.129   
##     Safety               21 (5.37%)   6 (4.80%)            
##     Comfortable          200 (51.2%) 54 (43.2%)            
##     Low price            19 (4.86%)   5 (4.00%)            
##     Accessibility        109 (27.9%) 52 (41.6%)            
##     Reliability          35 (8.95%)   7 (5.60%)            
##     others                7 (1.79%)   1 (0.80%)            
## BUB:                                              <0.001   
##     No                   168 (43.0%) 30 (24.0%)            
##     Yes                  223 (57.0%) 95 (76.0%)            
## SBM:                                              <0.001   
##     Don't agree          218 (55.8%) 45 (36.0%)            
##     Normal               136 (34.8%) 33 (26.4%)            
##     Agree                37 (9.46%)  47 (37.6%)            
## SBS:                                              <0.001   
##     Don't agree          235 (60.1%) 64 (51.2%)            
##     Normal               138 (35.3%) 35 (28.0%)            
##     Agree                18 (4.60%)  26 (20.8%)            
## TI                       16.0 (13.1) 15.3 (11.8)   0.586   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

3. Descriptive analyse by graphs

library(magrittr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.0.4
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
MotorS %>%
  group_by(AG, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = AG)) +
  geom_col(position = "fill") +
  xlab("Mode shift intention") +
  ylab("Age") +
  ggtitle("Probability of Bus Shift ~ Age") # Đối tuổi học sinh và người già có khả năng chuyển đổi sang xe buýt cao hơn

MotorS %>%
  group_by(OC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = OC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Occupation") +
  ggtitle("Proportion of Bus Shift ~ Occupation") # Đối tượng học sinh và nghỉ hưu có khả năng chuyển đổi sang xe buýt cao hơn

MotorS %>%
  group_by(MO, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = MO)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Motor Owning") +
  ggtitle("Proportion of Bus Shift ~ Motor Owning") # Không có sự khác biệt giữa 2 nhóm 

MotorS %>%
  group_by(NM, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = NM)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Number of Motor") +
  ggtitle("Proportion of Bus Shift ~ Number of Motor")

MotorS %>%
  group_by(TP, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = TP)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Travel Purpose") +
  ggtitle("Proportion of Bus Shift ~ Travel Purpose") # Mục đích công việc có khả năng chuyển đổi nhiều hơn

MotorS %>%
  group_by(FR, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = FR)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Travel Frequency") +
  ggtitle("Proportion of Bus Shift ~ Travel Frequency") # Có vẻ không liên quan đến khả năng chuyển đổi

MotorS %>%
  group_by(TS, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = TS)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Temporary Stops") +
  ggtitle("Proportion of Bus Shift ~ Temporary Stops") # Có diểm dừng tạm thời ít khả năng chuyển đổi hơn

MotorS %>%
  group_by(SP, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = SP)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus Stop Present") +
  ggtitle("Proportion of Bus Shift ~ Bus Stop Present") # Gần trạm xe buýt có khẳ năng chuyển đổi cao hơn

MotorS %>%
  group_by(BI, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BI)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus Information") +
  ggtitle("Proportion of Bus Shift ~ Bus Information") # Biết thông tin về hệ thống buýt có khả năng chuyển đổi cao hơn

MotorS %>%
  group_by(WI, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = WI)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Way to have bus information") +
  ggtitle("Proportion of Bus Shift ~ Way to have bus information") # Không biết thông tin về buýt ít có khả năng chuyển dổi hơn

MotorS %>%
  group_by(LS, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = LS)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Lane Separate") +
  ggtitle("Proportion of Bus Shift ~ Lane Separate") # Yếu tố đường có phân làn dường như là tạo đk thuận lợi cho xe máy nên ít có khả năng chuyển đổi hơn

MotorS %>%
  group_by(BC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus Condition") +
  ggtitle("Proportion of Bus Shift ~ Bus Condition") # Điều kiện trạm dừng (có mái che) tỷ lệ nghich với khả năng chuyển đổi (Mâu thuẩn ??? Kiểm tra)

MotorS %>%
  group_by(CA, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = CA)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Center Area") +
  ggtitle("Proportion of Bus Shift ~ Center Area") # Chuyến đi ở khu vực trung tâm ít có khả năng chuyển đổi hơn

MotorS %>%
  group_by(BST, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BST)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of bus time saving") +
  ggtitle("Proportion of Bus Shift ~ Perception of bus time saving") # Cảm nhận xe buýt tiết kiệm thời gian đi lại có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

MotorS %>%
  group_by(BSC, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BSC)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of cost saving") +
  ggtitle("Proportion of Bus Shift ~ Perception of cost saving") # Cảm nhận xe buýt tiết kiệm chi phí đi lại có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

MotorS %>%
  group_by(BSA, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BSA)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of Safe") +
  ggtitle("Proportion of Bus Shift ~ Perception of Safe") # Cảm nhận xe buýt an toàn có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

MotorS %>%
  group_by(BHE, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BHE)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Perception of Healthy") +
  ggtitle("Proportion of Bus Shift ~ Perception of Healthy") # Cảm nhận xe buýt có lợi cho sức khỏe có khả năng ảnh hưởng tích cực đến khả năng chuyển đổi

MotorS %>%
  group_by(MR, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = MR)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Mode Choice Reason") +
  ggtitle("Proportion of Bus Shift ~ Mode Choice Reason")

MotorS %>%
  group_by(BUB, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = BUB)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Bus use before") +
  ggtitle("Proportion of Bus Shift ~ Bus use before") # Trải nghiệm xe buýt có ảnh hưởng tích cực đến khả năng chuyển đổi

MotorS %>%
  group_by(SBM, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = SBM)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Like to shift to bus for main trips") +
  ggtitle("Proportion of Bus Shift ~ Like to shift to bus for main trips")

MotorS %>%
  group_by(SBS, MSI) %>%
  count() %>% 
  ggplot(aes(MSI, n, fill = SBS)) +
  geom_col(position = "fill") +
  xlab("Mode Shift Intention") +
  ylab("Like to shift to bus for sub trips") +
  ggtitle("Proportion of Bus Shift ~ Like to shift to bus for sub trips")

## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(MotorS, aes(x=TI, fill = MSI, color = MSI)) +
  geom_histogram (position = "dodge") +
  xlab("Travel Period") +
  ylab("Count") +
  ggtitle("Histogram of Travel Time")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(MotorS, aes(x=DI, fill = MSI, color = MSI)) +
  geom_histogram (position = "dodge") +
  xlab("Distance") +
  ylab("Count") +
  ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(MotorS, aes(x=CO, fill = MSI, color = MSI)) +
  geom_histogram (position = "dodge") +
  xlab("Cost") +
  ylab("Count") +
  ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Statistic Analysis by boxplot (continuous variales)
MotorS %>%
  group_by(DI, MSI) %>%
  count() %>% 
  ggplot(aes(x = MSI, y = DI, fill = MSI)) +
  geom_boxplot() +
  xlab("Mode Shift to Bus") +
  ylab("Distance") +
  ggtitle("Boxplot of Bus Shift ~ Distance") # Không có sự khác biệt giữa 2 nhóm

MotorS %>%
  group_by(CO, MSI) %>%
  count() %>% 
  ggplot(aes(x = MSI, y = CO, fill = MSI)) +
  geom_boxplot() +
  xlab("Mode Shift to Bus") +
  ylab("Cost") +
  ggtitle("Boxplot of Bus Shift ~ Cost") # Không có sự khác biệt giữa 2 nhóm

MotorS %>%
  group_by(TI, MSI) %>%
  count() %>% 
  ggplot(aes(x = MSI, y = TI, fill = MSI)) +
  geom_boxplot() +
  xlab("Mode Shift to Bus") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Bus Shift ~ Travel Period") # Có sự khác biệt giữa 2 nhóm

## Correlation in continuous variables
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
CVMSI_MoBS = data.frame(MotorS$MSI, MotorS$DI, MotorS$TI, MotorS$CO)
pairs.panels(CVMSI_MoBS)

summary(MotorS)
##    CA        BC                       TP              FR              DT     
##  No :283   No :252   Work/Study        :350   Once     : 23   Normal   :105  
##  Yes:233   Yes:264   Picking Children  : 69   twice    : 17   Rush hour:411  
##                      Entertainment/Food: 88   >=3 times:476                  
##                      Others            :  9                                  
##                                                                              
##                                                                              
##        DI               TI           SC        LS        TS     
##  Min.   : 0.200   Min.   :  0.00   No : 79   No :110   No :247  
##  1st Qu.: 3.000   1st Qu.: 10.00   Yes:437   Yes:406   Yes:269  
##  Median : 5.000   Median : 15.00                                
##  Mean   : 6.289   Mean   : 15.84                                
##  3rd Qu.:10.000   3rd Qu.: 20.00                                
##  Max.   :30.000   Max.   :180.00                                
##              MR        WE        WK                     NBR     
##  Safety       : 27   No :514   No :401   No Route         :122  
##  Comfortable  :254   Yes:  2   Yes:115   Inconvenience    :159  
##  Low price    : 24                       Unsafety         :  7  
##  Accessibility:161                       Long waiting time:158  
##  Reliability  : 42                       Unreliability    : 47  
##  others       :  8                       others           : 23  
##        CO          BUB               SBM               SBS     
##  Min.   : 0.000   No :198   Don't agree:263   Don't agree:299  
##  1st Qu.: 5.000   Yes:318   Normal     :169   Normal     :173  
##  Median :10.000             Agree      : 84   Agree      : 44  
##  Mean   : 9.595                                                
##  3rd Qu.:12.000                                                
##  Max.   :50.000                                                
##           BST               BSC               BSA               BRE     
##  Don't agree:248   Don't agree: 81   Don't agree: 52   Don't agree:101  
##  Normal     :144   Normal     :129   Normal     :133   Normal     :150  
##  Agree      :124   Agree      :306   Agree      :331   Agree      :265  
##                                                                         
##                                                                         
##                                                                         
##           BCO               BIN               BHE               BEN     
##  Don't agree:158   Don't agree:141   Don't agree:227   Don't agree: 48  
##  Normal     :149   Normal     :213   Normal     :160   Normal     :108  
##  Agree      :209   Agree      :162   Agree      :129   Agree      :360  
##                                                                         
##                                                                         
##                                                                         
##           BRC               BWE               SP        BI     
##  Don't agree: 57   Don't agree: 43   No        : 48   No :222  
##  Normal     : 87   Normal     : 50   Yes       :413   Yes:294  
##  Agree      :372   Agree      :423   Don't know: 55            
##                                                                
##                                                                
##                                                                
##                  WI       MSI           GD          AG     
##  No               :222   No :391   Female:221   <=24 :214  
##  At Bus Stops     :144   Yes:125   Male  :295   25-60:293  
##  Internet/Facebook: 73                          >60  :  9  
##  From Friends     : 42                                     
##  Others           : 35                                     
##                                                            
##                     OC                     IN        NC        MC     
##  Pupils/Students     :164   <8 millions     :113   No :223   No : 25  
##  Officers/Workers    :153   (8-15) millions :236   Yes:293   Yes:491  
##  Housewife/Unemployed: 30   (15-25) millions:128                      
##  Free labor          :139   >25 millions    : 39                      
##  Others              : 30                                             
##                                                                       
##    CC        BO        MO        RO         NB         NM        NR     
##  No :445   No :335   No : 42   No :475   None:253   None:  5   No :435  
##  Yes: 71   Yes:181   Yes:474   Yes: 41   1   :197   1   : 68   Yes: 81  
##                                          >=2 : 66   >=2 :443            
##                                                                         
##                                                                         
## 
# Correlations between MSI and variables of Group 1 (Individual Characteristics)
Cor1 = data.frame(MotorS$MSI, MotorS$GD, MotorS$AG, MotorS$OC, MotorS$IN, MotorS$NC, MotorS$MC, MotorS$CC, MotorS$BO, MotorS$MO, MotorS$RO, MotorS$NB, MotorS$NM, MotorS$NR)
pairs.panels(Cor1)

Cor1_1 = data.frame(MotorS$MSI, MotorS$GD, MotorS$AG, MotorS$OC, MotorS$IN, MotorS$NC)
pairs.panels(Cor1_1)

Cor1_2 = data.frame(MotorS$MSI, MotorS$MC, MotorS$CC, MotorS$BO, MotorS$MO, MotorS$RO, MotorS$NB, MotorS$NM, MotorS$NR)
pairs.panels(Cor1_2)

# Correlation between MSI and variables of Group 2 (Trip Characteristics)
Cor2 = data.frame(MotorS$MSI, MotorS$TP, MotorS$FR, MotorS$DT, MotorS$TS, MotorS$CO, MotorS$DI)
pairs.panels(Cor2)

# Correlation between MSI and variables of Group 3 (Infrastructures)
Cor3 = data.frame(MotorS$MSI, MotorS$SP, MotorS$BI, MotorS$WI, MotorS$LS, MotorS$SC, MotorS$BC, MotorS$TI)
pairs.panels(Cor3)

# Correlation between MSI and variables of Group 4 (Surrouding Environment) and 5 (Others)
Cor45 = data.frame(MotorS$MSI, MotorS$WE, MotorS$WK, MotorS$CA, MotorS$MR, MotorS$NBR, MotorS$BUB, MotorS$SBM, MotorS$SBS)
pairs.panels(Cor45)

# Correlation between MSI and variables of Group 6 (Perception of Bus)
Cor6 = data.frame(MotorS$MSI, MotorS$BST, MotorS$BSC, MotorS$BSA, MotorS$BRE, MotorS$BCO, MotorS$BIN, MotorS$BHE, MotorS$BEN, MotorS$BRC, MotorS$BWE)
pairs.panels(Cor6)

3. Binary Model

# Model with multi-variables (in t.c) --> 21 variables
mg = glm(MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI, family = binomial, data = MotorS)
mg
## 
## Call:  glm(formula = MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + 
##     LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + 
##     TI, family = binomial, data = MotorS)
## 
## Coefficients:
##            (Intercept)                 AG25-60                   AG>60  
##             -1.749e+01              -1.375e-01               1.945e+00  
##     OCOfficers/Workers  OCHousewife/Unemployed            OCFree labor  
##             -3.472e-01               2.321e-01              -3.092e-01  
##               OCOthers                   MOYes                     NM1  
##              7.343e-01              -1.427e-01               1.598e+01  
##                  NM>=2      TPPicking Children    TPEntertainment/Food  
##              1.599e+01              -7.910e-01              -6.188e-01  
##               TPOthers                 FRtwice             FR>=3 times  
##             -9.471e-01               4.573e-01               1.152e+00  
##                  TSYes                   SPYes            SPDon't know  
##             -5.712e-01              -3.686e-01              -4.860e-01  
##         WIAt Bus Stops     WIInternet/Facebook          WIFrom Friends  
##              7.663e-01               1.852e+00               1.498e+00  
##               WIOthers                   LSYes                   BCYes  
##              1.229e+00              -6.671e-01              -5.902e-01  
##                  CAYes               BSTNormal                BSTAgree  
##             -5.145e-01              -5.892e-01              -4.991e-01  
##              BSCNormal                BSCAgree               BSANormal  
##             -5.346e-01              -5.667e-01               2.374e-01  
##               BSAAgree               BHENormal                BHEAgree  
##              9.654e-01              -3.817e-01               5.094e-02  
##          MRComfortable             MRLow price         MRAccessibility  
##             -4.249e-01              -2.663e-01               3.595e-01  
##          MRReliability                MRothers                  BUBYes  
##             -1.049e+00              -1.302e+00               5.175e-01  
##              SBMNormal                SBMAgree               SBSNormal  
##              5.631e-01               2.020e+00              -3.276e-01  
##               SBSAgree                      TI  
##              7.508e-01              -4.722e-04  
## 
## Degrees of Freedom: 515 Total (i.e. Null);  472 Residual
## Null Deviance:       571.4 
## Residual Deviance: 412.7     AIC: 500.7
summary(mg) 
## 
## Call:
## glm(formula = MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + 
##     LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + 
##     TI, family = binomial, data = MotorS)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.68669  -0.57742  -0.36532  -0.00083   2.92882  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -1.749e+01  8.037e+02  -0.022  0.98263    
## AG25-60                -1.375e-01  4.484e-01  -0.307  0.75904    
## AG>60                   1.945e+00  1.064e+00   1.827  0.06766 .  
## OCOfficers/Workers     -3.472e-01  5.110e-01  -0.680  0.49680    
## OCHousewife/Unemployed  2.321e-01  7.494e-01   0.310  0.75680    
## OCFree labor           -3.092e-01  4.877e-01  -0.634  0.52616    
## OCOthers                7.343e-01  7.214e-01   1.018  0.30873    
## MOYes                  -1.427e-01  4.965e-01  -0.287  0.77379    
## NM1                     1.598e+01  8.037e+02   0.020  0.98414    
## NM>=2                   1.599e+01  8.037e+02   0.020  0.98413    
## TPPicking Children     -7.910e-01  4.675e-01  -1.692  0.09066 .  
## TPEntertainment/Food   -6.188e-01  3.911e-01  -1.582  0.11360    
## TPOthers               -9.471e-01  1.198e+00  -0.791  0.42911    
## FRtwice                 4.573e-01  1.197e+00   0.382  0.70247    
## FR>=3 times             1.152e+00  8.573e-01   1.344  0.17895    
## TSYes                  -5.712e-01  2.693e-01  -2.121  0.03396 *  
## SPYes                  -3.686e-01  5.883e-01  -0.627  0.53097    
## SPDon't know           -4.860e-01  7.534e-01  -0.645  0.51890    
## WIAt Bus Stops          7.663e-01  4.079e-01   1.879  0.06028 .  
## WIInternet/Facebook     1.852e+00  4.601e-01   4.025 5.71e-05 ***
## WIFrom Friends          1.498e+00  5.181e-01   2.892  0.00383 ** 
## WIOthers                1.229e+00  5.712e-01   2.152  0.03139 *  
## LSYes                  -6.671e-01  3.272e-01  -2.039  0.04150 *  
## BCYes                  -5.902e-01  2.824e-01  -2.090  0.03663 *  
## CAYes                  -5.145e-01  2.936e-01  -1.753  0.07968 .  
## BSTNormal              -5.892e-01  3.912e-01  -1.506  0.13204    
## BSTAgree               -4.991e-01  3.929e-01  -1.270  0.20407    
## BSCNormal              -5.346e-01  5.332e-01  -1.003  0.31602    
## BSCAgree               -5.667e-01  4.867e-01  -1.164  0.24426    
## BSANormal               2.374e-01  5.706e-01   0.416  0.67738    
## BSAAgree                9.654e-01  5.654e-01   1.707  0.08777 .  
## BHENormal              -3.817e-01  3.393e-01  -1.125  0.26060    
## BHEAgree                5.094e-02  3.333e-01   0.153  0.87854    
## MRComfortable          -4.249e-01  6.778e-01  -0.627  0.53075    
## MRLow price            -2.663e-01  9.399e-01  -0.283  0.77690    
## MRAccessibility         3.595e-01  6.947e-01   0.518  0.60479    
## MRReliability          -1.049e+00  8.501e-01  -1.234  0.21736    
## MRothers               -1.302e+00  1.604e+00  -0.812  0.41703    
## BUBYes                  5.175e-01  3.064e-01   1.689  0.09120 .  
## SBMNormal               5.631e-01  3.704e-01   1.520  0.12839    
## SBMAgree                2.020e+00  4.291e-01   4.707 2.51e-06 ***
## SBSNormal              -3.276e-01  3.342e-01  -0.980  0.32694    
## SBSAgree                7.508e-01  4.858e-01   1.546  0.12222    
## TI                     -4.722e-04  1.261e-02  -0.037  0.97014    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 412.66  on 472  degrees of freedom
## AIC: 500.66
## 
## Number of Fisher Scoring iterations: 15
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6) --> 12 variables
#mg1 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
## Way 2: Use function lmr in package "rms" - library(rms) -ml = lrm(MSI ~ GD + AG + OC + NC + MC + CC + MO + NM + TM + TP + TS + SP + WI + BC + CA + BSC + BSA + BRC + MR + BUB + SBM + SBS + TI)

# Find Parsimonious model - Tìm mô hình tối ưu - AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI --> 21 variables
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-5)
xvars <- MotorS[, c(34,35,41, 44, 3, 4, 10, 29, 31, 9, 2, 1, 19, 20, 21, 25, 11, 16, 17, 18, 7)]
y <- MotorS$MSI
bma.search <- bic.glm (xvars, y, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search)
## 
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   15  models were selected
##  Best  5  models (cumulative posterior probability =  0.6784 ): 
## 
##                          p!=0    EV        SD       model 1     model 2   
## Intercept                100    -1.752737  0.41813  -1.583e+00  -1.873e+00
## AG                         0.0                                            
##   .25-60                         0.000000  0.00000       .           .    
##   .>60                           0.000000  0.00000       .           .    
## OC                         0.0                                            
##   .Officers/Workers              0.000000  0.00000       .           .    
##   .Housewife/Unemployed          0.000000  0.00000       .           .    
##   .Free labor                    0.000000  0.00000       .           .    
##   .Others                        0.000000  0.00000       .           .    
## MO                         0.0                                            
##   .Yes                           0.000000  0.00000       .           .    
## NM                         0.0                                            
##   .1                             0.000000  0.00000       .           .    
##   .>=2                           0.000000  0.00000       .           .    
## TP                         0.0                                            
##   .Picking Children              0.000000  0.00000       .           .    
##   .Entertainment/Food            0.000000  0.00000       .           .    
##   .Others                        0.000000  0.00000       .           .    
## FR                         1.9                                            
##   .twice                         0.002312  0.15820       .           .    
##   .>=3 times                     0.027007  0.22183       .           .    
## TS                        14.0                                            
##   .Yes                          -0.071426  0.19746       .           .    
## SP                         0.0                                            
##   .Yes                           0.000000  0.00000       .           .    
##   .Don't know                    0.000000  0.00000       .           .    
## WI                        94.9                                            
##   .At Bus Stops                  0.591693  0.32161   6.198e-01   6.499e-01
##   .Internet/Facebook             1.484951  0.47839   1.620e+00   1.489e+00
##   .From Friends                  1.379271  0.50125   1.425e+00   1.444e+00
##   .Others                        0.871717  0.50114   9.161e-01   7.519e-01
## LS                         8.0                                            
##   .Yes                          -0.047928  0.18227       .           .    
## BC                        65.9                                            
##   .Yes                          -0.439201  0.36929  -6.867e-01  -6.296e-01
## CA                        62.0                                            
##   .Yes                          -0.421534  0.38334  -6.961e-01       .    
## BST                        0.0                                            
##    .Normal                       0.000000  0.00000       .           .    
##    .Agree                        0.000000  0.00000       .           .    
## BSC                        0.0                                            
##    .Normal                       0.000000  0.00000       .           .    
##    .Agree                        0.000000  0.00000       .           .    
## BSA                        3.1                                            
##    .Normal                      -0.008968  0.09369       .           .    
##    .Agree                        0.015071  0.10975       .           .    
## BHE                        0.0                                            
##    .Normal                       0.000000  0.00000       .           .    
##    .Agree                        0.000000  0.00000       .           .    
## MR                         0.0                                            
##   .Comfortable                   0.000000  0.00000       .           .    
##   .Low price                     0.000000  0.00000       .           .    
##   .Accessibility                 0.000000  0.00000       .           .    
##   .Reliability                   0.000000  0.00000       .           .    
##   .others                        0.000000  0.00000       .           .    
## BUB                        6.4                                            
##    .Yes                          0.029706  0.13088       .           .    
## SBM                      100.0                                            
##    .Normal                       0.019322  0.27131   2.715e-03   5.202e-02
##    .Agree                        1.773231  0.30597   1.833e+00   1.681e+00
## SBS                        0.0                                            
##    .Normal                       0.000000  0.00000       .           .    
##    .Agree                        0.000000  0.00000       .           .    
## TI                         0.0   0.000000  0.00000       .           .    
##                                                                           
## nVar                                                   4           3      
## BIC                                                 -2.688e+03  -2.685e+03
## post prob                                            0.343       0.118    
##                          model 3     model 4     model 5   
## Intercept                -1.924e+00  -1.411e+00  -2.162e+00
## AG                                                         
##   .25-60                      .           .           .    
##   .>60                        .           .           .    
## OC                                                         
##   .Officers/Workers           .           .           .    
##   .Housewife/Unemployed       .           .           .    
##   .Free labor                 .           .           .    
##   .Others                     .           .           .    
## MO                                                         
##   .Yes                        .           .           .    
## NM                                                         
##   .1                          .           .           .    
##   .>=2                        .           .           .    
## TP                                                         
##   .Picking Children           .           .           .    
##   .Entertainment/Food         .           .           .    
##   .Others                     .           .           .    
## FR                                                         
##   .twice                      .           .           .    
##   .>=3 times                  .           .           .    
## TS                                                         
##   .Yes                        .           .           .    
## SP                                                         
##   .Yes                        .           .           .    
##   .Don't know                 .           .           .    
## WI                                                         
##   .At Bus Stops           6.032e-01   6.783e-01   6.352e-01
##   .Internet/Facebook      1.566e+00   1.546e+00   1.471e+00
##   .From Friends           1.490e+00   1.544e+00   1.514e+00
##   .Others                 1.029e+00   1.006e+00   8.948e-01
## LS                                                         
##   .Yes                        .      -6.503e-01       .    
## BC                                                         
##   .Yes                        .           .           .    
## CA                                                         
##   .Yes                   -6.314e-01  -7.242e-01       .    
## BST                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## BSC                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## BSA                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## BHE                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## MR                                                         
##   .Comfortable                .           .           .    
##   .Low price                  .           .           .    
##   .Accessibility              .           .           .    
##   .Reliability                .           .           .    
##   .others                     .           .           .    
## BUB                                                        
##    .Yes                       .           .           .    
## SBM                                                        
##    .Normal               -1.765e-02  -1.136e-02   1.838e-02
##    .Agree                 1.809e+00   1.834e+00   1.668e+00
## SBS                                                        
##    .Normal                    .           .           .    
##    .Agree                     .           .           .    
## TI                            .           .           .    
##                                                            
## nVar                        3           4           2      
## BIC                      -2.685e+03  -2.684e+03  -2.684e+03
## post prob                 0.094       0.063       0.061
# Remove ns variables (AG, OC, MO, TP, FR, TS, LS, CA, MR) and change DI (7) by TI (8) --> 12 variables
xvars1 <- MotorS[, c(44, 29, 31, 2, 19, 20, 21, 25, 16, 17, 18, 7)]
y1 <- MotorS$MSI
bma.search1 <- bic.glm (xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search1)
## 
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   6  models were selected
##  Best  5  models (cumulative posterior probability =  0.9741 ): 
## 
##                       p!=0    EV       SD       model 1     model 2   
## Intercept             100    -2.01437  0.31491  -1.873e+00  -2.162e+00
## NM                      0.0                                           
##   .1                          0.00000  0.00000       .           .    
##   .>=2                        0.00000  0.00000       .           .    
## SP                      0.0                                           
##   .Yes                        0.00000  0.00000       .           .    
##   .Don't know                 0.00000  0.00000       .           .    
## WI                    100.0                                           
##   .At Bus Stops               0.63184  0.29730   6.499e-01   6.352e-01
##   .Internet/Facebook          1.46875  0.33345   1.489e+00   1.471e+00
##   .From Friends               1.44774  0.39615   1.444e+00   1.514e+00
##   .Others                     0.78514  0.45320   7.519e-01   8.948e-01
## BC                     67.2                                           
##   .Yes                       -0.42544  0.35331  -6.296e-01       .    
## BST                     2.6                                           
##    .Normal                   -0.02005  0.13299       .           .    
##    .Agree                    -0.01151  0.08613       .           .    
## BSC                     0.0                                           
##    .Normal                    0.00000  0.00000       .           .    
##    .Agree                     0.00000  0.00000       .           .    
## BSA                     3.4                                           
##    .Normal                   -0.01030  0.09915       .           .    
##    .Agree                     0.01503  0.10872       .           .    
## BHE                     0.0                                           
##    .Normal                    0.00000  0.00000       .           .    
##    .Agree                     0.00000  0.00000       .           .    
## BUB                    19.3                                           
##    .Yes                       0.09150  0.21839       .           .    
## SBM                   100.0                                           
##    .Normal                    0.03610  0.27425   5.202e-02   1.838e-02
##    .Agree                     1.66220  0.29412   1.681e+00   1.668e+00
## SBS                     0.0                                           
##    .Normal                    0.00000  0.00000       .           .    
##    .Agree                     0.00000  0.00000       .           .    
## TI                      0.0   0.00000  0.00000       .           .    
##                                                                       
## nVar                                               3           2      
## BIC                                             -2.685e+03  -2.684e+03
## post prob                                        0.493       0.255    
##                       model 3     model 4     model 5   
## Intercept             -2.106e+00  -2.390e+00  -2.075e+00
## NM                                                      
##   .1                       .           .           .    
##   .>=2                     .           .           .    
## SP                                                      
##   .Yes                     .           .           .    
##   .Don't know              .           .           .    
## WI                                                      
##   .At Bus Stops        5.817e-01   5.561e-01   6.564e-01
##   .Internet/Facebook   1.405e+00   1.368e+00   1.587e+00
##   .From Friends        1.365e+00   1.416e+00   1.467e+00
##   .Others              6.725e-01   8.001e-01   8.648e-01
## BC                                                      
##   .Yes                -6.178e-01       .      -7.251e-01
## BST                                                     
##    .Normal                 .           .           .    
##    .Agree                  .           .           .    
## BSC                                                     
##    .Normal                 .           .           .    
##    .Agree                  .           .           .    
## BSA                                                     
##    .Normal                 .           .      -3.066e-01
##    .Agree                  .           .       4.472e-01
## BHE                                                     
##    .Normal                 .           .           .    
##    .Agree                  .           .           .    
## BUB                                                     
##    .Yes                4.664e-01   4.888e-01       .    
## SBM                                                     
##    .Normal            -1.303e-02  -5.210e-02   6.190e-02
##    .Agree              1.604e+00   1.580e+00   1.568e+00
## SBS                                                     
##    .Normal                 .           .           .    
##    .Agree                  .           .           .    
## TI                         .           .           .    
##                                                         
## nVar                     4           3           4      
## BIC                   -2.683e+03  -2.682e+03  -2.680e+03
## post prob              0.119       0.073       0.034
#imageplot.bma(bma.search1)

## Model 1 from bma.search1 : Remove ns variables (AG, OC, MO, TP, FR, TS, LS, CA, MR) and change DI (7) by TI (8) --> 12 variables
mg1 = glm(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, family = binomial, data = MotorS)
mg1
## 
## Call:  glm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + 
##     BUB + SBM + SBS + TI, family = binomial, data = MotorS)
## 
## Coefficients:
##         (Intercept)                  NM1                NM>=2  
##           -16.38213             14.26799             14.31772  
##               SPYes         SPDon't know       WIAt Bus Stops  
##            -0.48827             -0.54986              0.72698  
## WIInternet/Facebook       WIFrom Friends             WIOthers  
##             1.73430              1.35463              0.95725  
##               BCYes            BSTNormal             BSTAgree  
##            -0.62908             -0.68942             -0.64951  
##           BSCNormal             BSCAgree            BSANormal  
##            -0.29375             -0.31145              0.20265  
##            BSAAgree            BHENormal             BHEAgree  
##             0.96808             -0.21547              0.15164  
##              BUBYes            SBMNormal             SBMAgree  
##             0.49882              0.40746              1.48605  
##           SBSNormal             SBSAgree                   TI  
##            -0.20278              1.01296              0.00233  
## 
## Degrees of Freedom: 515 Total (i.e. Null);  492 Residual
## Null Deviance:       571.4 
## Residual Deviance: 458.2     AIC: 506.2
summary(mg1)
## 
## Call:
## glm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + 
##     BUB + SBM + SBS + TI, family = binomial, data = MotorS)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.80649  -0.69419  -0.43485  -0.00113   2.63043  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -16.38213  552.17363  -0.030  0.97633    
## NM1                  14.26799  552.17330   0.026  0.97939    
## NM>=2                14.31772  552.17321   0.026  0.97931    
## SPYes                -0.48827    0.51806  -0.942  0.34594    
## SPDon't know         -0.54986    0.68980  -0.797  0.42538    
## WIAt Bus Stops        0.72698    0.36374   1.999  0.04565 *  
## WIInternet/Facebook   1.73429    0.40686   4.263 2.02e-05 ***
## WIFrom Friends        1.35463    0.45332   2.988  0.00281 ** 
## WIOthers              0.95725    0.50130   1.910  0.05619 .  
## BCYes                -0.62908    0.24950  -2.521  0.01169 *  
## BSTNormal            -0.68942    0.35610  -1.936  0.05286 .  
## BSTAgree             -0.64951    0.34536  -1.881  0.06002 .  
## BSCNormal            -0.29375    0.48745  -0.603  0.54676    
## BSCAgree             -0.31145    0.43655  -0.713  0.47558    
## BSANormal             0.20265    0.53556   0.378  0.70515    
## BSAAgree              0.96808    0.52099   1.858  0.06315 .  
## BHENormal            -0.21547    0.30297  -0.711  0.47698    
## BHEAgree              0.15164    0.29906   0.507  0.61212    
## BUBYes                0.49882    0.27314   1.826  0.06781 .  
## SBMNormal             0.40746    0.33913   1.201  0.22956    
## SBMAgree              1.48605    0.36085   4.118 3.82e-05 ***
## SBSNormal            -0.20278    0.30871  -0.657  0.51128    
## SBSAgree              1.01296    0.43072   2.352  0.01868 *  
## TI                    0.00233    0.01126   0.207  0.83612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 458.17  on 492  degrees of freedom
## AIC: 506.17
## 
## Number of Fisher Scoring iterations: 14
## CHON: Model 2 --> So với 21 biến ở mg bỏ ns: và có cor gồm:  OC, MO, NM, FR, SP, BST, BSC, BHE, MR, BSB --> Thử và chọn theo AIC min
mg2 = glm(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial, data = MotorS)
mg2
## 
## Call:  glm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial, 
##     data = MotorS)
## 
## Coefficients:
##          (Intercept)               AG25-60                 AG>60  
##             -0.75079              -0.07254               2.30511  
##                TSYes        WIAt Bus Stops   WIInternet/Facebook  
##             -0.46179               0.75331               1.67211  
##       WIFrom Friends              WIOthers                 LSYes  
##              1.54306               1.11330              -0.60908  
##                BCYes                 CAYes             SBMNormal  
##             -0.70517              -0.83271               0.04398  
##             SBMAgree    TPPicking Children  TPEntertainment/Food  
##              2.02537              -0.58872              -0.79116  
##             TPOthers  
##             -0.71397  
## 
## Degrees of Freedom: 515 Total (i.e. Null);  500 Residual
## Null Deviance:       571.4 
## Residual Deviance: 457.1     AIC: 489.1
summary(mg2)
## 
## Call:
## glm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial, 
##     data = MotorS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1109  -0.6696  -0.4643  -0.1752   2.6984  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.75079    0.40307  -1.863 0.062508 .  
## AG25-60              -0.07254    0.25689  -0.282 0.777664    
## AG>60                 2.30511    0.83491   2.761 0.005764 ** 
## TSYes                -0.46179    0.24422  -1.891 0.058643 .  
## WIAt Bus Stops        0.75331    0.31163   2.417 0.015637 *  
## WIInternet/Facebook   1.67211    0.35823   4.668 3.04e-06 ***
## WIFrom Friends        1.54306    0.41987   3.675 0.000238 ***
## WIOthers              1.11330    0.49058   2.269 0.023246 *  
## LSYes                -0.60908    0.28641  -2.127 0.033451 *  
## BCYes                -0.70517    0.24843  -2.838 0.004533 ** 
## CAYes                -0.83271    0.25749  -3.234 0.001221 ** 
## SBMNormal             0.04398    0.27512   0.160 0.872991    
## SBMAgree              2.02537    0.32084   6.313 2.74e-10 ***
## TPPicking Children   -0.58872    0.41530  -1.418 0.156319    
## TPEntertainment/Food -0.79116    0.35106  -2.254 0.024221 *  
## TPOthers             -0.71397    0.99890  -0.715 0.474759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 457.08  on 500  degrees of freedom
## AIC: 489.08
## 
## Number of Fisher Scoring iterations: 5
## Calculate OR (odd ratio)
exp(mg2$coefficients)
##          (Intercept)              AG25-60                AG>60 
##            0.4719945            0.9300307           10.0252841 
##                TSYes       WIAt Bus Stops  WIInternet/Facebook 
##            0.6301538            2.1240088            5.3234125 
##       WIFrom Friends             WIOthers                LSYes 
##            4.6788976            3.0443939            0.5438493 
##                BCYes                CAYes            SBMNormal 
##            0.4940226            0.4348711            1.0449626 
##             SBMAgree   TPPicking Children TPEntertainment/Food 
##            7.5789143            0.5550382            0.4533186 
##             TPOthers 
##            0.4896951
exp(coef(mg2))
##          (Intercept)              AG25-60                AG>60 
##            0.4719945            0.9300307           10.0252841 
##                TSYes       WIAt Bus Stops  WIInternet/Facebook 
##            0.6301538            2.1240088            5.3234125 
##       WIFrom Friends             WIOthers                LSYes 
##            4.6788976            3.0443939            0.5438493 
##                BCYes                CAYes            SBMNormal 
##            0.4940226            0.4348711            1.0449626 
##             SBMAgree   TPPicking Children TPEntertainment/Food 
##            7.5789143            0.5550382            0.4533186 
##             TPOthers 
##            0.4896951
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: nnet
## 
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
## 
##     alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
## 
##     alpha
logistic.display(mg2)
## 
## Logistic regression predicting MSI : Yes vs No 
##  
##                       crude OR(95%CI)   adj. OR(95%CI)     P(Wald's test)
## AG: ref.=<=24                                                            
##    25-60              0.71 (0.47,1.06)  0.93 (0.56,1.54)   0.778         
##    >60                2.1 (0.55,8.1)    10.03 (1.95,51.5)  0.006         
##                                                                          
## TS: Yes vs No         0.68 (0.45,1.02)  0.63 (0.39,1.02)   0.059         
##                                                                          
## WI: ref.=No                                                              
##    At Bus Stops       2.57 (1.5,4.42)   2.12 (1.15,3.91)   0.016         
##    Internet/Facebook  4.83 (2.62,8.91)  5.32 (2.64,10.74)  < 0.001       
##    From Friends       5.2 (2.51,10.77)  4.68 (2.05,10.65)  < 0.001       
##    Others             2.77 (1.2,6.38)   3.04 (1.16,7.96)   0.023         
##                                                                          
## LS: Yes vs No         0.65 (0.4,1.03)   0.54 (0.31,0.95)   0.033         
##                                                                          
## BC: Yes vs No         0.53 (0.35,0.8)   0.49 (0.3,0.8)     0.005         
##                                                                          
## CA: Yes vs No         0.79 (0.53,1.19)  0.43 (0.26,0.72)   0.001         
##                                                                          
## SBM: ref.=Don't agree                                                    
##    Normal             1.18 (0.71,1.93)  1.04 (0.61,1.79)   0.873         
##    Agree              6.15 (3.6,10.53)  7.58 (4.04,14.21)  < 0.001       
##                                                                          
## TP: ref.=Work/Study                                                      
##    Picking Children   0.57 (0.29,1.1)   0.56 (0.25,1.25)   0.156         
##    Entertainment/Food 0.6 (0.33,1.08)   0.45 (0.23,0.9)    0.024         
##    Others             0.77 (0.16,3.76)  0.49 (0.07,3.47)   0.475         
##                                                                          
##                       P(LR-test)
## AG: ref.=<=24         0.029     
##    25-60                        
##    >60                          
##                                 
## TS: Yes vs No         0.057     
##                                 
## WI: ref.=No           < 0.001   
##    At Bus Stops                 
##    Internet/Facebook            
##    From Friends                 
##    Others                       
##                                 
## LS: Yes vs No         0.035     
##                                 
## BC: Yes vs No         0.004     
##                                 
## CA: Yes vs No         < 0.001   
##                                 
## SBM: ref.=Don't agree < 0.001   
##    Normal                       
##    Agree                        
##                                 
## TP: ref.=Work/Study   0.072     
##    Picking Children             
##    Entertainment/Food           
##    Others                       
##                                 
## Log-likelihood = -228.5402
## No. of observations = 516
## AIC value = 489.0804
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms: mg2-8 variables, mg1-19 variables, mg-23 variables
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
## 
##     dotplot
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## Registered S3 method overwritten by 'rms':
##   method       from      
##   print.lrtest epiDisplay
## 
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
## 
##     lrtest
## The following objects are masked from 'package:car':
## 
##     Predict, vif
mg2.c2 = lrm(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS)
mg2.c2
## Logistic Regression Model
##  
##  lrm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           516    LR chi2     114.29    R2       0.297    C       0.792    
##   No           391    d.f.            15    g        1.393    Dxy     0.584    
##   Yes          125    Pr(> chi2) <0.0001    gr       4.025    gamma   0.586    
##  max |deriv| 2e-11                          gp       0.215    tau-a   0.215    
##                                             Brier    0.140                     
##  
##                        Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept             -0.7508 0.4031 -1.86  0.0625  
##  AG=25-60              -0.0725 0.2569 -0.28  0.7777  
##  AG=>60                 2.3051 0.8349  2.76  0.0058  
##  TS=Yes                -0.4618 0.2442 -1.89  0.0586  
##  WI=At Bus Stops        0.7533 0.3116  2.42  0.0156  
##  WI=Internet/Facebook   1.6721 0.3582  4.67  <0.0001 
##  WI=From Friends        1.5431 0.4199  3.68  0.0002  
##  WI=Others              1.1133 0.4906  2.27  0.0232  
##  LS=Yes                -0.6091 0.2864 -2.13  0.0335  
##  BC=Yes                -0.7052 0.2484 -2.84  0.0045  
##  CA=Yes                -0.8327 0.2575 -3.23  0.0012  
##  SBM=Normal             0.0440 0.2751  0.16  0.8730  
##  SBM=Agree              2.0254 0.3208  6.31  <0.0001 
##  TP=Picking Children   -0.5887 0.4153 -1.42  0.1563  
##  TP=Entertainment/Food -0.7912 0.3511 -2.25  0.0242  
##  TP=Others             -0.7140 0.9989 -0.71  0.4748  
## 
mg1.c2 = lrm(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, data = MotorS)
mg1.c2
## Logistic Regression Model
##  
##  lrm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + 
##      BUB + SBM + SBS + TI, data = MotorS)
##  
##                        Model Likelihood    Discrimination    Rank Discrim.    
##                              Ratio Test           Indexes          Indexes    
##  Obs          516    LR chi2     113.20    R2       0.294    C       0.797    
##   No          391    d.f.            23    g        1.523    Dxy     0.594    
##   Yes         125    Pr(> chi2) <0.0001    gr       4.588    gamma   0.594    
##  max |deriv| 0.03                          gp       0.216    tau-a   0.218    
##                                            Brier    0.145                     
##  
##                       Coef    S.E.    Wald Z Pr(>|Z|)
##  Intercept            -9.3693 27.3434 -0.34  0.7319  
##  NM=1                  7.2551 27.3366  0.27  0.7907  
##  NM=>=2                7.3049 27.3348  0.27  0.7893  
##  SP=Yes               -0.4883  0.5181 -0.94  0.3459  
##  SP=Don't know        -0.5499  0.6898 -0.80  0.4254  
##  WI=At Bus Stops       0.7270  0.3637  2.00  0.0456  
##  WI=Internet/Facebook  1.7343  0.4069  4.26  <0.0001 
##  WI=From Friends       1.3546  0.4533  2.99  0.0028  
##  WI=Others             0.9572  0.5013  1.91  0.0562  
##  BC=Yes               -0.6291  0.2495 -2.52  0.0117  
##  BST=Normal           -0.6894  0.3561 -1.94  0.0529  
##  BST=Agree            -0.6495  0.3454 -1.88  0.0600  
##  BSC=Normal           -0.2937  0.4874 -0.60  0.5468  
##  BSC=Agree            -0.3114  0.4366 -0.71  0.4756  
##  BSA=Normal            0.2026  0.5356  0.38  0.7051  
##  BSA=Agree             0.9681  0.5210  1.86  0.0631  
##  BHE=Normal           -0.2155  0.3030 -0.71  0.4770  
##  BHE=Agree             0.1516  0.2991  0.51  0.6121  
##  BUB=Yes               0.4988  0.2731  1.83  0.0678  
##  SBM=Normal            0.4075  0.3391  1.20  0.2296  
##  SBM=Agree             1.4860  0.3609  4.12  <0.0001 
##  SBS=Normal           -0.2028  0.3087 -0.66  0.5113  
##  SBS=Agree             1.0130  0.4307  2.35  0.0187  
##  TI                    0.0023  0.0113  0.21  0.8361  
## 
mg.c2 = lrm(MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI, data = MotorS)
mg.c2
## Logistic Regression Model
##  
##  lrm(formula = MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + 
##      LS + BC + CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + 
##      TI, data = MotorS)
##  
##                        Model Likelihood    Discrimination    Rank Discrim.    
##                              Ratio Test           Indexes          Indexes    
##  Obs          516    LR chi2     158.71    R2       0.395    C       0.854    
##   No          391    d.f.            43    g        1.935    Dxy     0.708    
##   Yes         125    Pr(> chi2) <0.0001    gr       6.921    gamma   0.708    
##  max |deriv| 0.03                          gp       0.250    tau-a   0.260    
##                                            Brier    0.126                     
##  
##                          Coef    S.E.    Wald Z Pr(>|Z|)
##  Intercept               -9.5085 24.5401 -0.39  0.6984  
##  AG=25-60                -0.1375  0.4484 -0.31  0.7590  
##  AG=>60                   1.9446  1.0642  1.83  0.0677  
##  OC=Officers/Workers     -0.3472  0.5110 -0.68  0.4968  
##  OC=Housewife/Unemployed  0.2321  0.7494  0.31  0.7568  
##  OC=Free labor           -0.3092  0.4877 -0.63  0.5262  
##  OC=Others                0.7343  0.7214  1.02  0.3087  
##  MO=Yes                  -0.1427  0.4965 -0.29  0.7738  
##  NM=1                     7.9915 24.5100  0.33  0.7444  
##  NM=>=2                   8.0031 24.5068  0.33  0.7440  
##  TP=Picking Children     -0.7910  0.4675 -1.69  0.0907  
##  TP=Entertainment/Food   -0.6188  0.3911 -1.58  0.1136  
##  TP=Others               -0.9471  1.1978 -0.79  0.4291  
##  FR=twice                 0.4573  1.1971  0.38  0.7025  
##  FR=>=3 times             1.1522  0.8573  1.34  0.1790  
##  TS=Yes                  -0.5712  0.2693 -2.12  0.0340  
##  SP=Yes                  -0.3686  0.5883 -0.63  0.5310  
##  SP=Don't know           -0.4860  0.7534 -0.65  0.5189  
##  WI=At Bus Stops          0.7663  0.4079  1.88  0.0603  
##  WI=Internet/Facebook     1.8519  0.4601  4.02  <0.0001 
##  WI=From Friends          1.4981  0.5181  2.89  0.0038  
##  WI=Others                1.2293  0.5712  2.15  0.0314  
##  LS=Yes                  -0.6671  0.3272 -2.04  0.0415  
##  BC=Yes                  -0.5902  0.2824 -2.09  0.0366  
##  CA=Yes                  -0.5145  0.2936 -1.75  0.0797  
##  BST=Normal              -0.5892  0.3912 -1.51  0.1320  
##  BST=Agree               -0.4991  0.3929 -1.27  0.2041  
##  BSC=Normal              -0.5346  0.5332 -1.00  0.3160  
##  BSC=Agree               -0.5667  0.4867 -1.16  0.2443  
##  BSA=Normal               0.2374  0.5706  0.42  0.6774  
##  BSA=Agree                0.9654  0.5654  1.71  0.0878  
##  BHE=Normal              -0.3817  0.3393 -1.12  0.2606  
##  BHE=Agree                0.0509  0.3333  0.15  0.8785  
##  MR=Comfortable          -0.4249  0.6778 -0.63  0.5307  
##  MR=Low price            -0.2663  0.9399 -0.28  0.7769  
##  MR=Accessibility         0.3595  0.6947  0.52  0.6048  
##  MR=Reliability          -1.0487  0.8501 -1.23  0.2174  
##  MR=others               -1.3017  1.6040 -0.81  0.4170  
##  BUB=Yes                  0.5175  0.3064  1.69  0.0912  
##  SBM=Normal               0.5631  0.3704  1.52  0.1284  
##  SBM=Agree                2.0200  0.4291  4.71  <0.0001 
##  SBS=Normal              -0.3276  0.3342 -0.98  0.3269  
##  SBS=Agree                0.7508  0.4858  1.55  0.1222  
##  TI                      -0.0005  0.0126 -0.04  0.9701  
## 

4. Test models

# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình): mg2 tốt nhất
deviance(mg)
## [1] 412.662
deviance(mg1)
## [1] 458.174
deviance(mg2)
## [1] 457.0804
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê - đều có ý nghĩa tk
library(rms)
lrtest(mg2, mg)
## 
## Model 1: MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP
## Model 2: MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + 
##     CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI
## 
##  L.R. Chisq        d.f.           P 
## 44.41840539 28.00000000  0.02524525
lrtest(mg,mg2)
## 
## Model 1: MSI ~ AG + OC + MO + NM + TP + FR + TS + SP + WI + LS + BC + 
##     CA + BST + BSC + BSA + BHE + MR + BUB + SBM + SBS + TI
## Model 2: MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP
## 
##  L.R. Chisq        d.f.           P 
## 44.41840539 28.00000000  0.02524525
lrtest(mg2, mg1)
## 
## Model 1: MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP
## Model 2: MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + 
##     SBS + TI
## 
## L.R. Chisq       d.f.          P 
##  1.0935863  8.0000000  0.9975851
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt - mg2 tốt nhất
AIC(mg)
## [1] 500.662
AIC(mg1)
## [1] 506.174
AIC(mg2)
## [1] 489.0804
## Calculate MacFadden's R2 voi packages "pscl" - mg tốt nhất
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -228.5402071 -285.6872179  114.2940216    0.2000335    0.1986841    0.2967406
pR2(mg1)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -229.0870002 -285.6872179  113.2004353    0.1981195    0.1969840    0.2942015
pR2(mg)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -206.3310044 -285.6872179  158.7124270    0.2777731    0.2647776    0.3954531
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(MotorS$MSI) # Qs là không chuyển đổi nhưng phân loại thành chuyển đổi thì gán giá trị là 0 và ngược lại
##     Yes
## No    0
## Yes   1
dim(MotorS)
## [1] 516  45
glm.probs <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##          2          3          4          5          8          9         10 
## 0.07399812 0.29844325 0.14510373 0.10549728 0.39616094 0.28094218 0.29249884 
##         13         15         16 
## 0.21219585 0.51293946 0.64246947
glm.pred <- rep("No", 516)
glm.pred[glm.probs >= 0.5] = "Yes"
table(glm.pred, MotorS$MSI)
##         
## glm.pred  No Yes
##      No  374  74
##      Yes  17  51
mean(glm.pred == MotorS$MSI) # Tỷ lệ dự báo chính xác của mô hình 80% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-82,36 = 17,64%
## [1] 0.8236434
round(prop.table(table(MotorS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi xe buýt 75.78% < 82,36% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##    No   Yes 
## 75.78 24.22
## Model mg
contrasts(MotorS$MSI)
##     Yes
## No    0
## Yes   1
glm.probs.mg1 <- predict(mg1, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg1[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##          2          3          4          5          8          9         10 
## 0.07230818 0.43608906 0.10147891 0.06051944 0.33375020 0.22061850 0.54331774 
##         13         15         16 
## 0.14091064 0.60090204 0.52938246
glm.pred.mg1 <- rep("No", 516)
glm.pred.mg1[glm.probs.mg1 >= 0.5] = "Yes"
table(glm.pred.mg1, MotorS$MSI)
##             
## glm.pred.mg1  No Yes
##          No  368  85
##          Yes  23  40
mean(glm.pred.mg1 == MotorS$MSI) # Tỷ lệ dự báo chính xác của mô hình 81% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-79,1 = 20,9%
## [1] 0.7906977
round(prop.table(table(MotorS$MSI))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không chuyển đổi sang xe buýt 73.2% < 81% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##    No   Yes 
## 75.78 24.22
# 4.5. Calculate Sensitivity - Độ nhạy
374/(374+74) # Đạt 83,5% --> Rất lý tưởng: Mô hình 2 xác định tới 81% người không chuyển đổi sang xe buýt
## [1] 0.8348214
# 4.6. Calculate Specificity - Độ đặc hiệu
51/(51+17) # Đạt 75% --> Mô hình 2 có khả năng dự báo 63,5% người chuyển đổi sang xe buýt
## [1] 0.75
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với library(MKmisc) và hàm HLgof.test(fit = fitted(mg2), obs = BSs$MSI) # trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
   ## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
## The following object is masked from 'package:purrr':
## 
##     lift
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mg1train = train(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, data = MotorS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg1train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.80649  -0.69419  -0.43485  -0.00113   2.63043  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -16.38213  552.17363  -0.030  0.97633    
## NM1                    14.26799  552.17330   0.026  0.97939    
## `NM>=2`                14.31772  552.17321   0.026  0.97931    
## SPYes                  -0.48827    0.51806  -0.942  0.34594    
## `SPDon't know`         -0.54986    0.68980  -0.797  0.42538    
## `WIAt Bus Stops`        0.72698    0.36374   1.999  0.04565 *  
## `WIInternet/Facebook`   1.73429    0.40686   4.263 2.02e-05 ***
## `WIFrom Friends`        1.35463    0.45332   2.988  0.00281 ** 
## WIOthers                0.95725    0.50130   1.910  0.05619 .  
## BCYes                  -0.62908    0.24950  -2.521  0.01169 *  
## BSTNormal              -0.68942    0.35610  -1.936  0.05286 .  
## BSTAgree               -0.64951    0.34536  -1.881  0.06002 .  
## BSCNormal              -0.29375    0.48745  -0.603  0.54676    
## BSCAgree               -0.31145    0.43655  -0.713  0.47558    
## BSANormal               0.20265    0.53556   0.378  0.70515    
## BSAAgree                0.96808    0.52099   1.858  0.06315 .  
## BHENormal              -0.21547    0.30297  -0.711  0.47698    
## BHEAgree                0.15164    0.29906   0.507  0.61212    
## BUBYes                  0.49882    0.27314   1.826  0.06781 .  
## SBMNormal               0.40746    0.33913   1.201  0.22956    
## SBMAgree                1.48605    0.36085   4.118 3.82e-05 ***
## SBSNormal              -0.20278    0.30871  -0.657  0.51128    
## SBSAgree                1.01296    0.43072   2.352  0.01868 *  
## TI                      0.00233    0.01126   0.207  0.83612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 458.17  on 492  degrees of freedom
## AIC: 506.17
## 
## Number of Fisher Scoring iterations: 14
mg2train = train(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS, method = "glm", family = "binomial", trControl = ctrl)
summary(mg2train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1109  -0.6696  -0.4643  -0.1752   2.6984  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.75079    0.40307  -1.863 0.062508 .  
## `AG25-60`              -0.07254    0.25689  -0.282 0.777664    
## `AG>60`                 2.30511    0.83491   2.761 0.005764 ** 
## TSYes                  -0.46179    0.24422  -1.891 0.058643 .  
## `WIAt Bus Stops`        0.75331    0.31163   2.417 0.015637 *  
## `WIInternet/Facebook`   1.67211    0.35823   4.668 3.04e-06 ***
## `WIFrom Friends`        1.54306    0.41987   3.675 0.000238 ***
## WIOthers                1.11330    0.49058   2.269 0.023246 *  
## LSYes                  -0.60908    0.28641  -2.127 0.033451 *  
## BCYes                  -0.70517    0.24843  -2.838 0.004533 ** 
## CAYes                  -0.83271    0.25749  -3.234 0.001221 ** 
## SBMNormal               0.04398    0.27512   0.160 0.872991    
## SBMAgree                2.02537    0.32084   6.313 2.74e-10 ***
## `TPPicking Children`   -0.58872    0.41530  -1.418 0.156319    
## `TPEntertainment/Food` -0.79116    0.35106  -2.254 0.024221 *  
## TPOthers               -0.71397    0.99890  -0.715 0.474759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 457.08  on 500  degrees of freedom
## AIC: 489.08
## 
## Number of Fisher Scoring iterations: 5
  ## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg1 <- predict(mg1train, newdata = MotorS)
confusionMatrix(data = predmg1, MotorS$MSI)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  368  85
##        Yes  23  40
##                                         
##                Accuracy : 0.7907        
##                  95% CI : (0.753, 0.825)
##     No Information Rate : 0.7578        
##     P-Value [Acc > NIR] : 0.04332       
##                                         
##                   Kappa : 0.3142        
##                                         
##  Mcnemar's Test P-Value : 4.365e-09     
##                                         
##             Sensitivity : 0.9412        
##             Specificity : 0.3200        
##          Pos Pred Value : 0.8124        
##          Neg Pred Value : 0.6349        
##              Prevalence : 0.7578        
##          Detection Rate : 0.7132        
##    Detection Prevalence : 0.8779        
##       Balanced Accuracy : 0.6306        
##                                         
##        'Positive' Class : No            
## 
predmg2 <- predict(mg2train, newdata = MotorS)
confusionMatrix(data = predmg2, MotorS$MSI)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  374  74
##        Yes  17  51
##                                          
##                Accuracy : 0.8236         
##                  95% CI : (0.788, 0.8556)
##     No Information Rate : 0.7578         
##     P-Value [Acc > NIR] : 0.0001897      
##                                          
##                   Kappa : 0.4314         
##                                          
##  Mcnemar's Test P-Value : 4.348e-09      
##                                          
##             Sensitivity : 0.9565         
##             Specificity : 0.4080         
##          Pos Pred Value : 0.8348         
##          Neg Pred Value : 0.7500         
##              Prevalence : 0.7578         
##          Detection Rate : 0.7248         
##    Detection Prevalence : 0.8682         
##       Balanced Accuracy : 0.6823         
##                                          
##        'Positive' Class : No             
## 
  ## Lấy dataframe với thông tin Accuracy của mô hình
a = mg1train$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg1" # Thêm biến mô hình
a
##      Accuracy       Kappa   Mohinh
## 1   0.8269231  0.46575342 Logitmg1
## 2   0.7692308  0.22580645 Logitmg1
## 3   0.7500000  0.27777778 Logitmg1
## 4   0.8431373  0.47286822 Logitmg1
## 5   0.7058824  0.04494382 Logitmg1
## 6   0.7115385  0.06250000 Logitmg1
## 7   0.8269231  0.40000000 Logitmg1
## 8   0.7450980  0.04329004 Logitmg1
## 9   0.7647059  0.15000000 Logitmg1
## 10  0.6923077  0.17948718 Logitmg1
## 11  0.8461538  0.51515152 Logitmg1
## 12  0.7647059  0.20930233 Logitmg1
## 13  0.7307692  0.03448276 Logitmg1
## 14  0.6538462 -0.10377358 Logitmg1
## 15  0.8627451  0.52208835 Logitmg1
## 16  0.8039216  0.45512821 Logitmg1
## 17  0.7500000  0.18750000 Logitmg1
## 18  0.6923077  0.03030303 Logitmg1
## 19  0.8076923  0.35483871 Logitmg1
## 20  0.7647059  0.26086957 Logitmg1
## 21  0.8039216  0.34108527 Logitmg1
## 22  0.7307692  0.09677419 Logitmg1
## 23  0.8235294  0.49504950 Logitmg1
## 24  0.7450980  0.17228464 Logitmg1
## 25  0.7115385  0.00000000 Logitmg1
## 26  0.7692308  0.22580645 Logitmg1
## 27  0.7735849  0.22815534 Logitmg1
## 28  0.7843137  0.34385965 Logitmg1
## 29  0.7500000  0.23529412 Logitmg1
## 30  0.7254902  0.13768116 Logitmg1
## 31  0.7254902  0.19047619 Logitmg1
## 32  0.7058824  0.04494382 Logitmg1
## 33  0.7358491  0.15489749 Logitmg1
## 34  0.7058824  0.04494382 Logitmg1
## 35  0.8461538  0.48387097 Logitmg1
## 36  0.7115385  0.00000000 Logitmg1
## 37  0.7692308  0.31428571 Logitmg1
## 38  0.7843137  0.24899598 Logitmg1
## 39  0.8431373  0.47286822 Logitmg1
## 40  0.7500000  0.23529412 Logitmg1
## 41  0.7692308  0.31428571 Logitmg1
## 42  0.7884615  0.35294118 Logitmg1
## 43  0.7450980  0.04329004 Logitmg1
## 44  0.8846154  0.65714286 Logitmg1
## 45  0.7692308  0.26415094 Logitmg1
## 46  0.7500000  0.13333333 Logitmg1
## 47  0.7692308  0.22580645 Logitmg1
## 48  0.6666667 -0.08239700 Logitmg1
## 49  0.7647059  0.20930233 Logitmg1
## 50  0.7843137  0.29962547 Logitmg1
## 51  0.7058824  0.04494382 Logitmg1
## 52  0.8076923  0.45945946 Logitmg1
## 53  0.6923077  0.08571429 Logitmg1
## 54  0.7692308  0.15217391 Logitmg1
## 55  0.7884615  0.31250000 Logitmg1
## 56  0.8235294  0.33766234 Logitmg1
## 57  0.8269231  0.40000000 Logitmg1
## 58  0.7647059  0.20930233 Logitmg1
## 59  0.7692308  0.22580645 Logitmg1
## 60  0.7647059  0.26086957 Logitmg1
## 61  0.7843137  0.29962547 Logitmg1
## 62  0.7692308  0.27272727 Logitmg1
## 63  0.7647059  0.20930233 Logitmg1
## 64  0.8269231  0.47058824 Logitmg1
## 65  0.7500000  0.22831050 Logitmg1
## 66  0.8076923  0.35483871 Logitmg1
## 67  0.7692308  0.22580645 Logitmg1
## 68  0.7058824  0.10526316 Logitmg1
## 69  0.7647059  0.15000000 Logitmg1
## 70  0.7500000  0.18750000 Logitmg1
## 71  0.8235294  0.38554217 Logitmg1
## 72  0.8235294  0.38554217 Logitmg1
## 73  0.8269231  0.43750000 Logitmg1
## 74  0.7450980  0.17228464 Logitmg1
## 75  0.6470588 -0.04081633 Logitmg1
## 76  0.6730769 -0.13333333 Logitmg1
## 77  0.7692308  0.22580645 Logitmg1
## 78  0.7169811  0.12154696 Logitmg1
## 79  0.7843137  0.34385965 Logitmg1
## 80  0.7692308  0.31428571 Logitmg1
## 81  0.8076923  0.42857143 Logitmg1
## 82  0.7307692  0.15151515 Logitmg1
## 83  0.7307692  0.24324324 Logitmg1
## 84  0.7115385  0.00000000 Logitmg1
## 85  0.7307692  0.08080808 Logitmg1
## 86  0.8235294  0.38554217 Logitmg1
## 87  0.8039216  0.29166667 Logitmg1
## 88  0.7843137  0.24899598 Logitmg1
## 89  0.7884615  0.35294118 Logitmg1
## 90  0.6862745  0.01449275 Logitmg1
## 91  0.7307692  0.09677419 Logitmg1
## 92  0.8039216  0.23423423 Logitmg1
## 93  0.7647059  0.30612245 Logitmg1
## 94  0.7884615  0.38888889 Logitmg1
## 95  0.7450980  0.22456140 Logitmg1
## 96  0.7254902  0.13768116 Logitmg1
## 97  0.7547170  0.19036428 Logitmg1
## 98  0.8461538  0.48387097 Logitmg1
## 99  0.8039216  0.29166667 Logitmg1
## 100 0.7307692  0.20000000 Logitmg1
b = mg2train$resample
b = b[, -3]
b$Mohinh = "Logitmg2"
b
##      Accuracy       Kappa   Mohinh
## 1   0.7647059  0.20930233 Logitmg2
## 2   0.7692308  0.35135135 Logitmg2
## 3   0.7884615  0.26666667 Logitmg2
## 4   0.8431373  0.47286822 Logitmg2
## 5   0.9038462  0.72222222 Logitmg2
## 6   0.7500000  0.13333333 Logitmg2
## 7   0.8431373  0.47286822 Logitmg2
## 8   0.7547170  0.13550816 Logitmg2
## 9   0.8627451  0.55430712 Logitmg2
## 10  0.8235294  0.42696629 Logitmg2
## 11  0.8076923  0.39393939 Logitmg2
## 12  0.7647059  0.26086957 Logitmg2
## 13  0.8039216  0.29166667 Logitmg2
## 14  0.8039216  0.29166667 Logitmg2
## 15  0.8076923  0.35483871 Logitmg2
## 16  0.8431373  0.43333333 Logitmg2
## 17  0.8653846  0.61111111 Logitmg2
## 18  0.7500000  0.27467811 Logitmg2
## 19  0.8461538  0.54285714 Logitmg2
## 20  0.7307692  0.15151515 Logitmg2
## 21  0.7692308  0.17241379 Logitmg2
## 22  0.8076923  0.42477876 Logitmg2
## 23  0.7500000  0.13333333 Logitmg2
## 24  0.7884615  0.38888889 Logitmg2
## 25  0.8269231  0.50000000 Logitmg2
## 26  0.8039216  0.38405797 Logitmg2
## 27  0.8431373  0.50724638 Logitmg2
## 28  0.8431373  0.47286822 Logitmg2
## 29  0.7692308  0.11111111 Logitmg2
## 30  0.8235294  0.38554217 Logitmg2
## 31  0.8627451  0.55430712 Logitmg2
## 32  0.7647059  0.15000000 Logitmg2
## 33  0.7843137  0.38283828 Logitmg2
## 34  0.7254902  0.07751938 Logitmg2
## 35  0.8461538  0.51515152 Logitmg2
## 36  0.7884615  0.35294118 Logitmg2
## 37  0.7115385 -0.07142857 Logitmg2
## 38  0.9019608  0.70175439 Logitmg2
## 39  0.7884615  0.21428571 Logitmg2
## 40  0.8490566  0.56997972 Logitmg2
## 41  0.8431373  0.47286822 Logitmg2
## 42  0.8431373  0.50724638 Logitmg2
## 43  0.8431373  0.47286822 Logitmg2
## 44  0.8431373  0.47286822 Logitmg2
## 45  0.7450980  0.22456140 Logitmg2
## 46  0.8076923  0.35483871 Logitmg2
## 47  0.7735849  0.27562642 Logitmg2
## 48  0.7692308  0.27272727 Logitmg2
## 49  0.7692308  0.31428571 Logitmg2
## 50  0.8269231  0.40000000 Logitmg2
## 51  0.7692308  0.31428571 Logitmg2
## 52  0.7647059  0.30612245 Logitmg2
## 53  0.7500000  0.23529412 Logitmg2
## 54  0.8461538  0.48387097 Logitmg2
## 55  0.7843137  0.34385965 Logitmg2
## 56  0.8076923  0.39393939 Logitmg2
## 57  0.7450980  0.17228464 Logitmg2
## 58  0.8269231  0.47058824 Logitmg2
## 59  0.8627451  0.52208835 Logitmg2
## 60  0.8076923  0.29347826 Logitmg2
## 61  0.8076923  0.42477876 Logitmg2
## 62  0.8076923  0.39393939 Logitmg2
## 63  0.7692308  0.17241379 Logitmg2
## 64  0.8431373  0.50724638 Logitmg2
## 65  0.7647059  0.15000000 Logitmg2
## 66  0.9019608  0.70175439 Logitmg2
## 67  0.8076923  0.39393939 Logitmg2
## 68  0.8039216  0.23423423 Logitmg2
## 69  0.7884615  0.38888889 Logitmg2
## 70  0.7692308  0.17241379 Logitmg2
## 71  0.7884615  0.26666667 Logitmg2
## 72  0.8461538  0.54285714 Logitmg2
## 73  0.7843137  0.29962547 Logitmg2
## 74  0.7692308  0.26415094 Logitmg2
## 75  0.9038462  0.70588235 Logitmg2
## 76  0.7647059  0.15000000 Logitmg2
## 77  0.8039216  0.34108527 Logitmg2
## 78  0.8076923  0.39393939 Logitmg2
## 79  0.7500000  0.23529412 Logitmg2
## 80  0.8039216  0.38405797 Logitmg2
## 81  0.7692308  0.27272727 Logitmg2
## 82  0.7647059  0.30612245 Logitmg2
## 83  0.7058824 -0.02409639 Logitmg2
## 84  0.7843137  0.29962547 Logitmg2
## 85  0.7884615  0.31250000 Logitmg2
## 86  0.9230769  0.77142857 Logitmg2
## 87  0.7884615  0.19209040 Logitmg2
## 88  0.8653846  0.58823529 Logitmg2
## 89  0.7884615  0.31250000 Logitmg2
## 90  0.8039216  0.34108527 Logitmg2
## 91  0.7450980  0.17228464 Logitmg2
## 92  0.8235294  0.38554217 Logitmg2
## 93  0.8269231  0.47058824 Logitmg2
## 94  0.7450980  0.11244980 Logitmg2
## 95  0.8113208  0.35679612 Logitmg2
## 96  0.7500000  0.13333333 Logitmg2
## 97  0.8076923  0.39393939 Logitmg2
## 98  0.7307692  0.24324324 Logitmg2
## 99  0.8627451  0.58245614 Logitmg2
## 100 0.7843137  0.29962547 Logitmg2
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
##      Accuracy       Kappa   Mohinh
## 1   0.8269231  0.46575342 Logitmg1
## 2   0.7692308  0.22580645 Logitmg1
## 3   0.7500000  0.27777778 Logitmg1
## 4   0.8431373  0.47286822 Logitmg1
## 5   0.7058824  0.04494382 Logitmg1
## 6   0.7115385  0.06250000 Logitmg1
## 7   0.8269231  0.40000000 Logitmg1
## 8   0.7450980  0.04329004 Logitmg1
## 9   0.7647059  0.15000000 Logitmg1
## 10  0.6923077  0.17948718 Logitmg1
## 11  0.8461538  0.51515152 Logitmg1
## 12  0.7647059  0.20930233 Logitmg1
## 13  0.7307692  0.03448276 Logitmg1
## 14  0.6538462 -0.10377358 Logitmg1
## 15  0.8627451  0.52208835 Logitmg1
## 16  0.8039216  0.45512821 Logitmg1
## 17  0.7500000  0.18750000 Logitmg1
## 18  0.6923077  0.03030303 Logitmg1
## 19  0.8076923  0.35483871 Logitmg1
## 20  0.7647059  0.26086957 Logitmg1
## 21  0.8039216  0.34108527 Logitmg1
## 22  0.7307692  0.09677419 Logitmg1
## 23  0.8235294  0.49504950 Logitmg1
## 24  0.7450980  0.17228464 Logitmg1
## 25  0.7115385  0.00000000 Logitmg1
## 26  0.7692308  0.22580645 Logitmg1
## 27  0.7735849  0.22815534 Logitmg1
## 28  0.7843137  0.34385965 Logitmg1
## 29  0.7500000  0.23529412 Logitmg1
## 30  0.7254902  0.13768116 Logitmg1
## 31  0.7254902  0.19047619 Logitmg1
## 32  0.7058824  0.04494382 Logitmg1
## 33  0.7358491  0.15489749 Logitmg1
## 34  0.7058824  0.04494382 Logitmg1
## 35  0.8461538  0.48387097 Logitmg1
## 36  0.7115385  0.00000000 Logitmg1
## 37  0.7692308  0.31428571 Logitmg1
## 38  0.7843137  0.24899598 Logitmg1
## 39  0.8431373  0.47286822 Logitmg1
## 40  0.7500000  0.23529412 Logitmg1
## 41  0.7692308  0.31428571 Logitmg1
## 42  0.7884615  0.35294118 Logitmg1
## 43  0.7450980  0.04329004 Logitmg1
## 44  0.8846154  0.65714286 Logitmg1
## 45  0.7692308  0.26415094 Logitmg1
## 46  0.7500000  0.13333333 Logitmg1
## 47  0.7692308  0.22580645 Logitmg1
## 48  0.6666667 -0.08239700 Logitmg1
## 49  0.7647059  0.20930233 Logitmg1
## 50  0.7843137  0.29962547 Logitmg1
## 51  0.7058824  0.04494382 Logitmg1
## 52  0.8076923  0.45945946 Logitmg1
## 53  0.6923077  0.08571429 Logitmg1
## 54  0.7692308  0.15217391 Logitmg1
## 55  0.7884615  0.31250000 Logitmg1
## 56  0.8235294  0.33766234 Logitmg1
## 57  0.8269231  0.40000000 Logitmg1
## 58  0.7647059  0.20930233 Logitmg1
## 59  0.7692308  0.22580645 Logitmg1
## 60  0.7647059  0.26086957 Logitmg1
## 61  0.7843137  0.29962547 Logitmg1
## 62  0.7692308  0.27272727 Logitmg1
## 63  0.7647059  0.20930233 Logitmg1
## 64  0.8269231  0.47058824 Logitmg1
## 65  0.7500000  0.22831050 Logitmg1
## 66  0.8076923  0.35483871 Logitmg1
## 67  0.7692308  0.22580645 Logitmg1
## 68  0.7058824  0.10526316 Logitmg1
## 69  0.7647059  0.15000000 Logitmg1
## 70  0.7500000  0.18750000 Logitmg1
## 71  0.8235294  0.38554217 Logitmg1
## 72  0.8235294  0.38554217 Logitmg1
## 73  0.8269231  0.43750000 Logitmg1
## 74  0.7450980  0.17228464 Logitmg1
## 75  0.6470588 -0.04081633 Logitmg1
## 76  0.6730769 -0.13333333 Logitmg1
## 77  0.7692308  0.22580645 Logitmg1
## 78  0.7169811  0.12154696 Logitmg1
## 79  0.7843137  0.34385965 Logitmg1
## 80  0.7692308  0.31428571 Logitmg1
## 81  0.8076923  0.42857143 Logitmg1
## 82  0.7307692  0.15151515 Logitmg1
## 83  0.7307692  0.24324324 Logitmg1
## 84  0.7115385  0.00000000 Logitmg1
## 85  0.7307692  0.08080808 Logitmg1
## 86  0.8235294  0.38554217 Logitmg1
## 87  0.8039216  0.29166667 Logitmg1
## 88  0.7843137  0.24899598 Logitmg1
## 89  0.7884615  0.35294118 Logitmg1
## 90  0.6862745  0.01449275 Logitmg1
## 91  0.7307692  0.09677419 Logitmg1
## 92  0.8039216  0.23423423 Logitmg1
## 93  0.7647059  0.30612245 Logitmg1
## 94  0.7884615  0.38888889 Logitmg1
## 95  0.7450980  0.22456140 Logitmg1
## 96  0.7254902  0.13768116 Logitmg1
## 97  0.7547170  0.19036428 Logitmg1
## 98  0.8461538  0.48387097 Logitmg1
## 99  0.8039216  0.29166667 Logitmg1
## 100 0.7307692  0.20000000 Logitmg1
## 101 0.7647059  0.20930233 Logitmg2
## 102 0.7692308  0.35135135 Logitmg2
## 103 0.7884615  0.26666667 Logitmg2
## 104 0.8431373  0.47286822 Logitmg2
## 105 0.9038462  0.72222222 Logitmg2
## 106 0.7500000  0.13333333 Logitmg2
## 107 0.8431373  0.47286822 Logitmg2
## 108 0.7547170  0.13550816 Logitmg2
## 109 0.8627451  0.55430712 Logitmg2
## 110 0.8235294  0.42696629 Logitmg2
## 111 0.8076923  0.39393939 Logitmg2
## 112 0.7647059  0.26086957 Logitmg2
## 113 0.8039216  0.29166667 Logitmg2
## 114 0.8039216  0.29166667 Logitmg2
## 115 0.8076923  0.35483871 Logitmg2
## 116 0.8431373  0.43333333 Logitmg2
## 117 0.8653846  0.61111111 Logitmg2
## 118 0.7500000  0.27467811 Logitmg2
## 119 0.8461538  0.54285714 Logitmg2
## 120 0.7307692  0.15151515 Logitmg2
## 121 0.7692308  0.17241379 Logitmg2
## 122 0.8076923  0.42477876 Logitmg2
## 123 0.7500000  0.13333333 Logitmg2
## 124 0.7884615  0.38888889 Logitmg2
## 125 0.8269231  0.50000000 Logitmg2
## 126 0.8039216  0.38405797 Logitmg2
## 127 0.8431373  0.50724638 Logitmg2
## 128 0.8431373  0.47286822 Logitmg2
## 129 0.7692308  0.11111111 Logitmg2
## 130 0.8235294  0.38554217 Logitmg2
## 131 0.8627451  0.55430712 Logitmg2
## 132 0.7647059  0.15000000 Logitmg2
## 133 0.7843137  0.38283828 Logitmg2
## 134 0.7254902  0.07751938 Logitmg2
## 135 0.8461538  0.51515152 Logitmg2
## 136 0.7884615  0.35294118 Logitmg2
## 137 0.7115385 -0.07142857 Logitmg2
## 138 0.9019608  0.70175439 Logitmg2
## 139 0.7884615  0.21428571 Logitmg2
## 140 0.8490566  0.56997972 Logitmg2
## 141 0.8431373  0.47286822 Logitmg2
## 142 0.8431373  0.50724638 Logitmg2
## 143 0.8431373  0.47286822 Logitmg2
## 144 0.8431373  0.47286822 Logitmg2
## 145 0.7450980  0.22456140 Logitmg2
## 146 0.8076923  0.35483871 Logitmg2
## 147 0.7735849  0.27562642 Logitmg2
## 148 0.7692308  0.27272727 Logitmg2
## 149 0.7692308  0.31428571 Logitmg2
## 150 0.8269231  0.40000000 Logitmg2
## 151 0.7692308  0.31428571 Logitmg2
## 152 0.7647059  0.30612245 Logitmg2
## 153 0.7500000  0.23529412 Logitmg2
## 154 0.8461538  0.48387097 Logitmg2
## 155 0.7843137  0.34385965 Logitmg2
## 156 0.8076923  0.39393939 Logitmg2
## 157 0.7450980  0.17228464 Logitmg2
## 158 0.8269231  0.47058824 Logitmg2
## 159 0.8627451  0.52208835 Logitmg2
## 160 0.8076923  0.29347826 Logitmg2
## 161 0.8076923  0.42477876 Logitmg2
## 162 0.8076923  0.39393939 Logitmg2
## 163 0.7692308  0.17241379 Logitmg2
## 164 0.8431373  0.50724638 Logitmg2
## 165 0.7647059  0.15000000 Logitmg2
## 166 0.9019608  0.70175439 Logitmg2
## 167 0.8076923  0.39393939 Logitmg2
## 168 0.8039216  0.23423423 Logitmg2
## 169 0.7884615  0.38888889 Logitmg2
## 170 0.7692308  0.17241379 Logitmg2
## 171 0.7884615  0.26666667 Logitmg2
## 172 0.8461538  0.54285714 Logitmg2
## 173 0.7843137  0.29962547 Logitmg2
## 174 0.7692308  0.26415094 Logitmg2
## 175 0.9038462  0.70588235 Logitmg2
## 176 0.7647059  0.15000000 Logitmg2
## 177 0.8039216  0.34108527 Logitmg2
## 178 0.8076923  0.39393939 Logitmg2
## 179 0.7500000  0.23529412 Logitmg2
## 180 0.8039216  0.38405797 Logitmg2
## 181 0.7692308  0.27272727 Logitmg2
## 182 0.7647059  0.30612245 Logitmg2
## 183 0.7058824 -0.02409639 Logitmg2
## 184 0.7843137  0.29962547 Logitmg2
## 185 0.7884615  0.31250000 Logitmg2
## 186 0.9230769  0.77142857 Logitmg2
## 187 0.7884615  0.19209040 Logitmg2
## 188 0.8653846  0.58823529 Logitmg2
## 189 0.7884615  0.31250000 Logitmg2
## 190 0.8039216  0.34108527 Logitmg2
## 191 0.7450980  0.17228464 Logitmg2
## 192 0.8235294  0.38554217 Logitmg2
## 193 0.8269231  0.47058824 Logitmg2
## 194 0.7450980  0.11244980 Logitmg2
## 195 0.8113208  0.35679612 Logitmg2
## 196 0.7500000  0.13333333 Logitmg2
## 197 0.8076923  0.39393939 Logitmg2
## 198 0.7307692  0.24324324 Logitmg2
## 199 0.8627451  0.58245614 Logitmg2
## 200 0.7843137  0.29962547 Logitmg2
   ## Phân tích hình ảnh
# install.packages("ggplot2")
library(ggplot2)
library(ggthemes)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.7578, color = "blue") + theme_wsj() # Giá trị 0.7578 = 75.78% là tỷ lệ nhóm chiếm ưu thế (không chuyển đổi xe buýt)
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 75.78% (đường màu xanh)

h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2

grid.arrange(h1, h2, ncol = 2, nrow = 1)

   ## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Accuracy by resamplemod$Mohinh
## t = -5.6188, df = 197.15, p-value = 6.492e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04976722 -0.02390861
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2 
##              0.7647548              0.8015927
summary(a)
##     Accuracy          Kappa            Mohinh         
##  Min.   :0.6471   Min.   :-0.1333   Length:100        
##  1st Qu.:0.7308   1st Qu.: 0.1377   Class :character  
##  Median :0.7647   Median : 0.2270   Mode  :character  
##  Mean   :0.7648   Mean   : 0.2363                     
##  3rd Qu.:0.8039   3rd Qu.: 0.3439                     
##  Max.   :0.8846   Max.   : 0.6571
summary(b)
##     Accuracy          Kappa             Mohinh         
##  Min.   :0.7059   Min.   :-0.07143   Length:100        
##  1st Qu.:0.7692   1st Qu.: 0.24126   Class :character  
##  Median :0.8039   Median : 0.35215   Mode  :character  
##  Mean   :0.8016   Mean   : 0.35229                     
##  3rd Qu.:0.8431   3rd Qu.: 0.47287                     
##  Max.   :0.9231   Max.   : 0.77143
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Kappa by resamplemod$Mohinh
## t = -5.1443, df = 197.59, p-value = 6.449e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.16046674 -0.07153101
## sample estimates:
## mean in group Logitmg1 mean in group Logitmg2 
##              0.2362892              0.3522881
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
## 
##     ci
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
   ## Mô hình mg1 
mgtrain1 <- train(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, method = "glm", family = "binomial", data = MotorS)
summary(mgtrain1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.80649  -0.69419  -0.43485  -0.00113   2.63043  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -16.38213  552.17363  -0.030  0.97633    
## NM1                    14.26799  552.17330   0.026  0.97939    
## `NM>=2`                14.31772  552.17321   0.026  0.97931    
## SPYes                  -0.48827    0.51806  -0.942  0.34594    
## `SPDon't know`         -0.54986    0.68980  -0.797  0.42538    
## `WIAt Bus Stops`        0.72698    0.36374   1.999  0.04565 *  
## `WIInternet/Facebook`   1.73429    0.40686   4.263 2.02e-05 ***
## `WIFrom Friends`        1.35463    0.45332   2.988  0.00281 ** 
## WIOthers                0.95725    0.50130   1.910  0.05619 .  
## BCYes                  -0.62908    0.24950  -2.521  0.01169 *  
## BSTNormal              -0.68942    0.35610  -1.936  0.05286 .  
## BSTAgree               -0.64951    0.34536  -1.881  0.06002 .  
## BSCNormal              -0.29375    0.48745  -0.603  0.54676    
## BSCAgree               -0.31145    0.43655  -0.713  0.47558    
## BSANormal               0.20265    0.53556   0.378  0.70515    
## BSAAgree                0.96808    0.52099   1.858  0.06315 .  
## BHENormal              -0.21547    0.30297  -0.711  0.47698    
## BHEAgree                0.15164    0.29906   0.507  0.61212    
## BUBYes                  0.49882    0.27314   1.826  0.06781 .  
## SBMNormal               0.40746    0.33913   1.201  0.22956    
## SBMAgree                1.48605    0.36085   4.118 3.82e-05 ***
## SBSNormal              -0.20278    0.30871  -0.657  0.51128    
## SBSAgree                1.01296    0.43072   2.352  0.01868 *  
## TI                      0.00233    0.01126   0.207  0.83612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 458.17  on 492  degrees of freedom
## AIC: 506.17
## 
## Number of Fisher Scoring iterations: 14
pred1mg <- predict(mgtrain1, newdata = MotorS, type = "prob")
rmg <- roc(MotorS$MSI, pred1mg$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg$auc
## Area under the curve: 0.7969
plot(rmg, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

plot(smooth(rmg), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

   ## Có thể dùng các lệnh đơn giản
auc(rmg)
## Area under the curve: 0.7969
plot.roc(rmg)

ci(rmg)
## 95% CI: 0.754-0.8397 (DeLong)
   ## Mô hình mg2 
mg2train1 <- train(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, data = MotorS, method = "glm", family = "binomial")
summary(mg2train1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1109  -0.6696  -0.4643  -0.1752   2.6984  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.75079    0.40307  -1.863 0.062508 .  
## `AG25-60`              -0.07254    0.25689  -0.282 0.777664    
## `AG>60`                 2.30511    0.83491   2.761 0.005764 ** 
## TSYes                  -0.46179    0.24422  -1.891 0.058643 .  
## `WIAt Bus Stops`        0.75331    0.31163   2.417 0.015637 *  
## `WIInternet/Facebook`   1.67211    0.35823   4.668 3.04e-06 ***
## `WIFrom Friends`        1.54306    0.41987   3.675 0.000238 ***
## WIOthers                1.11330    0.49058   2.269 0.023246 *  
## LSYes                  -0.60908    0.28641  -2.127 0.033451 *  
## BCYes                  -0.70517    0.24843  -2.838 0.004533 ** 
## CAYes                  -0.83271    0.25749  -3.234 0.001221 ** 
## SBMNormal               0.04398    0.27512   0.160 0.872991    
## SBMAgree                2.02537    0.32084   6.313 2.74e-10 ***
## `TPPicking Children`   -0.58872    0.41530  -1.418 0.156319    
## `TPEntertainment/Food` -0.79116    0.35106  -2.254 0.024221 *  
## TPOthers               -0.71397    0.99890  -0.715 0.474759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 457.08  on 500  degrees of freedom
## AIC: 489.08
## 
## Number of Fisher Scoring iterations: 5
pred1mg2 <- predict(mg2train1, newdata = MotorS, type = "prob")
rmg2 <- roc(MotorS$MSI, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.7923
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

   ## Tính hệ số Gini = 2AUC-1
Ginimg <- 2*(rmg$auc)-1
Ginimg
## [1] 0.5937187
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.5845524

5. Importance of variables in models

library(caret)
varImp(mg2)
##                        Overall
## AG25-60              0.2823645
## AG>60                2.7609178
## TSYes                1.8908608
## WIAt Bus Stops       2.4172876
## WIInternet/Facebook  4.6677670
## WIFrom Friends       3.6750679
## WIOthers             2.2693698
## LSYes                2.1266243
## BCYes                2.8384880
## CAYes                3.2339820
## SBMNormal            0.1598598
## SBMAgree             6.3127968
## TPPicking Children   1.4175620
## TPEntertainment/Food 2.2536040
## TPOthers             0.7147577
# plot(varImp(mg2))

6. Probility of bus use - Prohibit model

# Prohibit Models
library(aod)
## 
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
## 
##     rats
   ## Model mg1
prohibitmg1 <- glm(MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + BUB + SBM + SBS + TI, family = binomial(link = "probit"), data = MotorS)
summary(prohibitmg1)
## 
## Call:
## glm(formula = MSI ~ NM + SP + WI + BC + BST + BSC + BSA + BHE + 
##     BUB + SBM + SBS + TI, family = binomial(link = "probit"), 
##     data = MotorS)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.78116  -0.69947  -0.42198  -0.00098   2.70426  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -6.234267 140.267761  -0.044  0.96455    
## NM1                   4.958399 140.267361   0.035  0.97180    
## NM>=2                 4.946783 140.267221   0.035  0.97187    
## SPYes                -0.221128   0.289658  -0.763  0.44522    
## SPDon't know         -0.281875   0.368785  -0.764  0.44467    
## WIAt Bus Stops        0.370945   0.203212   1.825  0.06794 .  
## WIInternet/Facebook   0.975965   0.231298   4.220 2.45e-05 ***
## WIFrom Friends        0.779809   0.261582   2.981  0.00287 ** 
## WIOthers              0.526548   0.286183   1.840  0.06578 .  
## BCYes                -0.349442   0.142179  -2.458  0.01398 *  
## BSTNormal            -0.409165   0.201514  -2.030  0.04231 *  
## BSTAgree             -0.381980   0.197425  -1.935  0.05301 .  
## BSCNormal            -0.159231   0.278115  -0.573  0.56696    
## BSCAgree             -0.167714   0.251752  -0.666  0.50529    
## BSANormal             0.140126   0.305581   0.459  0.64655    
## BSAAgree              0.562618   0.299267   1.880  0.06011 .  
## BHENormal            -0.118151   0.172999  -0.683  0.49463    
## BHEAgree              0.088132   0.173235   0.509  0.61093    
## BUBYes                0.295002   0.154523   1.909  0.05625 .  
## SBMNormal             0.251063   0.195509   1.284  0.19909    
## SBMAgree              0.891703   0.213962   4.168 3.08e-05 ***
## SBSNormal            -0.115522   0.178942  -0.646  0.51855    
## SBSAgree              0.613392   0.257007   2.387  0.01700 *  
## TI                    0.001392   0.006498   0.214  0.83034    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 457.07  on 492  degrees of freedom
## AIC: 505.07
## 
## Number of Fisher Scoring iterations: 14
   ## Model mg2
prohibitmg2 <- glm(MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial(link = "probit"), data = MotorS)
summary(prohibitmg2)
## 
## Call:
## glm(formula = MSI ~ AG + TS + WI + LS + BC + CA + SBM + TP, family = binomial(link = "probit"), 
##     data = MotorS)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1034  -0.6752  -0.4654  -0.1388   2.7459  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.44907    0.23012  -1.951 0.051000 .  
## AG25-60              -0.04813    0.14639  -0.329 0.742307    
## AG>60                 1.27195    0.49303   2.580 0.009884 ** 
## TSYes                -0.25612    0.13812  -1.854 0.063686 .  
## WIAt Bus Stops        0.40190    0.17255   2.329 0.019850 *  
## WIInternet/Facebook   0.92887    0.20409   4.551 5.33e-06 ***
## WIFrom Friends        0.87198    0.24272   3.593 0.000328 ***
## WIOthers              0.60169    0.27713   2.171 0.029922 *  
## LSYes                -0.38435    0.16348  -2.351 0.018722 *  
## BCYes                -0.38520    0.13998  -2.752 0.005928 ** 
## CAYes                -0.43460    0.14417  -3.014 0.002574 ** 
## SBMNormal             0.04853    0.15448   0.314 0.753382    
## SBMAgree              1.18416    0.18382   6.442 1.18e-10 ***
## TPPicking Children   -0.31201    0.22912  -1.362 0.173260    
## TPEntertainment/Food -0.42127    0.19550  -2.155 0.031172 *  
## TPOthers             -0.43526    0.56172  -0.775 0.438409    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 571.37  on 515  degrees of freedom
## Residual deviance: 458.21  on 500  degrees of freedom
## AIC: 490.21
## 
## Number of Fisher Scoring iterations: 5
# Đánh giá tác động tổng thể Purpose (Total effect of Temporary Stops) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với Purpose là từ 4)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 4) # Giá trị 3.4 ứng với p-value > 0.05 --> KL: tác động tổng thể về TS (Số điểm dừng tạm thời) của người sử dụng lên khả năng chuyển đổi xe buýt là không có ý nghĩa thống kê
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 3.4, df = 1, P(> X2) = 0.064
# Đánh giá tác động tổng thể WI (Total effect of WI) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với WI là từ 5:8)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 5:8)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 26.3, df = 4, P(> X2) = 2.7e-05
# Đánh giá tác động tổng thể SBM (Total effect of SBM) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SBM là từ 12:13)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 12:13)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 45.2, df = 2, P(> X2) = 1.5e-10
# Đánh giá tác động tổng thể TP (Total effect of TP) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với SP là từ 14:16)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 14:16)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 6.1, df = 3, P(> X2) = 0.11
# Đánh giá tác động tổng thể AG (Total effect of AG) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chuyển đổi xe buýt (thứ tự hệ số ứng với OC là từ 2:3)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 2:3)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 7.3, df = 2, P(> X2) = 0.026

7. Perception of bus in two groups people: Shift to bus and non ship to bus 7.1. Tạo dữ liệu biểu đồ likert tương ứng với 2 nhóm : Chuyển đổi BP_MS và không chuyển đổi BP_nMS

# Create data for Likert graph by BS (remove ID and TM =7-Bus) 
head(MS)
##   ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 1  1  0  1  6  3  3  1  2 10  1  1  0  4  1  0   1 15   1   3   3   3   4   4
## 2  2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5
## 3  3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4
## 4  4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4
## 5  5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4
## 6  6  0  1  4  1  4  1 20 30  1  1  0  5  3  0   2 50   0   3   2   4   4   4
##   BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 1   4   4   3   4   4   4   4  1  1  1   0  0  5  4  2  2  0  0  0  0  0  1  2
## 2   2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3
## 3   3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3
## 4   4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3
## 5   2   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2
## 6   3   3   3   2   4   4   4  2  0  0   0  1  4  3  4  2  1  1  1  1  1  1  2
##   NR
## 1  1
## 2  0
## 3  0
## 4  0
## 5  0
## 6  1
str(MS)
## 'data.frame':    810 obs. of  47 variables:
##  $ ID : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CA : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TM : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ TP : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ FR : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ DT : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ DI : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ TI : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : int  0 1 1 1 0 0 0 0 1 1 ...
##  $ MR : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ WE : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ WK : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ NBR: int  1 1 2 4 2 2 4 2 1 4 ...
##  $ CO : num  15 5 3 10 5 50 40 5 7 7 ...
##  $ BUB: int  1 0 1 1 1 0 0 1 1 1 ...
##  $ SBM: int  3 2 2 2 2 3 2 3 3 3 ...
##  $ SBS: int  3 2 2 2 2 2 2 2 3 2 ...
##  $ BST: int  3 2 2 3 3 4 4 5 4 2 ...
##  $ BSC: int  4 4 4 4 4 4 4 5 4 5 ...
##  $ BSA: int  4 5 4 4 4 4 4 5 4 5 ...
##  $ BRE: int  4 2 3 4 2 3 2 3 3 3 ...
##  $ BCO: int  4 2 3 4 2 3 2 3 3 2 ...
##  $ BIN: int  3 4 4 4 3 3 3 3 3 4 ...
##  $ BHE: int  4 2 2 3 2 2 2 3 3 2 ...
##  $ BEN: int  4 5 4 3 4 4 5 5 4 5 ...
##  $ BRC: int  4 5 4 3 4 4 5 5 4 5 ...
##  $ BWE: int  4 3 4 5 4 4 5 5 4 4 ...
##  $ SP : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ BI : int  1 0 1 1 0 0 0 1 1 1 ...
##  $ WI : int  1 0 2 1 0 0 0 2 3 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 1 0 0 ...
##  $ GD : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ AG : int  5 3 3 3 4 4 5 4 3 4 ...
##  $ OC : int  4 6 2 2 6 3 3 7 7 3 ...
##  $ IN : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ NC : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ MC : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ CC : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ BO : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ MO : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ RO : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ NB : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ NM : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ NR : int  1 0 0 0 0 1 1 0 0 0 ...
dim(MS)
## [1] 810  47
BS_likert = subset(MS,TM == 3)
head(BS_likert)
##   ID CA BC TM TP FR DT DI TI SC LS TS MR WE WK NBR CO BUB SBM SBS BST BSC BSA
## 2  2  0  1  3  1  4  1  8 15  1  1  1  2  3  0   1  5   0   2   2   2   4   5
## 3  3  0  1  3  1  4  3  5 15  1  1  1  4  1  0   2  3   1   2   2   2   4   4
## 4  4  0  1  3  1  4  1  5 10  1  1  1  2  1  0   4 10   1   2   2   3   4   4
## 5  5  0  1  3  1  4  3  8 15  1  1  0  2  3  0   2  5   1   2   2   3   4   4
## 8  8  0  1  3  1  4  1 10 25  1  1  0  2  3  1   2  5   1   3   2   5   5   5
## 9  9  0  1  3  1  4  2 12 30  1  1  1  2  1  0   1  7   1   3   3   4   4   4
##   BRE BCO BIN BHE BEN BRC BWE SP BI WI MSI GD AG OC IN NC MC CC BO MO RO NB NM
## 2   2   2   4   2   5   5   3  2  0  0   0  0  3  6  3  1  1  0  1  1  0  1  3
## 3   3   3   4   2   4   4   4  1  1  2   0  1  3  2  4  0  1  0  0  1  0  0  3
## 4   4   4   4   3   3   3   5  1  1  1   0  1  3  2  3  0  1  0  1  1  0  1  3
## 5   2   2   3   2   4   4   4  2  0  0   0  0  4  6  3  1  1  0  0  1  0  0  2
## 8   3   3   3   3   5   5   5  1  1  2   1  0  4  7  2  0  1  0  0  1  0  0  2
## 9   3   3   3   3   4   4   4  1  1  3   0  0  3  7  3  1  1  0  1  1  0  1  3
##   NR
## 2  0
## 3  0
## 4  0
## 5  0
## 8  0
## 9  0
str(BS_likert)
## 'data.frame':    516 obs. of  47 variables:
##  $ ID : int  2 3 4 5 8 9 10 13 15 16 ...
##  $ CA : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ BC : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ TM : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ TP : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FR : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ DT : int  1 3 1 3 1 2 1 1 1 1 ...
##  $ DI : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ TI : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ SC : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LS : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ TS : int  1 1 1 0 0 1 1 0 1 0 ...
##  $ MR : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ WE : int  3 1 1 3 3 1 3 1 3 1 ...
##  $ WK : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ NBR: int  1 2 4 2 2 1 4 4 4 2 ...
##  $ CO : num  5 3 10 5 5 7 7 3 3 10 ...
##  $ BUB: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  2 2 2 2 3 3 3 1 4 4 ...
##  $ SBS: int  2 2 2 2 2 3 2 1 3 2 ...
##  $ BST: int  2 2 3 3 5 4 2 3 2 3 ...
##  $ BSC: int  4 4 4 4 5 4 5 3 4 3 ...
##  $ BSA: int  5 4 4 4 5 4 5 4 4 4 ...
##  $ BRE: int  2 3 4 2 3 3 3 1 4 4 ...
##  $ BCO: int  2 3 4 2 3 3 2 4 4 2 ...
##  $ BIN: int  4 4 4 3 3 3 4 3 4 4 ...
##  $ BHE: int  2 2 3 2 3 3 2 4 3 2 ...
##  $ BEN: int  5 4 3 4 5 4 5 4 4 4 ...
##  $ BRC: int  5 4 3 4 5 4 5 4 5 4 ...
##  $ BWE: int  3 4 5 4 5 4 4 4 5 4 ...
##  $ SP : int  2 1 1 2 1 1 1 1 1 1 ...
##  $ BI : int  0 1 1 0 1 1 1 1 1 1 ...
##  $ WI : int  0 2 1 0 2 3 2 1 1 1 ...
##  $ MSI: int  0 0 0 0 1 0 0 0 1 1 ...
##  $ GD : int  0 1 1 0 0 0 1 1 1 1 ...
##  $ AG : int  3 3 3 4 4 3 4 2 4 3 ...
##  $ OC : int  6 2 2 6 7 7 3 1 7 7 ...
##  $ IN : int  3 4 3 3 2 3 3 2 2 1 ...
##  $ NC : int  1 0 0 1 0 1 0 0 1 0 ...
##  $ MC : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ CC : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BO : int  1 0 1 0 0 1 0 1 0 0 ...
##  $ MO : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ RO : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NB : int  1 0 1 0 0 1 0 1 2 0 ...
##  $ NM : int  3 3 3 2 2 3 3 3 2 1 ...
##  $ NR : int  0 0 0 0 0 0 0 0 0 0 ...
dim(BS_likert)
## [1] 516  47
## Create dat_BST
dat_BST <- BS_likert[ , c(18,19,20,21,34)]
head(dat_BST)
##   BUB SBM SBS BST MSI
## 2   0   2   2   2   0
## 3   1   2   2   2   0
## 4   1   2   2   3   0
## 5   1   2   2   3   0
## 8   1   3   2   5   1
## 9   1   3   3   4   0
dim(dat_BST)
## [1] 516   5
size <- 516
dat_BST$BPE <- rep("BST", size = size) # Tạo biến mới BPE
names(dat_BST)[names(dat_BST) == "BST"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BST)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   2   0 BST
## 3   1   2   2   2   0 BST
## 4   1   2   2   3   0 BST
## 5   1   2   2   3   0 BST
## 8   1   3   2   5   1 BST
## 9   1   3   3   4   0 BST
## Create dat_BSC
dat_BSC <- BS_likert[ , c(18,19,20,22,34)]
head(dat_BSC)
##   BUB SBM SBS BSC MSI
## 2   0   2   2   4   0
## 3   1   2   2   4   0
## 4   1   2   2   4   0
## 5   1   2   2   4   0
## 8   1   3   2   5   1
## 9   1   3   3   4   0
dim(dat_BSC)
## [1] 516   5
str(dat_BSC)
## 'data.frame':    516 obs. of  5 variables:
##  $ BUB: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  2 2 2 2 3 3 3 1 4 4 ...
##  $ SBS: int  2 2 2 2 2 3 2 1 3 2 ...
##  $ BSC: int  4 4 4 4 5 4 5 3 4 3 ...
##  $ MSI: int  0 0 0 0 1 0 0 0 1 1 ...
dat_BSC$BPE <- rep("BSC", size = size) # Tạo biến mới BPE
names(dat_BSC)[names(dat_BSC) == "BSC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSC)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   4   0 BSC
## 3   1   2   2   4   0 BSC
## 4   1   2   2   4   0 BSC
## 5   1   2   2   4   0 BSC
## 8   1   3   2   5   1 BSC
## 9   1   3   3   4   0 BSC
## Create dat_BSA
dat_BSA <- BS_likert[ , c(18,19,20,23,34)]
head(dat_BSA)
##   BUB SBM SBS BSA MSI
## 2   0   2   2   5   0
## 3   1   2   2   4   0
## 4   1   2   2   4   0
## 5   1   2   2   4   0
## 8   1   3   2   5   1
## 9   1   3   3   4   0
dim(dat_BSA)
## [1] 516   5
dat_BSA$BPE <- rep("BSA", size = size) # Tạo biến mới BPE
names(dat_BSA)[names(dat_BSA) == "BSA"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BSA)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   5   0 BSA
## 3   1   2   2   4   0 BSA
## 4   1   2   2   4   0 BSA
## 5   1   2   2   4   0 BSA
## 8   1   3   2   5   1 BSA
## 9   1   3   3   4   0 BSA
## Create dat_BRE
dat_BRE <- BS_likert[ , c(18,19,20,24,34)]
head(dat_BRE)
##   BUB SBM SBS BRE MSI
## 2   0   2   2   2   0
## 3   1   2   2   3   0
## 4   1   2   2   4   0
## 5   1   2   2   2   0
## 8   1   3   2   3   1
## 9   1   3   3   3   0
dim(dat_BRE)
## [1] 516   5
dat_BRE$BPE <- rep("BRE", size = size) # Tạo biến mới BPE
names(dat_BRE)[names(dat_BRE) == "BRE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRE)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   2   0 BRE
## 3   1   2   2   3   0 BRE
## 4   1   2   2   4   0 BRE
## 5   1   2   2   2   0 BRE
## 8   1   3   2   3   1 BRE
## 9   1   3   3   3   0 BRE
## Create dat_BCO
dat_BCO <- BS_likert[ , c(18,19,20,25,34)]
head(dat_BCO)
##   BUB SBM SBS BCO MSI
## 2   0   2   2   2   0
## 3   1   2   2   3   0
## 4   1   2   2   4   0
## 5   1   2   2   2   0
## 8   1   3   2   3   1
## 9   1   3   3   3   0
dim(dat_BCO)
## [1] 516   5
dat_BCO$BPE <- rep("BCO", size = size) # Tạo biến mới BPE
names(dat_BCO)[names(dat_BCO) == "BCO"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BCO)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   2   0 BCO
## 3   1   2   2   3   0 BCO
## 4   1   2   2   4   0 BCO
## 5   1   2   2   2   0 BCO
## 8   1   3   2   3   1 BCO
## 9   1   3   3   3   0 BCO
## Create dat_BIN
dat_BIN <- BS_likert[ , c(18,19,20,26,34)]
head(dat_BIN)
##   BUB SBM SBS BIN MSI
## 2   0   2   2   4   0
## 3   1   2   2   4   0
## 4   1   2   2   4   0
## 5   1   2   2   3   0
## 8   1   3   2   3   1
## 9   1   3   3   3   0
dim(dat_BIN)
## [1] 516   5
dat_BIN$BPE <- rep("BIN", size = size) # Tạo biến mới BPE
names(dat_BIN)[names(dat_BIN) == "BIN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BIN)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   4   0 BIN
## 3   1   2   2   4   0 BIN
## 4   1   2   2   4   0 BIN
## 5   1   2   2   3   0 BIN
## 8   1   3   2   3   1 BIN
## 9   1   3   3   3   0 BIN
## Create dat_BHE
dat_BHE <- BS_likert[ , c(18,19,20,27,34)]
head(dat_BHE)
##   BUB SBM SBS BHE MSI
## 2   0   2   2   2   0
## 3   1   2   2   2   0
## 4   1   2   2   3   0
## 5   1   2   2   2   0
## 8   1   3   2   3   1
## 9   1   3   3   3   0
dim(dat_BHE)
## [1] 516   5
dat_BHE$BPE <- rep("BHE", size = size) # Tạo biến mới BPE
names(dat_BHE)[names(dat_BHE) == "BHE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BHE)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   2   0 BHE
## 3   1   2   2   2   0 BHE
## 4   1   2   2   3   0 BHE
## 5   1   2   2   2   0 BHE
## 8   1   3   2   3   1 BHE
## 9   1   3   3   3   0 BHE
## Create dat_BEN
dat_BEN <- BS_likert[ , c(18,19,20,28,34)]
head(dat_BEN)
##   BUB SBM SBS BEN MSI
## 2   0   2   2   5   0
## 3   1   2   2   4   0
## 4   1   2   2   3   0
## 5   1   2   2   4   0
## 8   1   3   2   5   1
## 9   1   3   3   4   0
dim(dat_BEN)
## [1] 516   5
dat_BEN$BPE <- rep("BEN", size = size) # Tạo biến mới BPE
names(dat_BEN)[names(dat_BEN) == "BEN"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BEN)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   5   0 BEN
## 3   1   2   2   4   0 BEN
## 4   1   2   2   3   0 BEN
## 5   1   2   2   4   0 BEN
## 8   1   3   2   5   1 BEN
## 9   1   3   3   4   0 BEN
## Create dat_BRC
dat_BRC <- BS_likert[ , c(18,19,20,29,34)]
head(dat_BRC)
##   BUB SBM SBS BRC MSI
## 2   0   2   2   5   0
## 3   1   2   2   4   0
## 4   1   2   2   3   0
## 5   1   2   2   4   0
## 8   1   3   2   5   1
## 9   1   3   3   4   0
dim(dat_BRC)
## [1] 516   5
dat_BRC$BPE <- rep("BRC", size = size) # Tạo biến mới BPE
names(dat_BRC)[names(dat_BRC) == "BRC"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BRC)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   5   0 BRC
## 3   1   2   2   4   0 BRC
## 4   1   2   2   3   0 BRC
## 5   1   2   2   4   0 BRC
## 8   1   3   2   5   1 BRC
## 9   1   3   3   4   0 BRC
## Create dat_BWE
dat_BWE <- BS_likert[ , c(18,19,20,30,34)]
head(dat_BWE)
##   BUB SBM SBS BWE MSI
## 2   0   2   2   3   0
## 3   1   2   2   4   0
## 4   1   2   2   5   0
## 5   1   2   2   4   0
## 8   1   3   2   5   1
## 9   1   3   3   4   0
dim(dat_BWE)
## [1] 516   5
dat_BWE$BPE <- rep("BWE", size = size) # Tạo biến mới BPE
names(dat_BWE)[names(dat_BWE) == "BWE"] <- "Res" # Đổi tên biến BST thành Res
head(dat_BWE)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   3   0 BWE
## 3   1   2   2   4   0 BWE
## 4   1   2   2   5   0 BWE
## 5   1   2   2   4   0 BWE
## 8   1   3   2   5   1 BWE
## 9   1   3   3   4   0 BWE
## Tạo dữ liệu likert từ 10 dữ liệu: dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE
B.Per <- rbind(dat_BST, dat_BSC, dat_BSA, dat_BRE, dat_BCO, dat_BIN, dat_BHE, dat_BEN, dat_BRC, dat_BWE)
head(B.Per)
##   BUB SBM SBS Res MSI BPE
## 2   0   2   2   2   0 BST
## 3   1   2   2   2   0 BST
## 4   1   2   2   3   0 BST
## 5   1   2   2   3   0 BST
## 8   1   3   2   5   1 BST
## 9   1   3   3   4   0 BST
str(B.Per)
## 'data.frame':    5160 obs. of  6 variables:
##  $ BUB: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  2 2 2 2 3 3 3 1 4 4 ...
##  $ SBS: int  2 2 2 2 2 3 2 1 3 2 ...
##  $ Res: int  2 2 3 3 5 4 2 3 2 3 ...
##  $ MSI: int  0 0 0 0 1 0 0 0 1 1 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(B.Per)
## [1] 5160    6
# Cách 2: Tạo dữ liệu thủ công trên excel với các cột BPE, Res, BUB, MSI, SBM, SBS: t_likert = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 3-Mode shift\\So lieu mode choice tu phan 1\\8. MS-Bieu do Likert.csv" ; BP_likert = read.csv(t_likert, header = T, sep = ";")

# Data Bus perception of people shift to bus
BP_MS = subset(B.Per,MSI == 1)
str(BP_MS)
## 'data.frame':    1250 obs. of  6 variables:
##  $ BUB: int  1 1 1 1 1 0 1 1 1 1 ...
##  $ SBM: int  3 4 4 5 4 4 4 4 4 4 ...
##  $ SBS: int  2 3 2 4 4 2 2 3 2 2 ...
##  $ Res: int  5 2 3 1 4 2 4 4 4 2 ...
##  $ MSI: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(BP_MS)
## [1] 1250    6
BP_MS = BP_MS[ , c(4, 6)]
dim(BP_MS)
## [1] 1250    2
head(BP_MS)
##    Res BPE
## 8    5 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 21   4 BST
## 29   2 BST
# Data Bus perception of people shift to bus (MSI = 1) and used to use bus before (BUB = 1) 
BP_MS_BU = subset(B.Per,BUB == 1 & MSI == 1)
str(BP_MS_BU)
## 'data.frame':    950 obs. of  6 variables:
##  $ BUB: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  3 4 4 5 4 4 4 4 4 4 ...
##  $ SBS: int  2 3 2 4 4 2 3 2 2 3 ...
##  $ Res: int  5 2 3 1 4 4 4 4 2 4 ...
##  $ MSI: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(BP_MS_BU)
## [1] 950   6
BP_MS_BU = BP_MS_BU[ , c(4, 6)]
dim(BP_MS_BU)
## [1] 950   2
head(BP_MS_BU)
##    Res BPE
## 8    5 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 21   4 BST
## 30   4 BST
# Data Bus perception of people don't shift to bus
BP_nMS = subset(B.Per,MSI == 0)
str(BP_nMS)
## 'data.frame':    3910 obs. of  6 variables:
##  $ BUB: int  0 1 1 1 1 1 1 1 1 1 ...
##  $ SBM: int  2 2 2 2 3 3 1 4 2 2 ...
##  $ SBS: int  2 2 2 2 3 2 1 2 2 2 ...
##  $ Res: int  2 2 3 3 4 2 3 3 2 2 ...
##  $ MSI: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ BPE: chr  "BST" "BST" "BST" "BST" ...
dim(BP_nMS)
## [1] 3910    6
BP_nMS = BP_nMS[ , c(4, 6)]
dim(BP_nMS)
## [1] 3910    2
head(BP_nMS)
##    Res BPE
## 2    2 BST
## 3    2 BST
## 4    3 BST
## 5    3 BST
## 9    4 BST
## 10   2 BST

7.2. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi - BP_MS

# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS)
##    Res BPE
## 8    5 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 21   4 BST
## 29   2 BST
attach(BP_MS)
BP_MS = within(BP_MS, {
  Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
  BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
  })
str(BP_MS)
## 'data.frame':    1250 obs. of  2 variables:
##  $ Res: Factor w/ 5 levels "Very Disagree",..: 5 2 3 1 4 2 4 4 4 2 ...
##  $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS)
##             Res           BPE     
##  Very Disagree: 85   BST    :125  
##  Disagree     :190   BSC    :125  
##  Normal       :263   BSA    :125  
##  Agree        :582   BRE    :125  
##  Very Agree   :130   BCO    :125  
##                      BIN    :125  
##                      (Other):500
t = compareGroups(Res ~ BPE, data = BP_MS)
createTable(t)
## 
## --------Summary descriptives table by 'Res'---------
## 
## ___________________________________________________________________________ 
##         Very Disagree  Disagree    Normal     Agree    Very Agree p.overall 
##             N=85        N=190      N=263      N=582      N=130              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## BPE:                                                               <0.001   
##     BST  10 (11.8%)   29 (15.3%) 32 (12.2%) 48 (8.25%) 6 (4.62%)            
##     BSC   6 (7.06%)   5 (2.63%)  21 (7.98%) 67 (11.5%) 26 (20.0%)           
##     BSA   9 (10.6%)   36 (18.9%) 37 (14.1%) 37 (6.36%) 6 (4.62%)            
##     BRE   6 (7.06%)   31 (16.3%) 46 (17.5%) 38 (6.53%) 4 (3.08%)            
##     BCO   9 (10.6%)   5 (2.63%)  17 (6.46%) 67 (11.5%) 27 (20.8%)           
##     BIN   7 (8.24%)   13 (6.84%) 33 (12.5%) 62 (10.7%) 10 (7.69%)           
##     BHE   7 (8.24%)   5 (2.63%)  20 (7.60%) 81 (13.9%) 12 (9.23%)           
##     BEN   9 (10.6%)   11 (5.79%) 21 (7.98%) 75 (12.9%) 9 (6.92%)            
##     BRC  15 (17.6%)   51 (26.8%) 23 (8.75%) 34 (5.84%) 2 (1.54%)            
##     BWE   7 (8.24%)   4 (2.11%)  13 (4.94%) 73 (12.5%) 28 (21.5%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS %>% 
  group_by(BPE, Res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(BPE) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 2)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting
df_for_ploting %>% 
  filter(Res == Res[5]) %>% 
  arrange(percent) %>% 
  pull(BPE) -> order_x
order_x
##  [1] BRC BRE BST BSA BEN BIN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot: 
my_colors <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font <- "Roboto Condensed"

df_for_ploting %>% 
  mutate(BPE = factor(BPE, levels = order_x), Res = factor(Res, levels = Res[5:1])) -> df_odered
df_odered
## # A tibble: 50 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    10     8   8%      
##  2 BST   Disagree         29    23.2 23.2%   
##  3 BST   Normal           32    25.6 25.6%   
##  4 BST   Agree            48    38.4 38.4%   
##  5 BST   Very Agree        6     4.8 4.8%    
##  6 BSC   Very Disagree     6     4.8 4.8%    
##  7 BSC   Disagree          5     4   4%      
##  8 BSC   Normal           21    16.8 16.8%   
##  9 BSC   Agree            67    53.6 53.6%   
## 10 BSC   Very Agree       26    20.8 20.8%   
## # ... with 40 more rows
library(extrafont)
## Registering fonts with R
theme_set(theme_minimal())
gg <- df_odered %>% 
  ggplot(aes(x = BPE, y = percent, fill = Res)) + 
  geom_col(width = 0.8) + 
  coord_flip() + 
  scale_fill_manual(values = my_colors[5:1], name = "") + 
  theme(legend.position = "top") + 
  theme(text = element_text(family = my_font)) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) + 
  theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) + 
  theme(axis.text = element_text(color = "grey20", size = 10.2)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) + 
  theme(legend.key.height = unit(0.15, "mm")) + 
  labs(x = NULL, y = NULL, 
       title = "Bus perception of motor users to shift to bus", 
       subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# For displaying percent of "Very Disagree": 
df_odered %>% 
  filter(Res == "Very Disagree") %>% 
  filter(percent >= 3) -> df_for_text1
df_for_text1
## # A tibble: 10 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    10     8   8%      
##  2 BSC   Very Disagree     6     4.8 4.8%    
##  3 BSA   Very Disagree     9     7.2 7.2%    
##  4 BRE   Very Disagree     6     4.8 4.8%    
##  5 BCO   Very Disagree     9     7.2 7.2%    
##  6 BIN   Very Disagree     7     5.6 5.6%    
##  7 BHE   Very Disagree     7     5.6 5.6%    
##  8 BEN   Very Disagree     9     7.2 7.2%    
##  9 BRC   Very Disagree    15    12   12%     
## 10 BWE   Very Disagree     7     5.6 5.6%
# For displaying percent of "Disagree": 
df_odered %>% 
  filter(Res == "Disagree") %>% 
  filter(percent >= 3) -> df_for_text2
df_for_text2
## # A tibble: 10 x 5
##    BPE   Res          n percent bar_text
##    <fct> <fct>    <int>   <dbl> <chr>   
##  1 BST   Disagree    29    23.2 23.2%   
##  2 BSC   Disagree     5     4   4%      
##  3 BSA   Disagree    36    28.8 28.8%   
##  4 BRE   Disagree    31    24.8 24.8%   
##  5 BCO   Disagree     5     4   4%      
##  6 BIN   Disagree    13    10.4 10.4%   
##  7 BHE   Disagree     5     4   4%      
##  8 BEN   Disagree    11     8.8 8.8%    
##  9 BRC   Disagree    51    40.8 40.8%   
## 10 BWE   Disagree     4     3.2 3.2%
# For displaying percent of "Normal": 
df_odered %>% 
  filter(Res == "Normal") %>% 
  filter(percent >= 3) -> df_for_text3
df_for_text3
## # A tibble: 10 x 5
##    BPE   Res        n percent bar_text
##    <fct> <fct>  <int>   <dbl> <chr>   
##  1 BST   Normal    32    25.6 25.6%   
##  2 BSC   Normal    21    16.8 16.8%   
##  3 BSA   Normal    37    29.6 29.6%   
##  4 BRE   Normal    46    36.8 36.8%   
##  5 BCO   Normal    17    13.6 13.6%   
##  6 BIN   Normal    33    26.4 26.4%   
##  7 BHE   Normal    20    16   16%     
##  8 BEN   Normal    21    16.8 16.8%   
##  9 BRC   Normal    23    18.4 18.4%   
## 10 BWE   Normal    13    10.4 10.4%
# For displaying percent of "Agree": 
df_odered %>% 
  filter(Res == "Agree") %>% 
  filter(percent >= 3) -> df_for_text4
df_for_text4
## # A tibble: 10 x 5
##    BPE   Res       n percent bar_text
##    <fct> <fct> <int>   <dbl> <chr>   
##  1 BST   Agree    48    38.4 38.4%   
##  2 BSC   Agree    67    53.6 53.6%   
##  3 BSA   Agree    37    29.6 29.6%   
##  4 BRE   Agree    38    30.4 30.4%   
##  5 BCO   Agree    67    53.6 53.6%   
##  6 BIN   Agree    62    49.6 49.6%   
##  7 BHE   Agree    81    64.8 64.8%   
##  8 BEN   Agree    75    60   60%     
##  9 BRC   Agree    34    27.2 27.2%   
## 10 BWE   Agree    73    58.4 58.4%
# For displaying percent of "Very Agree": 
df_odered %>% 
  filter(Res == "Very Agree") %>% 
  filter(percent >= 3) -> df_for_text5
df_for_text5
## # A tibble: 9 x 5
##   BPE   Res            n percent bar_text
##   <fct> <fct>      <int>   <dbl> <chr>   
## 1 BST   Very Agree     6     4.8 4.8%    
## 2 BSC   Very Agree    26    20.8 20.8%   
## 3 BSA   Very Agree     6     4.8 4.8%    
## 4 BRE   Very Agree     4     3.2 3.2%    
## 5 BCO   Very Agree    27    21.6 21.6%   
## 6 BIN   Very Agree    10     8   8%      
## 7 BHE   Very Agree    12     9.6 9.6%    
## 8 BEN   Very Agree     9     7.2 7.2%    
## 9 BWE   Very Agree    28    22.4 22.4%
# For displaying percent of "Very Agree": 
df_odered %>% 
  filter(Res == "Very Agree") -> df_for_text
df_for_text
## # A tibble: 10 x 5
##    BPE   Res            n percent bar_text
##    <fct> <fct>      <int>   <dbl> <chr>   
##  1 BST   Very Agree     6     4.8 4.8%    
##  2 BSC   Very Agree    26    20.8 20.8%   
##  3 BSA   Very Agree     6     4.8 4.8%    
##  4 BRE   Very Agree     4     3.2 3.2%    
##  5 BCO   Very Agree    27    21.6 21.6%   
##  6 BIN   Very Agree    10     8   8%      
##  7 BHE   Very Agree    12     9.6 9.6%    
##  8 BEN   Very Agree     9     7.2 7.2%    
##  9 BRC   Very Agree     2     1.6 1.6%    
## 10 BWE   Very Agree    28    22.4 22.4%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg + 
  geom_text(data = df_for_text1 %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) + 
  geom_text(data = df_for_text1 %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font) +
  geom_text(data = df_for_text, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

7.3. Vẽ biểu đồ likert cho nhóm đối tượng chuyển đổi và đã từng sử dụng xe buýt trước đó - BP_MS_BU

# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_MS_BU)
##    Res BPE
## 8    5 BST
## 15   2 BST
## 16   3 BST
## 18   1 BST
## 21   4 BST
## 30   4 BST
attach(BP_MS_BU)
## The following objects are masked from BP_MS:
## 
##     BPE, Res
BP_MS_BU = within(BP_MS_BU, {
  Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
  BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
  })
str(BP_MS_BU)
## 'data.frame':    950 obs. of  2 variables:
##  $ Res: Factor w/ 5 levels "Very Disagree",..: 5 2 3 1 4 4 4 4 2 4 ...
##  $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_MS_BU)
##             Res           BPE     
##  Very Disagree: 70   BST    : 95  
##  Disagree     :131   BSC    : 95  
##  Normal       :213   BSA    : 95  
##  Agree        :429   BRE    : 95  
##  Very Agree   :107   BCO    : 95  
##                      BIN    : 95  
##                      (Other):380
t_BU = compareGroups(Res ~ BPE, data = BP_MS_BU)
createTable(t_BU)
## 
## --------Summary descriptives table by 'Res'---------
## 
## ___________________________________________________________________________ 
##         Very Disagree  Disagree    Normal     Agree    Very Agree p.overall 
##             N=70        N=131      N=213      N=429      N=107              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## BPE:                                                               <0.001   
##     BST   7 (10.0%)   21 (16.0%) 28 (13.1%) 33 (7.69%) 6 (5.61%)            
##     BSC   6 (8.57%)   2 (1.53%)  18 (8.45%) 51 (11.9%) 18 (16.8%)           
##     BSA   7 (10.0%)   29 (22.1%) 28 (13.1%) 26 (6.06%) 5 (4.67%)            
##     BRE   5 (7.14%)   19 (14.5%) 37 (17.4%) 30 (6.99%) 4 (3.74%)            
##     BCO   7 (10.0%)   3 (2.29%)  14 (6.57%) 51 (11.9%) 20 (18.7%)           
##     BIN   5 (7.14%)   11 (8.40%) 26 (12.2%) 43 (10.0%) 10 (9.35%)           
##     BHE   7 (10.0%)   3 (2.29%)  16 (7.51%) 58 (13.5%) 11 (10.3%)           
##     BEN   9 (12.9%)   6 (4.58%)  18 (8.45%) 54 (12.6%) 8 (7.48%)            
##     BRC  11 (15.7%)   35 (26.7%) 20 (9.39%) 27 (6.29%) 2 (1.87%)            
##     BWE   6 (8.57%)   2 (1.53%)  8 (3.76%)  56 (13.1%) 23 (21.5%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_MS_BU %>% 
  group_by(BPE, Res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(BPE) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 2)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting_BU
df_for_ploting_BU %>% 
  filter(Res == Res[5]) %>% 
  arrange(percent) %>% 
  pull(BPE) -> order_x_BU
order_x_BU
##  [1] BRC BRE BSA BST BEN BIN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot: 
my_colors_BU <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_BU <- "Roboto Condensed"

df_for_ploting_BU %>% 
  mutate(BPE = factor(BPE, levels = order_x_BU), Res = factor(Res, levels = Res[5:1])) -> df_odered_BU
df_odered_BU
## # A tibble: 50 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree     7    7.37 7.37%   
##  2 BST   Disagree         21   22.1  22.11%  
##  3 BST   Normal           28   29.5  29.47%  
##  4 BST   Agree            33   34.7  34.74%  
##  5 BST   Very Agree        6    6.32 6.32%   
##  6 BSC   Very Disagree     6    6.32 6.32%   
##  7 BSC   Disagree          2    2.11 2.11%   
##  8 BSC   Normal           18   19.0  18.95%  
##  9 BSC   Agree            51   53.7  53.68%  
## 10 BSC   Very Agree       18   19.0  18.95%  
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_BU <- df_odered_BU %>% 
  ggplot(aes(x = BPE, y = percent, fill = Res)) + 
  geom_col(width = 0.8) + 
  coord_flip() + 
  scale_fill_manual(values = my_colors[5:1], name = "") + 
  theme(legend.position = "top") + 
  theme(text = element_text(family = my_font)) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) + 
  theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) + 
  theme(axis.text = element_text(color = "grey20", size = 10.2)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) + 
  theme(legend.key.height = unit(0.15, "mm")) + 
  labs(x = NULL, y = NULL, 
       title = "Bus perception of motor users shifting to bus and being used to use bus before", 
       subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_BU
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# For displaying percent of "Very Disagree": 
df_odered_BU %>% 
  filter(Res == "Very Disagree") %>% 
  filter(percent >= 3) -> df_for_text1_BU
df_for_text1_BU
## # A tibble: 10 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree     7    7.37 7.37%   
##  2 BSC   Very Disagree     6    6.32 6.32%   
##  3 BSA   Very Disagree     7    7.37 7.37%   
##  4 BRE   Very Disagree     5    5.26 5.26%   
##  5 BCO   Very Disagree     7    7.37 7.37%   
##  6 BIN   Very Disagree     5    5.26 5.26%   
##  7 BHE   Very Disagree     7    7.37 7.37%   
##  8 BEN   Very Disagree     9    9.47 9.47%   
##  9 BRC   Very Disagree    11   11.6  11.58%  
## 10 BWE   Very Disagree     6    6.32 6.32%
# For displaying percent of "Disagree": 
df_odered_BU %>% 
  filter(Res == "Disagree") %>% 
  filter(percent >= 3) -> df_for_text2_BU
df_for_text2_BU
## # A tibble: 8 x 5
##   BPE   Res          n percent bar_text
##   <fct> <fct>    <int>   <dbl> <chr>   
## 1 BST   Disagree    21   22.1  22.11%  
## 2 BSA   Disagree    29   30.5  30.53%  
## 3 BRE   Disagree    19   20    20%     
## 4 BCO   Disagree     3    3.16 3.16%   
## 5 BIN   Disagree    11   11.6  11.58%  
## 6 BHE   Disagree     3    3.16 3.16%   
## 7 BEN   Disagree     6    6.32 6.32%   
## 8 BRC   Disagree    35   36.8  36.84%
# For displaying percent of "Normal": 
df_odered_BU %>% 
  filter(Res == "Normal") %>% 
  filter(percent >= 3) -> df_for_text3_BU
df_for_text3_BU
## # A tibble: 10 x 5
##    BPE   Res        n percent bar_text
##    <fct> <fct>  <int>   <dbl> <chr>   
##  1 BST   Normal    28   29.5  29.47%  
##  2 BSC   Normal    18   19.0  18.95%  
##  3 BSA   Normal    28   29.5  29.47%  
##  4 BRE   Normal    37   39.0  38.95%  
##  5 BCO   Normal    14   14.7  14.74%  
##  6 BIN   Normal    26   27.4  27.37%  
##  7 BHE   Normal    16   16.8  16.84%  
##  8 BEN   Normal    18   19.0  18.95%  
##  9 BRC   Normal    20   21.0  21.05%  
## 10 BWE   Normal     8    8.42 8.42%
# For displaying percent of "Agree": 
df_odered_BU %>% 
  filter(Res == "Agree") %>% 
  filter(percent >= 3) -> df_for_text4_BU
df_for_text4_BU
## # A tibble: 10 x 5
##    BPE   Res       n percent bar_text
##    <fct> <fct> <int>   <dbl> <chr>   
##  1 BST   Agree    33    34.7 34.74%  
##  2 BSC   Agree    51    53.7 53.68%  
##  3 BSA   Agree    26    27.4 27.37%  
##  4 BRE   Agree    30    31.6 31.58%  
##  5 BCO   Agree    51    53.7 53.68%  
##  6 BIN   Agree    43    45.3 45.26%  
##  7 BHE   Agree    58    61.0 61.05%  
##  8 BEN   Agree    54    56.8 56.84%  
##  9 BRC   Agree    27    28.4 28.42%  
## 10 BWE   Agree    56    59.0 58.95%
# For displaying percent of "Very Agree": 
df_odered_BU %>% 
  filter(Res == "Very Agree") %>% 
  filter(percent >= 3) -> df_for_text5_BU
df_for_text5_BU
## # A tibble: 9 x 5
##   BPE   Res            n percent bar_text
##   <fct> <fct>      <int>   <dbl> <chr>   
## 1 BST   Very Agree     6    6.32 6.32%   
## 2 BSC   Very Agree    18   19.0  18.95%  
## 3 BSA   Very Agree     5    5.26 5.26%   
## 4 BRE   Very Agree     4    4.21 4.21%   
## 5 BCO   Very Agree    20   21.0  21.05%  
## 6 BIN   Very Agree    10   10.5  10.53%  
## 7 BHE   Very Agree    11   11.6  11.58%  
## 8 BEN   Very Agree     8    8.42 8.42%   
## 9 BWE   Very Agree    23   24.2  24.21%
# For displaying percent of "Very Agree": 
df_odered_BU %>% 
  filter(Res == "Very Agree") -> df_for_text_BU
df_for_text_BU
## # A tibble: 10 x 5
##    BPE   Res            n percent bar_text
##    <fct> <fct>      <int>   <dbl> <chr>   
##  1 BST   Very Agree     6    6.32 6.32%   
##  2 BSC   Very Agree    18   19.0  18.95%  
##  3 BSA   Very Agree     5    5.26 5.26%   
##  4 BRE   Very Agree     4    4.21 4.21%   
##  5 BCO   Very Agree    20   21.0  21.05%  
##  6 BIN   Very Agree    10   10.5  10.53%  
##  7 BHE   Very Agree    11   11.6  11.58%  
##  8 BEN   Very Agree     8    8.42 8.42%   
##  9 BRC   Very Agree     2    2.11 2.11%   
## 10 BWE   Very Agree    23   24.2  24.21%
# Ad text layers-thay đổi giá trị y để thể hiện vị trí ghi text phù hợp
gg_BU + 
  geom_text(data = df_for_text1_BU %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) + 
  geom_text(data = df_for_text1_BU %>% filter(BPE == "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_BU) +
  geom_text(data = df_for_text_BU, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_BU)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

7.4. Vẽ biểu đồ likert cho nhóm đối tượng không chuyển đổi - BP_nMS

# Plot Likert graph for mode shift to bus people - BP_MS
library(tidyverse)
library(compareGroups)
# Data Bus perception of people shift to bus - BP_MS
head(BP_nMS)
##    Res BPE
## 2    2 BST
## 3    2 BST
## 4    3 BST
## 5    3 BST
## 9    4 BST
## 10   2 BST
attach(BP_nMS)
## The following objects are masked from BP_MS_BU:
## 
##     BPE, Res
## The following objects are masked from BP_MS:
## 
##     BPE, Res
BP_nMS = within(BP_nMS, {
  Res = factor(Res, labels = c("Very Disagree", "Disagree", "Normal", "Agree", "Very Agree"))
  BPE = factor(BPE, labels = c("BST", "BSC", "BSA", "BRE", "BCO", "BIN", "BHE", "BEN", "BRC", "BWE"))
  })
str(BP_nMS)
## 'data.frame':    3910 obs. of  2 variables:
##  $ Res: Factor w/ 5 levels "Very Disagree",..: 2 2 3 3 4 2 3 3 2 2 ...
##  $ BPE: Factor w/ 10 levels "BST","BSC","BSA",..: 9 9 9 9 9 9 9 9 9 9 ...
summary(BP_nMS)
##             Res            BPE      
##  Very Disagree: 137   BST    : 391  
##  Disagree     : 744   BSC    : 391  
##  Normal       :1060   BSA    : 391  
##  Agree        :1673   BRE    : 391  
##  Very Agree   : 296   BCO    : 391  
##                       BIN    : 391  
##                       (Other):1564
t_nMS = compareGroups(Res ~ BPE, data = BP_nMS)
createTable(t_nMS)
## 
## --------Summary descriptives table by 'Res'---------
## 
## ______________________________________________________________________________ 
##         Very Disagree  Disagree     Normal       Agree    Very Agree p.overall 
##             N=137        N=744      N=1060      N=1673      N=296              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## BPE:                                                                  <0.001   
##     BST  10 (7.30%)   109 (14.7%) 117 (11.0%) 139 (8.31%) 16 (5.41%)           
##     BSC   9 (6.57%)   28 (3.76%)  87 (8.21%)  221 (13.2%) 46 (15.5%)           
##     BSA  10 (7.30%)   172 (23.1%) 123 (11.6%) 69 (4.12%)  17 (5.74%)           
##     BRE  18 (13.1%)   86 (11.6%)  167 (15.8%) 108 (6.46%) 12 (4.05%)           
##     BCO  10 (7.30%)   33 (4.44%)  70 (6.60%)  223 (13.3%) 55 (18.6%)           
##     BIN   6 (4.38%)   75 (10.1%)  117 (11.0%) 173 (10.3%) 20 (6.76%)           
##     BHE  15 (10.9%)   25 (3.36%)  113 (10.7%) 209 (12.5%) 29 (9.80%)           
##     BEN  18 (13.1%)   43 (5.78%)  108 (10.2%) 197 (11.8%) 25 (8.45%)           
##     BRC  33 (24.1%)   149 (20.0%) 121 (11.4%) 81 (4.84%)  7 (2.36%)            
##     BWE   8 (5.84%)   24 (3.23%)  37 (3.49%)  253 (15.1%) 69 (23.3%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
BP_nMS %>% 
  group_by(BPE, Res) %>% 
  count() %>% 
  ungroup() %>% 
  group_by(BPE) %>% 
  mutate(percent = 100*n / sum(n)) %>% 
  mutate(percent = round(percent, 2)) %>% 
  mutate(bar_text = paste0(percent, "%")) %>% 
  ungroup() -> df_for_ploting_nMS
df_for_ploting_nMS %>% 
  filter(Res == Res[5]) %>% 
  arrange(percent) %>% 
  pull(BPE) -> order_x_nMS
order_x_nMS
##  [1] BRC BRE BST BSA BIN BEN BHE BSC BCO BWE
## Levels: BST BSC BSA BRE BCO BIN BHE BEN BRC BWE
# Make a draft plot: 
my_colors_nMS <- c("#3e6487", "#829cb2", "pink", "pink3", "palevioletred")
my_font_nMS <- "Roboto Condensed"

df_for_ploting_nMS %>% 
  mutate(BPE = factor(BPE, levels = order_x_nMS), Res = factor(Res, levels = Res[5:1])) -> df_odered_nMS
df_odered_nMS
## # A tibble: 50 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    10    2.56 2.56%   
##  2 BST   Disagree        109   27.9  27.88%  
##  3 BST   Normal          117   29.9  29.92%  
##  4 BST   Agree           139   35.6  35.55%  
##  5 BST   Very Agree       16    4.09 4.09%   
##  6 BSC   Very Disagree     9    2.3  2.3%    
##  7 BSC   Disagree         28    7.16 7.16%   
##  8 BSC   Normal           87   22.2  22.25%  
##  9 BSC   Agree           221   56.5  56.52%  
## 10 BSC   Very Agree       46   11.8  11.76%  
## # ... with 40 more rows
library(extrafont)
theme_set(theme_minimal())
gg_nMS <- df_odered_nMS %>% 
  ggplot(aes(x = BPE, y = percent, fill = Res)) + 
  geom_col(width = 0.8) + 
  coord_flip() + 
  scale_fill_manual(values = my_colors[5:1], name = "") + 
  theme(legend.position = "top") + 
  theme(text = element_text(family = my_font)) + 
  guides(fill = guide_legend(reverse = TRUE)) + 
  scale_y_continuous(labels = paste0(seq(0, 100, 25), "%"), expand = c(0, 0)) + 
  theme(plot.title = element_text(size = 18), plot.subtitle = element_text(size = 11, color = "grey20")) + 
  theme(axis.text = element_text(color = "grey20", size = 10.2)) + 
  theme(plot.margin = unit(rep(0.7, 4), "cm")) + 
  theme(panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) + 
  theme(legend.key.height = unit(0.15, "mm")) + 
  labs(x = NULL, y = NULL, 
       title = "Bus perception of motor users not shifting to bus", 
       subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.")
gg_nMS
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# For displaying percent of "Very Disagree": 
df_odered_nMS %>% 
  filter(Res == "Very Disagree") -> df_for_text1_nMS
df_for_text1_nMS
## # A tibble: 10 x 5
##    BPE   Res               n percent bar_text
##    <fct> <fct>         <int>   <dbl> <chr>   
##  1 BST   Very Disagree    10    2.56 2.56%   
##  2 BSC   Very Disagree     9    2.3  2.3%    
##  3 BSA   Very Disagree    10    2.56 2.56%   
##  4 BRE   Very Disagree    18    4.6  4.6%    
##  5 BCO   Very Disagree    10    2.56 2.56%   
##  6 BIN   Very Disagree     6    1.53 1.53%   
##  7 BHE   Very Disagree    15    3.84 3.84%   
##  8 BEN   Very Disagree    18    4.6  4.6%    
##  9 BRC   Very Disagree    33    8.44 8.44%   
## 10 BWE   Very Disagree     8    2.05 2.05%
# For displaying percent of "Disagree": 
df_odered_nMS %>% 
  filter(Res == "Disagree") %>% 
  filter(percent >= 3) -> df_for_text2_nMS
df_for_text2_nMS
## # A tibble: 10 x 5
##    BPE   Res          n percent bar_text
##    <fct> <fct>    <int>   <dbl> <chr>   
##  1 BST   Disagree   109   27.9  27.88%  
##  2 BSC   Disagree    28    7.16 7.16%   
##  3 BSA   Disagree   172   44.0  43.99%  
##  4 BRE   Disagree    86   22.0  21.99%  
##  5 BCO   Disagree    33    8.44 8.44%   
##  6 BIN   Disagree    75   19.2  19.18%  
##  7 BHE   Disagree    25    6.39 6.39%   
##  8 BEN   Disagree    43   11    11%     
##  9 BRC   Disagree   149   38.1  38.11%  
## 10 BWE   Disagree    24    6.14 6.14%
# For displaying percent of "Normal": 
df_odered_nMS %>% 
  filter(Res == "Normal") %>% 
  filter(percent >= 3) -> df_for_text3_nMS
df_for_text3_nMS
## # A tibble: 10 x 5
##    BPE   Res        n percent bar_text
##    <fct> <fct>  <int>   <dbl> <chr>   
##  1 BST   Normal   117   29.9  29.92%  
##  2 BSC   Normal    87   22.2  22.25%  
##  3 BSA   Normal   123   31.5  31.46%  
##  4 BRE   Normal   167   42.7  42.71%  
##  5 BCO   Normal    70   17.9  17.9%   
##  6 BIN   Normal   117   29.9  29.92%  
##  7 BHE   Normal   113   28.9  28.9%   
##  8 BEN   Normal   108   27.6  27.62%  
##  9 BRC   Normal   121   31.0  30.95%  
## 10 BWE   Normal    37    9.46 9.46%
# For displaying percent of "Agree": 
df_odered_nMS %>% 
  filter(Res == "Agree") %>% 
  filter(percent >= 3) -> df_for_text4_nMS
df_for_text4_nMS
## # A tibble: 10 x 5
##    BPE   Res       n percent bar_text
##    <fct> <fct> <int>   <dbl> <chr>   
##  1 BST   Agree   139    35.6 35.55%  
##  2 BSC   Agree   221    56.5 56.52%  
##  3 BSA   Agree    69    17.6 17.65%  
##  4 BRE   Agree   108    27.6 27.62%  
##  5 BCO   Agree   223    57.0 57.03%  
##  6 BIN   Agree   173    44.2 44.25%  
##  7 BHE   Agree   209    53.4 53.45%  
##  8 BEN   Agree   197    50.4 50.38%  
##  9 BRC   Agree    81    20.7 20.72%  
## 10 BWE   Agree   253    64.7 64.71%
# For displaying percent of "Very Agree": 
df_odered_nMS %>% 
  filter(Res == "Very Agree") %>% 
  filter(percent >= 3) -> df_for_text5_nMS
df_for_text5_nMS
## # A tibble: 9 x 5
##   BPE   Res            n percent bar_text
##   <fct> <fct>      <int>   <dbl> <chr>   
## 1 BST   Very Agree    16    4.09 4.09%   
## 2 BSC   Very Agree    46   11.8  11.76%  
## 3 BSA   Very Agree    17    4.35 4.35%   
## 4 BRE   Very Agree    12    3.07 3.07%   
## 5 BCO   Very Agree    55   14.1  14.07%  
## 6 BIN   Very Agree    20    5.12 5.12%   
## 7 BHE   Very Agree    29    7.42 7.42%   
## 8 BEN   Very Agree    25    6.39 6.39%   
## 9 BWE   Very Agree    69   17.6  17.65%
# For displaying percent of "Very Agree": 
df_odered_nMS %>% 
  filter(Res == "Very Agree") -> df_for_text_nMS
df_for_text_nMS
## # A tibble: 10 x 5
##    BPE   Res            n percent bar_text
##    <fct> <fct>      <int>   <dbl> <chr>   
##  1 BST   Very Agree    16    4.09 4.09%   
##  2 BSC   Very Agree    46   11.8  11.76%  
##  3 BSA   Very Agree    17    4.35 4.35%   
##  4 BRE   Very Agree    12    3.07 3.07%   
##  5 BCO   Very Agree    55   14.1  14.07%  
##  6 BIN   Very Agree    20    5.12 5.12%   
##  7 BHE   Very Agree    29    7.42 7.42%   
##  8 BEN   Very Agree    25    6.39 6.39%   
##  9 BRC   Very Agree     7    1.79 1.79%   
## 10 BWE   Very Agree    69   17.6  17.65%
# Ad text layers: 
gg_nMS + 
  geom_text(data = df_for_text1_nMS %>% filter(BPE != "BST"), aes(x = BPE, y = 5, label = bar_text), size = 4, color = "white", family = my_font_nMS) + 
  geom_text(data = df_for_text_nMS, aes(x = BPE, y = 100-4, label = bar_text), size = 4, color = "white", family = my_font_nMS)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database