A. ANALYSE WITH DATA OF THE CHOICE OF BUS AND MOTOR 1. Read and Create Data

t = "E:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 1-Mode choice\\SO LIEU R\\Mode choice in DN.csv"
MC = read.csv(t, header = T)
head(MC)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 1           6                  1            0       3         3              1
## 2           3                  1            0       1         4              1
## 3           3                  1            0       1         4              3
## 4           3                  1            0       1         4              1
## 5           3                  1            0       1         4              3
## 6           4                  1            0       1         4              1
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 1        2            10                  1             1                     0
## 2        8            15                  1             1                     1
## 3        5            15                  1             1                     1
## 4        5            10                  1             1                     1
## 5        8            15                  1             1                     0
## 6       20            30                  1             1                     0
##   Mode.Choice.Reason Weather Weekend Non.Bus.Reason Cost Bus.Stop.Present
## 1                  4       1       0              1   12                1
## 2                  2       3       0              1    8                2
## 3                  4       1       0              2    5                1
## 4                  2       1       0              4    5                1
## 5                  2       3       0              2    8                2
## 6                  5       3       0              2   40                2
##   Gender Age Occupation Income Number.of.Children Motor.Certificate
## 1      0   5          4      2                  2                 0
## 2      0   3          6      3                  1                 1
## 3      1   3          2      4                  0                 1
## 4      1   3          2      3                  0                 1
## 5      0   4          6      3                  1                 1
## 6      1   4          3      4                  2                 1
##   Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## 1               0              0            0          0                  1
## 2               0              1            1          0                  1
## 3               0              0            1          0                  0
## 4               0              1            1          0                  1
## 5               0              0            1          0                  0
## 6               1              1            1          1                  1
##   Number.of.Motors Number.of.Car
## 1                2             1
## 2                3             0
## 3                3             0
## 4                3             0
## 5                2             0
## 6                2             1
str(MC)
## 'data.frame':    847 obs. of  30 variables:
##  $ Travel.Mode          : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ Bus.Stop.Condition   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Central.Area         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Purpose              : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ Distance             : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ Travel.Period        : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ Sidewalk.Clearance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Lane.Separate        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Temporary.Stop.Number: int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Mode.Choice.Reason   : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ Weather              : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ Weekend              : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ Non.Bus.Reason       : int  1 1 2 4 2 2 4 2 1 4 ...
##  $ Cost                 : int  12 8 5 5 8 40 30 10 12 10 ...
##  $ Bus.Stop.Present     : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ Gender               : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ Age                  : int  5 3 3 3 4 4 5 4 3 4 ...
##  $ Occupation           : int  4 6 2 2 6 3 3 7 7 3 ...
##  $ Income               : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ Number.of.Children   : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ Motor.Certificate    : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Certificate      : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Bicycle.Owning       : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ Motor.Owning         : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Owning           : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Number.of.Bicycles   : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ Number.of.Motors     : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ Number.of.Car        : int  1 0 0 0 0 1 1 0 0 0 ...
str(MC$Travel.Mode)
##  int [1:847] 6 3 3 3 3 4 4 3 3 3 ...
# View data (number of rows and colume)
dim(MC) 
## [1] 847  30
# Sumerize Data
summary(MC) 
##   Travel.Mode    Bus.Stop.Condition  Central.Area      Purpose     
##  Min.   :1.000   Min.   :0.0000     Min.   :0.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:0.0000     1st Qu.:0.000   1st Qu.:1.000  
##  Median :3.000   Median :1.0000     Median :0.000   Median :1.000  
##  Mean   :3.646   Mean   :0.5089     Mean   :0.477   Mean   :1.568  
##  3rd Qu.:4.000   3rd Qu.:1.0000     3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :1.0000     Max.   :1.000   Max.   :4.000  
##                                                                    
##    Frequency     Departure.Time     Distance      Travel.Period   
##  Min.   :1.000   Min.   :1.000   Min.   : 0.015   Min.   :  0.00  
##  1st Qu.:4.000   1st Qu.:1.000   1st Qu.: 3.000   1st Qu.: 10.00  
##  Median :4.000   Median :1.000   Median : 5.000   Median : 15.00  
##  Mean   :3.548   Mean   :1.843   Mean   : 6.492   Mean   : 17.42  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 20.00  
##  Max.   :4.000   Max.   :4.000   Max.   :35.000   Max.   :180.00  
##                                                                   
##  Sidewalk.Clearance Lane.Separate    Temporary.Stop.Number Mode.Choice.Reason
##  Min.   :0.0000     Min.   :0.0000   Min.   :0.0000        Min.   :1.000     
##  1st Qu.:1.0000     1st Qu.:1.0000   1st Qu.:0.0000        1st Qu.:2.000     
##  Median :1.0000     Median :1.0000   Median :0.0000        Median :2.000     
##  Mean   :0.8666     Mean   :0.7898   Mean   :0.6824        Mean   :2.713     
##  3rd Qu.:1.0000     3rd Qu.:1.0000   3rd Qu.:1.0000        3rd Qu.:4.000     
##  Max.   :1.0000     Max.   :1.0000   Max.   :3.0000        Max.   :6.000     
##                                                                              
##     Weather         Weekend       Non.Bus.Reason       Cost        
##  Min.   :1.000   Min.   :0.0000   Min.   :1.000   Min.   :  0.000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:  3.000  
##  Median :1.000   Median :0.0000   Median :2.000   Median :  5.000  
##  Mean   :1.646   Mean   :0.2444   Mean   :2.912   Mean   :  8.685  
##  3rd Qu.:3.000   3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.: 10.000  
##  Max.   :3.000   Max.   :1.0000   Max.   :6.000   Max.   :285.000  
##                                   NA's   :110                      
##  Bus.Stop.Present     Gender            Age         Occupation   
##  Min.   :0.000    Min.   :0.0000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:1.000    1st Qu.:0.0000   1st Qu.:3.00   1st Qu.:2.000  
##  Median :1.000    Median :1.0000   Median :4.00   Median :3.000  
##  Mean   :1.004    Mean   :0.5832   Mean   :3.56   Mean   :4.152  
##  3rd Qu.:1.000    3rd Qu.:1.0000   3rd Qu.:4.00   3rd Qu.:7.000  
##  Max.   :2.000    Max.   :1.0000   Max.   :6.00   Max.   :8.000  
##                                                                  
##      Income      Number.of.Children Motor.Certificate Car.Certificate
##  Min.   :1.000   Min.   :0.0000     Min.   :0.0000    Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:0.0000     1st Qu.:1.0000    1st Qu.:0.000  
##  Median :2.000   Median :1.0000     Median :1.0000    Median :0.000  
##  Mean   :2.205   Mean   :0.7674     Mean   :0.8453    Mean   :0.183  
##  3rd Qu.:3.000   3rd Qu.:1.0000     3rd Qu.:1.0000    3rd Qu.:0.000  
##  Max.   :4.000   Max.   :3.0000     Max.   :1.0000    Max.   :1.000  
##                                                                      
##  Bicycle.Owning    Motor.Owning      Car.Owning     Number.of.Bicycles
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000    
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000    
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :1.0000    
##  Mean   :0.4109   Mean   :0.7851   Mean   :0.1405   Mean   :0.7084    
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000    
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :3.0000    
##                                                                       
##  Number.of.Motors Number.of.Car   
##  Min.   :0.000    Min.   :0.0000  
##  1st Qu.:2.000    1st Qu.:0.0000  
##  Median :2.000    Median :0.0000  
##  Mean   :2.118    Mean   :0.2456  
##  3rd Qu.:3.000    3rd Qu.:0.0000  
##  Max.   :4.000    Max.   :2.0000  
## 
# Create dat - MotorBus (Bus và xe máy)
datMB <- MC[MC$Travel.Mode %in% c(3,7),]
head(datMB)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 2           3                  1            0       1         4              1
## 3           3                  1            0       1         4              3
## 4           3                  1            0       1         4              1
## 5           3                  1            0       1         4              3
## 8           3                  1            0       1         4              1
## 9           3                  1            0       1         4              2
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 2        8            15                  1             1                     1
## 3        5            15                  1             1                     1
## 4        5            10                  1             1                     1
## 5        8            15                  1             1                     0
## 8       10            25                  1             1                     0
## 9       12            30                  1             1                     1
##   Mode.Choice.Reason Weather Weekend Non.Bus.Reason Cost Bus.Stop.Present
## 2                  2       3       0              1    8                2
## 3                  4       1       0              2    5                1
## 4                  2       1       0              4    5                1
## 5                  2       3       0              2    8                2
## 8                  2       3       1              2   10                1
## 9                  2       1       0              1   12                1
##   Gender Age Occupation Income Number.of.Children Motor.Certificate
## 2      0   3          6      3                  1                 1
## 3      1   3          2      4                  0                 1
## 4      1   3          2      3                  0                 1
## 5      0   4          6      3                  1                 1
## 8      0   4          7      2                  0                 1
## 9      0   3          7      3                  1                 1
##   Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## 2               0              1            1          0                  1
## 3               0              0            1          0                  0
## 4               0              1            1          0                  1
## 5               0              0            1          0                  0
## 8               0              0            1          0                  0
## 9               0              1            1          0                  1
##   Number.of.Motors Number.of.Car
## 2                3             0
## 3                3             0
## 4                3             0
## 5                2             0
## 8                2             0
## 9                3             0
dim(datMB)
## [1] 628  30
datMB$Bus[datMB$Travel.Mode == 3] <- 0
datMB$Bus[datMB$Travel.Mode == 7] <- 1
datMB$Temporary.Stop.Number[datMB$Temporary.Stop.Number == 0] <- 0
datMB$Temporary.Stop.Number[datMB$Temporary.Stop.Number >= 1] <- 1
datMB$Weather[datMB$Weather == 1] <- 0
datMB$Weather[datMB$Weather == 2] <- 1
datMB$Weather[datMB$Weather == 3] <- 0
datMB$Number.of.Children[datMB$Number.of.Children == 0] <- 0
datMB$Number.of.Children[datMB$Number.of.Children >= 1] <- 1
datMB$Age[datMB$Age == 2] <- 1
datMB$Age[datMB$Age == 3] <- 1
datMB$Age[datMB$Age == 4] <- 2
datMB$Age[datMB$Age == 5] <- 2
datMB$Age[datMB$Age == 6] <- 3
datMB$Occupation[datMB$Occupation == 2] <- 1
datMB$Occupation[datMB$Occupation == 3] <- 2
datMB$Occupation[datMB$Occupation == 6] <- 2
datMB$Occupation[datMB$Occupation == 4] <- 3
datMB$Occupation[datMB$Occupation == 5] <- 3
datMB$Occupation[datMB$Occupation == 7] <- 4
datMB$Occupation[datMB$Occupation == 8] <- 5
head(datMB)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 2           3                  1            0       1         4              1
## 3           3                  1            0       1         4              3
## 4           3                  1            0       1         4              1
## 5           3                  1            0       1         4              3
## 8           3                  1            0       1         4              1
## 9           3                  1            0       1         4              2
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 2        8            15                  1             1                     1
## 3        5            15                  1             1                     1
## 4        5            10                  1             1                     1
## 5        8            15                  1             1                     0
## 8       10            25                  1             1                     0
## 9       12            30                  1             1                     1
##   Mode.Choice.Reason Weather Weekend Non.Bus.Reason Cost Bus.Stop.Present
## 2                  2       0       0              1    8                2
## 3                  4       0       0              2    5                1
## 4                  2       0       0              4    5                1
## 5                  2       0       0              2    8                2
## 8                  2       0       1              2   10                1
## 9                  2       0       0              1   12                1
##   Gender Age Occupation Income Number.of.Children Motor.Certificate
## 2      0   1          2      3                  1                 1
## 3      1   1          1      4                  0                 1
## 4      1   1          1      3                  0                 1
## 5      0   2          2      3                  1                 1
## 8      0   2          4      2                  0                 1
## 9      0   1          4      3                  1                 1
##   Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## 2               0              1            1          0                  1
## 3               0              0            1          0                  0
## 4               0              1            1          0                  1
## 5               0              0            1          0                  0
## 8               0              0            1          0                  0
## 9               0              1            1          0                  1
##   Number.of.Motors Number.of.Car Bus
## 2                3             0   0
## 3                3             0   0
## 4                3             0   0
## 5                2             0   0
## 8                2             0   0
## 9                3             0   0
str(datMB)
## 'data.frame':    628 obs. of  31 variables:
##  $ Travel.Mode          : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ Bus.Stop.Condition   : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ Central.Area         : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ Purpose              : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : int  1 3 1 3 1 2 1 1 1 1 ...
##  $ Distance             : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ Travel.Period        : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ Sidewalk.Clearance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Lane.Separate        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Temporary.Stop.Number: num  1 1 1 0 0 1 1 0 1 0 ...
##  $ Mode.Choice.Reason   : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ Weather              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Weekend              : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ Non.Bus.Reason       : int  1 2 4 2 2 1 4 4 4 2 ...
##  $ Cost                 : int  8 5 5 8 10 12 10 3 2 4 ...
##  $ Bus.Stop.Present     : int  2 1 1 2 1 1 1 1 1 1 ...
##  $ Gender               : int  0 1 1 0 0 0 1 1 1 1 ...
##  $ Age                  : num  1 1 1 2 2 1 2 1 2 1 ...
##  $ Occupation           : num  2 1 1 2 4 4 2 1 4 4 ...
##  $ Income               : int  3 4 3 3 2 3 3 2 2 1 ...
##  $ Number.of.Children   : num  1 0 0 1 0 1 0 0 1 0 ...
##  $ Motor.Certificate    : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ Car.Certificate      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bicycle.Owning       : int  1 0 1 0 0 1 0 1 0 0 ...
##  $ Motor.Owning         : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ Car.Owning           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Number.of.Bicycles   : int  1 0 1 0 0 1 0 1 2 0 ...
##  $ Number.of.Motors     : int  3 3 3 2 2 3 3 3 2 1 ...
##  $ Number.of.Car        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bus                  : num  0 0 0 0 0 0 0 0 0 0 ...
# reformat variables in factor variables
attach(datMB)
datMB = within(datMB, {
  Travel.Mode = factor(Travel.Mode, labels = c("Motorbike", "Bus"))
  Bus.Stop.Condition = factor(Bus.Stop.Condition,labels = c("No", "Yes"))
  Central.Area = factor(Central.Area, labels = c("No", "Yes"))
  Purpose = factor(Purpose, labels = c("Work/Study", "Picking Children", "Entertainment", "Others"))
  Frequency = factor(Frequency, labels = c("Once", "2 times", "3 times", "> 3 times"))
  Departure.Time = factor(Departure.Time, labels = c("Morning", "Afternoon", "Evening", "Others"))
  Sidewalk.Clearance = factor(Sidewalk.Clearance, labels = c("No", "Yes"))
  Lane.Separate = factor(Lane.Separate, labels = c("No", "Yes"))
  Temporary.Stop.Number = factor(Temporary.Stop.Number, labels = c("No", "Yes"))
  Mode.Choice.Reason = factor(Mode.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
  Weather = factor(Weather, labels = c("Sunny/cool", "Rainny"))
  Weekend = factor(Weekend, labels = c("No", "Yes"))
  Non.Bus.Reason = factor(Non.Bus.Reason, labels = c("No Route", "Uncomfortable", "Unsafety", "Long waiting time", "Unreliability", "others"))
  Bus.Stop.Present = factor(Bus.Stop.Present, labels = c("No", "Yes", "Don't know"))
  Gender = factor(Gender, labels = c("Female", "Male"))
  Age = factor(Age, labels = c("15-24", "25-60", ">60"))
  Occupation = factor(Occupation, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
  Number.of.Children = factor(Number.of.Children, labels = c("No", "Yes"))
  Motor.Certificate = factor(Motor.Certificate, labels = c("No", "Yes"))
  Car.Certificate = factor(Car.Certificate, labels = c("No", "Yes"))
  Bicycle.Owning = factor(Bicycle.Owning, labels = c("No", "Yes"))
  Motor.Owning = factor(Motor.Owning, labels = c("No", "Yes"))
  Car.Owning = factor(Car.Owning, labels = c("No", "Yes"))
  Number.of.Bicycles = factor(Number.of.Bicycles, labels = c("None", "1", "2", ">=3"))
  Number.of.Motors = factor(Number.of.Motors, labels = c("None", "1", "2", "3", ">3"))
  Number.of.Car = factor(Number.of.Car, labels = c("None", "1", ">=2"))
  Income = factor(Income, labels = c("<8 millions", "(8-15) millions", "(15-25) millions",  ">25 millions"))
  Bus = factor(Bus, labels = c("No", "Yes"))
  Distance = as.numeric(Distance)
  Travel.Period = as.numeric(Travel.Period)
  Cost = as.numeric(Cost)
    } )
head(datMB)
##   Travel.Mode Bus.Stop.Condition Central.Area    Purpose Frequency
## 2   Motorbike                Yes           No Work/Study > 3 times
## 3   Motorbike                Yes           No Work/Study > 3 times
## 4   Motorbike                Yes           No Work/Study > 3 times
## 5   Motorbike                Yes           No Work/Study > 3 times
## 8   Motorbike                Yes           No Work/Study > 3 times
## 9   Motorbike                Yes           No Work/Study > 3 times
##   Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 2        Morning        8            15                Yes           Yes
## 3        Evening        5            15                Yes           Yes
## 4        Morning        5            10                Yes           Yes
## 5        Evening        8            15                Yes           Yes
## 8        Morning       10            25                Yes           Yes
## 9      Afternoon       12            30                Yes           Yes
##   Temporary.Stop.Number Mode.Choice.Reason    Weather Weekend    Non.Bus.Reason
## 2                   Yes        Comfortable Sunny/cool      No          No Route
## 3                   Yes      Accessibility Sunny/cool      No     Uncomfortable
## 4                   Yes        Comfortable Sunny/cool      No Long waiting time
## 5                    No        Comfortable Sunny/cool      No     Uncomfortable
## 8                    No        Comfortable Sunny/cool     Yes     Uncomfortable
## 9                   Yes        Comfortable Sunny/cool      No          No Route
##   Cost Bus.Stop.Present Gender   Age       Occupation           Income
## 2    8       Don't know Female 15-24 Officers/Workers (15-25) millions
## 3    5              Yes   Male 15-24  Pupils/Students     >25 millions
## 4    5              Yes   Male 15-24  Pupils/Students (15-25) millions
## 5    8       Don't know Female 25-60 Officers/Workers (15-25) millions
## 8   10              Yes Female 25-60       Free labor  (8-15) millions
## 9   12              Yes Female 15-24       Free labor (15-25) millions
##   Number.of.Children Motor.Certificate Car.Certificate Bicycle.Owning
## 2                Yes               Yes              No            Yes
## 3                 No               Yes              No             No
## 4                 No               Yes              No            Yes
## 5                Yes               Yes              No             No
## 8                 No               Yes              No             No
## 9                Yes               Yes              No            Yes
##   Motor.Owning Car.Owning Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 2          Yes         No                  1                3          None  No
## 3          Yes         No               None                3          None  No
## 4          Yes         No                  1                3          None  No
## 5          Yes         No               None                2          None  No
## 8          Yes         No               None                2          None  No
## 9          Yes         No                  1                3          None  No
str(datMB)
## 'data.frame':    628 obs. of  31 variables:
##  $ Travel.Mode          : Factor w/ 2 levels "Motorbike","Bus": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bus.Stop.Condition   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 1 1 ...
##  $ Central.Area         : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Purpose              : Factor w/ 4 levels "Work/Study","Picking Children",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : Factor w/ 4 levels "Once","2 times",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : Factor w/ 4 levels "Morning","Afternoon",..: 1 3 1 3 1 2 1 1 1 1 ...
##  $ Distance             : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ Travel.Period        : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ Sidewalk.Clearance   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Lane.Separate        : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Temporary.Stop.Number: Factor w/ 2 levels "No","Yes": 2 2 2 1 1 2 2 1 2 1 ...
##  $ Mode.Choice.Reason   : Factor w/ 6 levels "Safety","Comfortable",..: 2 4 2 2 2 2 2 2 4 3 ...
##  $ Weather              : Factor w/ 2 levels "Sunny/cool","Rainny": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Weekend              : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
##  $ Non.Bus.Reason       : Factor w/ 6 levels "No Route","Uncomfortable",..: 1 2 4 2 2 1 4 4 4 2 ...
##  $ Cost                 : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ Bus.Stop.Present     : Factor w/ 3 levels "No","Yes","Don't know": 3 2 2 3 2 2 2 2 2 2 ...
##  $ Gender               : Factor w/ 2 levels "Female","Male": 1 2 2 1 1 1 2 2 2 2 ...
##  $ Age                  : Factor w/ 3 levels "15-24","25-60",..: 1 1 1 2 2 1 2 1 2 1 ...
##  $ Occupation           : Factor w/ 5 levels "Pupils/Students",..: 2 1 1 2 4 4 2 1 4 4 ...
##  $ Income               : Factor w/ 4 levels "<8 millions",..: 3 4 3 3 2 3 3 2 2 1 ...
##  $ Number.of.Children   : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 2 1 1 2 1 ...
##  $ Motor.Certificate    : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
##  $ Car.Certificate      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bicycle.Owning       : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 2 1 2 1 1 ...
##  $ Motor.Owning         : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
##  $ Car.Owning           : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Number.of.Bicycles   : Factor w/ 4 levels "None","1","2",..: 2 1 2 1 1 2 1 2 3 1 ...
##  $ Number.of.Motors     : Factor w/ 5 levels "None","1","2",..: 4 4 4 3 3 4 4 4 3 2 ...
##  $ Number.of.Car        : Factor w/ 3 levels "None","1",">=2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bus                  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
# Remove tiep variables : Travel.Mode (1), Car.Certificate (24), Bicycle.Owning (25), Car.Owning (27), Number.of.Bicycles (28), Number.of.Car (30)
attach(datMB)
## The following objects are masked from datMB (pos = 3):
## 
##     Age, Bicycle.Owning, Bus, Bus.Stop.Condition, Bus.Stop.Present,
##     Car.Certificate, Car.Owning, Central.Area, Cost, Departure.Time,
##     Distance, Frequency, Gender, Income, Lane.Separate,
##     Mode.Choice.Reason, Motor.Certificate, Motor.Owning,
##     Non.Bus.Reason, Number.of.Bicycles, Number.of.Car,
##     Number.of.Children, Number.of.Motors, Occupation, Purpose,
##     Sidewalk.Clearance, Temporary.Stop.Number, Travel.Mode,
##     Travel.Period, Weather, Weekend
datMB <- datMB[,c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,26,29,31)]
head(datMB)
##   Bus.Stop.Condition Central.Area    Purpose Frequency Departure.Time Distance
## 2                Yes           No Work/Study > 3 times        Morning        8
## 3                Yes           No Work/Study > 3 times        Evening        5
## 4                Yes           No Work/Study > 3 times        Morning        5
## 5                Yes           No Work/Study > 3 times        Evening        8
## 8                Yes           No Work/Study > 3 times        Morning       10
## 9                Yes           No Work/Study > 3 times      Afternoon       12
##   Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 2            15                Yes           Yes                   Yes
## 3            15                Yes           Yes                   Yes
## 4            10                Yes           Yes                   Yes
## 5            15                Yes           Yes                    No
## 8            25                Yes           Yes                    No
## 9            30                Yes           Yes                   Yes
##   Mode.Choice.Reason    Weather Weekend    Non.Bus.Reason Cost Bus.Stop.Present
## 2        Comfortable Sunny/cool      No          No Route    8       Don't know
## 3      Accessibility Sunny/cool      No     Uncomfortable    5              Yes
## 4        Comfortable Sunny/cool      No Long waiting time    5              Yes
## 5        Comfortable Sunny/cool      No     Uncomfortable    8       Don't know
## 8        Comfortable Sunny/cool     Yes     Uncomfortable   10              Yes
## 9        Comfortable Sunny/cool      No          No Route   12              Yes
##   Gender   Age       Occupation           Income Number.of.Children
## 2 Female 15-24 Officers/Workers (15-25) millions                Yes
## 3   Male 15-24  Pupils/Students     >25 millions                 No
## 4   Male 15-24  Pupils/Students (15-25) millions                 No
## 5 Female 25-60 Officers/Workers (15-25) millions                Yes
## 8 Female 25-60       Free labor  (8-15) millions                 No
## 9 Female 15-24       Free labor (15-25) millions                Yes
##   Motor.Certificate Motor.Owning Number.of.Motors Bus
## 2               Yes          Yes                3  No
## 3               Yes          Yes                3  No
## 4               Yes          Yes                3  No
## 5               Yes          Yes                2  No
## 8               Yes          Yes                2  No
## 9               Yes          Yes                3  No
dim(datMB)
## [1] 628  25

2. Descriptive analyse

# Descriptive analyse with package "table 1" - sort by Bus variable
  ## install.packages("table1", dependencies = T)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1 (~ Bus.Stop.Condition + Central.Area + Purpose + Frequency + Departure.Time +  Sidewalk.Clearance + Sidewalk.Clearance + Lane.Separate + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Weekend + Non.Bus.Reason + Bus.Stop.Present + Gender + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors +Distance + Travel.Period + Cost| Bus , data = datMB)
No
(N=518)
Yes
(N=110)
Overall
(N=628)
Bus.Stop.Condition
No 253 (48.8%) 63 (57.3%) 316 (50.3%)
Yes 265 (51.2%) 47 (42.7%) 312 (49.7%)
Central.Area
No 284 (54.8%) 44 (40.0%) 328 (52.2%)
Yes 234 (45.2%) 66 (60.0%) 300 (47.8%)
Purpose
Work/Study 351 (67.8%) 80 (72.7%) 431 (68.6%)
Picking Children 69 (13.3%) 9 (8.2%) 78 (12.4%)
Entertainment 89 (17.2%) 14 (12.7%) 103 (16.4%)
Others 9 (1.7%) 7 (6.4%) 16 (2.5%)
Frequency
Once 23 (4.4%) 13 (11.8%) 36 (5.7%)
2 times 18 (3.5%) 19 (17.3%) 37 (5.9%)
3 times 33 (6.4%) 20 (18.2%) 53 (8.4%)
> 3 times 444 (85.7%) 58 (52.7%) 502 (79.9%)
Departure.Time
Morning 328 (63.3%) 71 (64.5%) 399 (63.5%)
Afternoon 34 (6.6%) 12 (10.9%) 46 (7.3%)
Evening 85 (16.4%) 20 (18.2%) 105 (16.7%)
Others 71 (13.7%) 7 (6.4%) 78 (12.4%)
Sidewalk.Clearance
No 79 (15.3%) 15 (13.6%) 94 (15.0%)
Yes 439 (84.7%) 95 (86.4%) 534 (85.0%)
Lane.Separate
No 111 (21.4%) 20 (18.2%) 131 (20.9%)
Yes 407 (78.6%) 90 (81.8%) 497 (79.1%)
Temporary.Stop.Number
No 247 (47.7%) 81 (73.6%) 328 (52.2%)
Yes 271 (52.3%) 29 (26.4%) 300 (47.8%)
Mode.Choice.Reason
Safety 27 (5.2%) 32 (29.1%) 59 (9.4%)
Comfortable 256 (49.4%) 41 (37.3%) 297 (47.3%)
Low price 24 (4.6%) 34 (30.9%) 58 (9.2%)
Accessibility 161 (31.1%) 1 (0.9%) 162 (25.8%)
Reliability 42 (8.1%) 0 (0%) 42 (6.7%)
others 8 (1.5%) 2 (1.8%) 10 (1.6%)
Weather
Sunny/cool 516 (99.6%) 101 (91.8%) 617 (98.2%)
Rainny 2 (0.4%) 9 (8.2%) 11 (1.8%)
Weekend
No 403 (77.8%) 87 (79.1%) 490 (78.0%)
Yes 115 (22.2%) 23 (20.9%) 138 (22.0%)
Non.Bus.Reason
No Route 122 (23.6%) 0 (0%) 122 (19.4%)
Uncomfortable 160 (30.9%) 0 (0%) 160 (25.5%)
Unsafety 7 (1.4%) 0 (0%) 7 (1.1%)
Long waiting time 159 (30.7%) 0 (0%) 159 (25.3%)
Unreliability 47 (9.1%) 0 (0%) 47 (7.5%)
others 23 (4.4%) 0 (0%) 23 (3.7%)
Missing 0 (0%) 110 (100%) 110 (17.5%)
Bus.Stop.Present
No 48 (9.3%) 19 (17.3%) 67 (10.7%)
Yes 415 (80.1%) 86 (78.2%) 501 (79.8%)
Don't know 55 (10.6%) 5 (4.5%) 60 (9.6%)
Gender
Female 223 (43.1%) 44 (40.0%) 267 (42.5%)
Male 295 (56.9%) 66 (60.0%) 361 (57.5%)
Age
15-24 215 (41.5%) 64 (58.2%) 279 (44.4%)
25-60 294 (56.8%) 26 (23.6%) 320 (51.0%)
>60 9 (1.7%) 20 (18.2%) 29 (4.6%)
Occupation
Pupils/Students 165 (31.9%) 54 (49.1%) 219 (34.9%)
Officers/Workers 153 (29.5%) 14 (12.7%) 167 (26.6%)
Housewife/Unemployed 31 (6.0%) 12 (10.9%) 43 (6.8%)
Free labor 139 (26.8%) 12 (10.9%) 151 (24.0%)
Others 30 (5.8%) 18 (16.4%) 48 (7.6%)
Income
<8 millions 114 (22.0%) 52 (47.3%) 166 (26.4%)
(8-15) millions 236 (45.6%) 29 (26.4%) 265 (42.2%)
(15-25) millions 129 (24.9%) 26 (23.6%) 155 (24.7%)
>25 millions 39 (7.5%) 3 (2.7%) 42 (6.7%)
Number.of.Children
No 224 (43.2%) 64 (58.2%) 288 (45.9%)
Yes 294 (56.8%) 46 (41.8%) 340 (54.1%)
Motor.Certificate
No 25 (4.8%) 39 (35.5%) 64 (10.2%)
Yes 493 (95.2%) 71 (64.5%) 564 (89.8%)
Motor.Owning
No 42 (8.1%) 51 (46.4%) 93 (14.8%)
Yes 476 (91.9%) 59 (53.6%) 535 (85.2%)
Number.of.Motors
None 5 (1.0%) 13 (11.8%) 18 (2.9%)
1 68 (13.1%) 27 (24.5%) 95 (15.1%)
2 262 (50.6%) 50 (45.5%) 312 (49.7%)
3 147 (28.4%) 17 (15.5%) 164 (26.1%)
>3 36 (6.9%) 3 (2.7%) 39 (6.2%)
Distance
Mean (SD) 6.31 (5.00) 8.55 (4.73) 6.70 (5.03)
Median [Min, Max] 5.00 [0.200, 30.0] 8.00 [0.500, 25.0] 5.00 [0.200, 30.0]
Travel.Period
Mean (SD) 15.8 (12.8) 21.5 (12.4) 16.8 (12.9)
Median [Min, Max] 15.0 [0, 180] 20.0 [3.00, 60.0] 15.0 [0, 180]
Cost
Mean (SD) 6.33 (4.98) 5.23 (1.05) 6.14 (4.57)
Median [Min, Max] 5.00 [0, 30.0] 5.00 [5.00, 10.0] 5.00 [0, 30.0]
# Descriptive analyse with package "compareGroups" - sort by Bus variable (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
  ## install.packages("compareGroups", dependencies = T)
library(compareGroups)
t = compareGroups(Bus ~ Bus.Stop.Condition + Central.Area + Purpose + Frequency + Departure.Time +  Sidewalk.Clearance + Lane.Separate + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Weekend + Non.Bus.Reason + Bus.Stop.Present + Gender + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors +Distance + Travel.Period + Cost, data = datMB) # 24 variables
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t)
## 
## --------Summary descriptives table by 'Bus'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=518       N=110              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Bus.Stop.Condition:                                0.133   
##     No                   253 (48.8%) 63 (57.3%)            
##     Yes                  265 (51.2%) 47 (42.7%)            
## Central.Area:                                      0.006   
##     No                   284 (54.8%) 44 (40.0%)            
##     Yes                  234 (45.2%) 66 (60.0%)            
## Purpose:                                           0.019   
##     Work/Study           351 (67.8%) 80 (72.7%)            
##     Picking Children     69 (13.3%)   9 (8.18%)            
##     Entertainment        89 (17.2%)  14 (12.7%)            
##     Others                9 (1.74%)   7 (6.36%)            
## Frequency:                                        <0.001   
##     Once                 23 (4.44%)  13 (11.8%)            
##     2 times              18 (3.47%)  19 (17.3%)            
##     3 times              33 (6.37%)  20 (18.2%)            
##     > 3 times            444 (85.7%) 58 (52.7%)            
## Departure.Time:                                    0.091   
##     Morning              328 (63.3%) 71 (64.5%)            
##     Afternoon            34 (6.56%)  12 (10.9%)            
##     Evening              85 (16.4%)  20 (18.2%)            
##     Others               71 (13.7%)   7 (6.36%)            
## Sidewalk.Clearance:                                0.776   
##     No                   79 (15.3%)  15 (13.6%)            
##     Yes                  439 (84.7%) 95 (86.4%)            
## Lane.Separate:                                     0.527   
##     No                   111 (21.4%) 20 (18.2%)            
##     Yes                  407 (78.6%) 90 (81.8%)            
## Temporary.Stop.Number:                            <0.001   
##     No                   247 (47.7%) 81 (73.6%)            
##     Yes                  271 (52.3%) 29 (26.4%)            
## Mode.Choice.Reason:                                  .     
##     Safety               27 (5.21%)  32 (29.1%)            
##     Comfortable          256 (49.4%) 41 (37.3%)            
##     Low price            24 (4.63%)  34 (30.9%)            
##     Accessibility        161 (31.1%)  1 (0.91%)            
##     Reliability          42 (8.11%)   0 (0.00%)            
##     others                8 (1.54%)   2 (1.82%)            
## Weather:                                          <0.001   
##     Sunny/cool           516 (99.6%) 101 (91.8%)           
##     Rainny                2 (0.39%)   9 (8.18%)            
## Weekend:                                           0.865   
##     No                   403 (77.8%) 87 (79.1%)            
##     Yes                  115 (22.2%) 23 (20.9%)            
## Non.Bus.Reason:                                      .     
##     No Route             122 (23.6%)   0 (.%)              
##     Uncomfortable        160 (30.9%)   0 (.%)              
##     Unsafety              7 (1.35%)    0 (.%)              
##     Long waiting time    159 (30.7%)   0 (.%)              
##     Unreliability        47 (9.07%)    0 (.%)              
##     others               23 (4.44%)    0 (.%)              
## Bus.Stop.Present:                                  0.011   
##     No                   48 (9.27%)  19 (17.3%)            
##     Yes                  415 (80.1%) 86 (78.2%)            
##     Don't know           55 (10.6%)   5 (4.55%)            
## Gender:                                            0.630   
##     Female               223 (43.1%) 44 (40.0%)            
##     Male                 295 (56.9%) 66 (60.0%)            
## Age:                                              <0.001   
##     15-24                215 (41.5%) 64 (58.2%)            
##     25-60                294 (56.8%) 26 (23.6%)            
##     >60                   9 (1.74%)  20 (18.2%)            
## Occupation:                                       <0.001   
##     Pupils/Students      165 (31.9%) 54 (49.1%)            
##     Officers/Workers     153 (29.5%) 14 (12.7%)            
##     Housewife/Unemployed 31 (5.98%)  12 (10.9%)            
##     Free labor           139 (26.8%) 12 (10.9%)            
##     Others               30 (5.79%)  18 (16.4%)            
## Income:                                           <0.001   
##     <8 millions          114 (22.0%) 52 (47.3%)            
##     (8-15) millions      236 (45.6%) 29 (26.4%)            
##     (15-25) millions     129 (24.9%) 26 (23.6%)            
##     >25 millions         39 (7.53%)   3 (2.73%)            
## Number.of.Children:                                0.006   
##     No                   224 (43.2%) 64 (58.2%)            
##     Yes                  294 (56.8%) 46 (41.8%)            
## Motor.Certificate:                                <0.001   
##     No                   25 (4.83%)  39 (35.5%)            
##     Yes                  493 (95.2%) 71 (64.5%)            
## Motor.Owning:                                     <0.001   
##     No                   42 (8.11%)  51 (46.4%)            
##     Yes                  476 (91.9%) 59 (53.6%)            
## Number.of.Motors:                                 <0.001   
##     None                  5 (0.97%)  13 (11.8%)            
##     1                    68 (13.1%)  27 (24.5%)            
##     2                    262 (50.6%) 50 (45.5%)            
##     3                    147 (28.4%) 17 (15.5%)            
##     >3                   36 (6.95%)   3 (2.73%)            
## Distance                 6.31 (5.00) 8.55 (4.73)  <0.001   
## Travel.Period            15.8 (12.8) 21.5 (12.4)  <0.001   
## Cost                     6.33 (4.98) 5.23 (1.05)  <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
   ## Remove variables with p-value > 0.05: Bus.Stop.Condition (2), Departure.Time (6), Sidewalk.Clearance (9), Lane.Separate (10), Weekend (14), Gender (18)
t1 = compareGroups(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Non.Bus.Reason + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors +Distance + Travel.Period + Cost, data = datMB) # 18 variables
## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect

## Warning in chisq.test(xx, correct = FALSE): Chi-squared approximation may be
## incorrect
createTable(t1)
## 
## --------Summary descriptives table by 'Bus'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=518       N=110              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Central.Area:                                      0.006   
##     No                   284 (54.8%) 44 (40.0%)            
##     Yes                  234 (45.2%) 66 (60.0%)            
## Purpose:                                           0.019   
##     Work/Study           351 (67.8%) 80 (72.7%)            
##     Picking Children     69 (13.3%)   9 (8.18%)            
##     Entertainment        89 (17.2%)  14 (12.7%)            
##     Others                9 (1.74%)   7 (6.36%)            
## Frequency:                                        <0.001   
##     Once                 23 (4.44%)  13 (11.8%)            
##     2 times              18 (3.47%)  19 (17.3%)            
##     3 times              33 (6.37%)  20 (18.2%)            
##     > 3 times            444 (85.7%) 58 (52.7%)            
## Temporary.Stop.Number:                            <0.001   
##     No                   247 (47.7%) 81 (73.6%)            
##     Yes                  271 (52.3%) 29 (26.4%)            
## Mode.Choice.Reason:                                  .     
##     Safety               27 (5.21%)  32 (29.1%)            
##     Comfortable          256 (49.4%) 41 (37.3%)            
##     Low price            24 (4.63%)  34 (30.9%)            
##     Accessibility        161 (31.1%)  1 (0.91%)            
##     Reliability          42 (8.11%)   0 (0.00%)            
##     others                8 (1.54%)   2 (1.82%)            
## Weather:                                          <0.001   
##     Sunny/cool           516 (99.6%) 101 (91.8%)           
##     Rainny                2 (0.39%)   9 (8.18%)            
## Non.Bus.Reason:                                      .     
##     No Route             122 (23.6%)   0 (.%)              
##     Uncomfortable        160 (30.9%)   0 (.%)              
##     Unsafety              7 (1.35%)    0 (.%)              
##     Long waiting time    159 (30.7%)   0 (.%)              
##     Unreliability        47 (9.07%)    0 (.%)              
##     others               23 (4.44%)    0 (.%)              
## Bus.Stop.Present:                                  0.011   
##     No                   48 (9.27%)  19 (17.3%)            
##     Yes                  415 (80.1%) 86 (78.2%)            
##     Don't know           55 (10.6%)   5 (4.55%)            
## Age:                                              <0.001   
##     15-24                215 (41.5%) 64 (58.2%)            
##     25-60                294 (56.8%) 26 (23.6%)            
##     >60                   9 (1.74%)  20 (18.2%)            
## Occupation:                                       <0.001   
##     Pupils/Students      165 (31.9%) 54 (49.1%)            
##     Officers/Workers     153 (29.5%) 14 (12.7%)            
##     Housewife/Unemployed 31 (5.98%)  12 (10.9%)            
##     Free labor           139 (26.8%) 12 (10.9%)            
##     Others               30 (5.79%)  18 (16.4%)            
## Income:                                           <0.001   
##     <8 millions          114 (22.0%) 52 (47.3%)            
##     (8-15) millions      236 (45.6%) 29 (26.4%)            
##     (15-25) millions     129 (24.9%) 26 (23.6%)            
##     >25 millions         39 (7.53%)   3 (2.73%)            
## Number.of.Children:                                0.006   
##     No                   224 (43.2%) 64 (58.2%)            
##     Yes                  294 (56.8%) 46 (41.8%)            
## Motor.Certificate:                                <0.001   
##     No                   25 (4.83%)  39 (35.5%)            
##     Yes                  493 (95.2%) 71 (64.5%)            
## Motor.Owning:                                     <0.001   
##     No                   42 (8.11%)  51 (46.4%)            
##     Yes                  476 (91.9%) 59 (53.6%)            
## Number.of.Motors:                                 <0.001   
##     None                  5 (0.97%)  13 (11.8%)            
##     1                    68 (13.1%)  27 (24.5%)            
##     2                    262 (50.6%) 50 (45.5%)            
##     3                    147 (28.4%) 17 (15.5%)            
##     >3                   36 (6.95%)   3 (2.73%)            
## Distance                 6.31 (5.00) 8.55 (4.73)  <0.001   
## Travel.Period            15.8 (12.8) 21.5 (12.4)  <0.001   
## Cost                     6.33 (4.98) 5.23 (1.05)  <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

3. Descriptive analyse by graphs

library(magrittr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
datMB %>%
  group_by(Bus, Bus.Stop.Condition) %>%
  count() %>% 
  ggplot(aes(Bus.Stop.Condition, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Bus Stop Condition") +
  ylab("Bus") +
  ggtitle("Proportion of Bus Choice ~ Bus Stop Condition")

datMB %>%
  group_by(Bus, Central.Area) %>%
  count() %>% 
  ggplot(aes(Central.Area, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Central Area") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Central Area")

datMB %>%
  group_by(Bus, Purpose) %>%
  count() %>% 
  ggplot(aes(Purpose, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Purpose of travelling") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Purpose of Travelling")

datMB %>%
  group_by(Bus, Frequency) %>%
  count() %>% 
  ggplot(aes(Frequency, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Frequency of Travelling") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Frequency of Travelling")

datMB %>%
  group_by(Bus, Departure.Time) %>%
  count() %>% 
  ggplot(aes(Departure.Time, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Departure time of travel") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Departure Time of Travel")

datMB %>%
  group_by(Bus, Sidewalk.Clearance) %>%
  count() %>% 
  ggplot(aes(Sidewalk.Clearance, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Sidewalk Clearance of Roads") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Sidewalk Clearance of Roads")

datMB %>%
  group_by(Bus, Lane.Separate) %>%
  count() %>% 
  ggplot(aes(Lane.Separate, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Lane Separate") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Lane Separate of Roads")

datMB %>%
  group_by(Bus, Temporary.Stop.Number) %>%
  count() %>% 
  ggplot(aes(Temporary.Stop.Number, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("The number of temporary stops") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ The number of temporary stops") 

datMB %>%
  group_by(Bus, Mode.Choice.Reason) %>%
  count() %>% 
  ggplot(aes(Mode.Choice.Reason, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("The reason of motor mode choice") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Motor Choice Reason")

datMB %>%
  group_by(Bus, Weather) %>%
  count() %>% 
  ggplot(aes(Weather, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Weather condition") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Weather condition")

datMB %>%
  group_by(Bus, Weekend) %>%
  count() %>% 
  ggplot(aes(Weekend, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Weekend") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Weekend")

datMB %>%
  group_by(Bus, Bus.Stop.Present) %>%
  count() %>% 
  ggplot(aes(Bus.Stop.Present, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("The presence of bus stop") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ The presence of bus stop")

datMB %>%
  group_by(Bus, Gender) %>%
  count() %>% 
  ggplot(aes(Gender, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Gender") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Gender") 

datMB %>%
  group_by(Bus, Age) %>%
  count() %>% 
  ggplot(aes(Age, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Age") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Age")

datMB %>%
  group_by(Bus, Occupation) %>%
  count() %>% 
  ggplot(aes(Occupation, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Occupation") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Occupation")

datMB %>%
  group_by(Bus, Income) %>%
  count() %>% 
  ggplot(aes(Income, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Income") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Income")

datMB %>%
  group_by(Bus, Number.of.Children) %>%
  count() %>% 
  ggplot(aes(Number.of.Children, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Number of Children") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Number of Children in family")

datMB %>%
  group_by(Bus, Motor.Certificate) %>%
  count() %>% 
  ggplot(aes(Motor.Certificate, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Motor Certificate") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Motor certificate")

datMB %>%
  group_by(Bus, Motor.Owning) %>%
  count() %>% 
  ggplot(aes(Motor.Owning, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Motor Owning") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Motor Owning")

datMB %>%
  group_by(Bus, Number.of.Motors) %>%
  count() %>% 
  ggplot(aes(Number.of.Motors, n, fill = Bus)) +
  geom_col(position = "fill") +
  xlab("Number of Motors") +
  ylab("Bus Choice") +
  ggtitle("Proportion of Bus Choice ~ Number of Motors")

## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(datMB, aes(x=Distance, fill = Bus, color = Bus)) +
  geom_histogram (position = "dodge") +
  xlab("Distance") +
  ylab("Count") +
  ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(datMB, aes(x=Travel.Period, fill = Bus, color = Bus)) +
  geom_histogram (position = "dodge") +
  xlab("Travel Period") +
  ylab("Count") +
  ggtitle("Histogram of Travel Period")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(datMB, aes(x=Cost, fill = Bus, color = Bus)) +
  geom_histogram (position = "dodge") +
  xlab("Cost") +
  ylab("Count") +
  ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Correlation in continuous variables
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
CVBus = data.frame(datMB$Bus, datMB$Distance, datMB$Travel.Period, datMB$Cost)
pairs.panels(CVBus)

CVBus1 = data_frame(datMB$Bus.Stop.Condition, datMB$Central.Area, datMB$Purpose, datMB$Frequency, datMB$Departure.Time, datMB$Sidewalk.Clearance, datMB$Lane.Separate, datMB$Temporary.Stop.Number, datMB$Weather, datMB$Weekend)
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
CVBus2 = data_frame(datMB$Bus.Stop.Present, datMB$Gender, datMB$Age, datMB$Occupation, datMB$Income, datMB$Number.of.Children, datMB$Motor.Certificate, datMB$Motor.Owning, datMB$Number.of.Motors)
pairs.panels(CVBus1)

pairs.panels(CVBus2)

## Statistic Analysis by boxplot (continuous variales)
datMB %>%
  group_by(Distance, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Distance, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Distance") +
  ggtitle("Boxplot of Bus Choice ~ Distance")

datMB %>%
  group_by(Cost, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Cost, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Cost") +
  ggtitle("Boxplot of Bus Choice ~ Cost")

datMB %>%
  group_by(Travel.Period, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Travel.Period, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Bus Choice ~ Travel Period")

### Boxplot of Distance, Travel Period and Cost ~ Gender
datMB %>%
  group_by(Distance, Gender) %>%
  count() %>% 
  ggplot(aes(x = Gender, y = Distance, fill = Gender)) +
  geom_boxplot() +
  xlab("Gender") +
  ylab("Distance") +
  ggtitle("Boxplot of Distance ~ Gender")

datMB %>%
  group_by(Cost, Gender) %>%
  count() %>% 
  ggplot(aes(x = Gender, y = Cost, fill = Gender)) +
  geom_boxplot() +
  xlab("Gender") +
  ylab("Cost") +
  ggtitle("Boxplot of Cost ~ Gender")

datMB %>%
  group_by(Travel.Period, Gender) %>%
  count() %>% 
  ggplot(aes(x = Gender, y = Travel.Period, fill = Gender)) +
  geom_boxplot() +
  xlab("Gender") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Travel Period ~ Gender")

### Boxplot of Distance, Travel Period and Cost ~ Occupation
datMB %>%
  group_by(Distance, Occupation) %>%
  count() %>% 
  ggplot(aes(x = Occupation, y = Distance, fill = Occupation)) +
  geom_boxplot() +
  xlab("Occupation") +
  ylab("Distance") +
  ggtitle("Boxplot of Distance ~ Occupation")

datMB %>%
  group_by(Cost, Occupation) %>%
  count() %>% 
  ggplot(aes(x = Occupation, y = Cost, fill = Occupation)) +
  geom_boxplot() +
  xlab("Occupation") +
  ylab("Cost") +
  ggtitle("Boxplot of Cost ~ Occupation")

datMB %>%
  group_by(Travel.Period, Occupation) %>%
  count() %>% 
  ggplot(aes(x = Occupation, y = Travel.Period, fill = Occupation)) +
  geom_boxplot() +
  xlab("Occupation") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Travel Period ~ Occupation")

summary(datMB)
##  Bus.Stop.Condition Central.Area             Purpose        Frequency  
##  No :316            No :328      Work/Study      :431   Once     : 36  
##  Yes:312            Yes:300      Picking Children: 78   2 times  : 37  
##                                  Entertainment   :103   3 times  : 53  
##                                  Others          : 16   > 3 times:502  
##                                                                        
##                                                                        
##                                                                        
##    Departure.Time    Distance      Travel.Period   Sidewalk.Clearance
##  Morning  :399    Min.   : 0.200   Min.   :  0.0   No : 94           
##  Afternoon: 46    1st Qu.: 3.000   1st Qu.: 10.0   Yes:534           
##  Evening  :105    Median : 5.000   Median : 15.0                     
##  Others   : 78    Mean   : 6.703   Mean   : 16.8                     
##                   3rd Qu.:10.000   3rd Qu.: 20.0                     
##                   Max.   :30.000   Max.   :180.0                     
##                                                                      
##  Lane.Separate Temporary.Stop.Number     Mode.Choice.Reason       Weather   
##  No :131       No :328               Safety       : 59      Sunny/cool:617  
##  Yes:497       Yes:300               Comfortable  :297      Rainny    : 11  
##                                      Low price    : 58                      
##                                      Accessibility:162                      
##                                      Reliability  : 42                      
##                                      others       : 10                      
##                                                                             
##  Weekend             Non.Bus.Reason      Cost          Bus.Stop.Present
##  No :490   No Route         :122    Min.   : 0.000   No        : 67    
##  Yes:138   Uncomfortable    :160    1st Qu.: 3.000   Yes       :501    
##            Unsafety         :  7    Median : 5.000   Don't know: 60    
##            Long waiting time:159    Mean   : 6.139                     
##            Unreliability    : 47    3rd Qu.: 8.000                     
##            others           : 23    Max.   :30.000                     
##            NA's             :110                                       
##     Gender       Age                     Occupation               Income   
##  Female:267   15-24:279   Pupils/Students     :219   <8 millions     :166  
##  Male  :361   25-60:320   Officers/Workers    :167   (8-15) millions :265  
##               >60  : 29   Housewife/Unemployed: 43   (15-25) millions:155  
##                           Free labor          :151   >25 millions    : 42  
##                           Others              : 48                         
##                                                                            
##                                                                            
##  Number.of.Children Motor.Certificate Motor.Owning Number.of.Motors  Bus     
##  No :288            No : 64           No : 93      None: 18         No :518  
##  Yes:340            Yes:564           Yes:535      1   : 95         Yes:110  
##                                                    2   :312                  
##                                                    3   :164                  
##                                                    >3  : 39                  
##                                                                              
## 

3. Binary Model

# Model with multi-variables (remove variables with p-value>0,05 in t1) --> 19 variables. Bus.Stop.Condition (2), Departure.Time (6), Sidewalk.Clearance (9), Lane.Separate (10), Weekend (14), Gender (18) - Besides remove variables: Mode.Choice.Reason (12), Non.Bus.Reason (15) -> 16 variables.
mg = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors +Distance + Travel.Period + Cost, family = binomial, data = datMB)
mg
## 
## Call:  glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Occupation + Income + 
##     Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors + 
##     Distance + Travel.Period + Cost, family = binomial, data = datMB)
## 
## Coefficients:
##                    (Intercept)                 Central.AreaYes  
##                       3.070156                        0.565545  
##        PurposePicking Children            PurposeEntertainment  
##                      -1.232672                       -2.066949  
##                  PurposeOthers                Frequency2 times  
##                       0.172665                        2.121711  
##               Frequency3 times              Frequency> 3 times  
##                       0.633608                       -0.951016  
##       Temporary.Stop.NumberYes                   WeatherRainny  
##                      -1.239462                        2.532484  
##            Bus.Stop.PresentYes      Bus.Stop.PresentDon't know  
##                      -0.746232                       -1.769581  
##                       Age25-60                          Age>60  
##                      -0.230899                        2.825977  
##     OccupationOfficers/Workers  OccupationHousewife/Unemployed  
##                      -0.003997                        0.908776  
##           OccupationFree labor                OccupationOthers  
##                      -1.125910                        1.320413  
##          Income(8-15) millions          Income(15-25) millions  
##                      -1.015551                       -0.641152  
##             Income>25 millions           Number.of.ChildrenYes  
##                      -2.800307                       -0.805410  
##           Motor.CertificateYes                 Motor.OwningYes  
##                      -1.411194                       -2.095355  
##              Number.of.Motors1               Number.of.Motors2  
##                       0.876734                        0.424273  
##              Number.of.Motors3              Number.of.Motors>3  
##                      -0.672451                       -0.381830  
##                       Distance                   Travel.Period  
##                       0.876821                        0.010150  
##                           Cost  
##                      -0.917914  
## 
## Degrees of Freedom: 627 Total (i.e. Null);  597 Residual
## Null Deviance:       582.8 
## Residual Deviance: 206.3     AIC: 268.3
summary(mg) 
## 
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Occupation + Income + 
##     Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors + 
##     Distance + Travel.Period + Cost, family = binomial, data = datMB)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2620  -0.2679  -0.1383  -0.0540   3.4652  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     3.070156   1.613146   1.903  0.05701 .  
## Central.AreaYes                 0.565545   0.419960   1.347  0.17809    
## PurposePicking Children        -1.232672   0.817235  -1.508  0.13147    
## PurposeEntertainment           -2.066949   0.669445  -3.088  0.00202 ** 
## PurposeOthers                   0.172665   1.074156   0.161  0.87229    
## Frequency2 times                2.121711   0.984158   2.156  0.03109 *  
## Frequency3 times                0.633608   0.972582   0.651  0.51474    
## Frequency> 3 times             -0.951016   0.791240  -1.202  0.22939    
## Temporary.Stop.NumberYes       -1.239462   0.433266  -2.861  0.00423 ** 
## WeatherRainny                   2.532484   1.322140   1.915  0.05544 .  
## Bus.Stop.PresentYes            -0.746232   0.606945  -1.229  0.21889    
## Bus.Stop.PresentDon't know     -1.769581   0.961864  -1.840  0.06581 .  
## Age25-60                       -0.230899   0.737307  -0.313  0.75415    
## Age>60                          2.825977   1.113357   2.538  0.01114 *  
## OccupationOfficers/Workers     -0.003997   0.776038  -0.005  0.99589    
## OccupationHousewife/Unemployed  0.908776   1.083856   0.838  0.40177    
## OccupationFree labor           -1.125910   0.788097  -1.429  0.15311    
## OccupationOthers                1.320413   0.937865   1.408  0.15916    
## Income(8-15) millions          -1.015551   0.490945  -2.069  0.03859 *  
## Income(15-25) millions         -0.641152   0.547466  -1.171  0.24155    
## Income>25 millions             -2.800307   1.458481  -1.920  0.05486 .  
## Number.of.ChildrenYes          -0.805410   0.476603  -1.690  0.09105 .  
## Motor.CertificateYes           -1.411194   0.605329  -2.331  0.01974 *  
## Motor.OwningYes                -2.095355   0.541903  -3.867  0.00011 ***
## Number.of.Motors1               0.876734   1.263569   0.694  0.48777    
## Number.of.Motors2               0.424273   1.240399   0.342  0.73232    
## Number.of.Motors3              -0.672451   1.319583  -0.510  0.61034    
## Number.of.Motors>3             -0.381830   1.638538  -0.233  0.81574    
## Distance                        0.876821   0.128842   6.805 1.01e-11 ***
## Travel.Period                   0.010150   0.014937   0.680  0.49680    
## Cost                           -0.917914   0.132180  -6.944 3.80e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 206.29  on 597  degrees of freedom
## AIC: 268.29
## 
## Number of Fisher Scoring iterations: 7
# Model with multi-variables (remove variables with p-value>0,05 in mg): Central.Area (3), Bus.Stop.Present (17), Occupation (20), Number.of.Motors (29), Travel.Period (8), Weather (13) --> 10 variables
mg1 = glm(Bus ~ Purpose + Frequency + Temporary.Stop.Number + Age + Income + Number.of.Children + Motor.Certificate + Motor.Owning + Distance + Cost, family = binomial, data = datMB)
mg1
## 
## Call:  glm(formula = Bus ~ Purpose + Frequency + Temporary.Stop.Number + 
##     Age + Income + Number.of.Children + Motor.Certificate + Motor.Owning + 
##     Distance + Cost, family = binomial, data = datMB)
## 
## Coefficients:
##              (Intercept)   PurposePicking Children      PurposeEntertainment  
##                   3.0872                   -1.0713                   -1.8459  
##            PurposeOthers          Frequency2 times          Frequency3 times  
##                   0.5242                    1.1266                    0.4545  
##       Frequency> 3 times  Temporary.Stop.NumberYes                  Age25-60  
##                  -1.4192                   -1.0269                   -0.2575  
##                   Age>60     Income(8-15) millions    Income(15-25) millions  
##                   3.3903                   -1.1220                   -0.9255  
##       Income>25 millions     Number.of.ChildrenYes      Motor.CertificateYes  
##                  -3.0608                   -0.3426                   -1.2884  
##          Motor.OwningYes                  Distance                      Cost  
##                  -2.1378                    0.9014                   -0.9209  
## 
## Degrees of Freedom: 627 Total (i.e. Null);  610 Residual
## Null Deviance:       582.8 
## Residual Deviance: 233.9     AIC: 269.9
summary(mg1)
## 
## Call:
## glm(formula = Bus ~ Purpose + Frequency + Temporary.Stop.Number + 
##     Age + Income + Number.of.Children + Motor.Certificate + Motor.Owning + 
##     Distance + Cost, family = binomial, data = datMB)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4037  -0.3092  -0.1858  -0.0852   3.3956  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                3.0872     0.8730   3.536 0.000406 ***
## PurposePicking Children   -1.0713     0.7518  -1.425 0.154160    
## PurposeEntertainment      -1.8459     0.6046  -3.053 0.002265 ** 
## PurposeOthers              0.5242     0.8808   0.595 0.551783    
## Frequency2 times           1.1266     0.8800   1.280 0.200455    
## Frequency3 times           0.4545     0.8299   0.548 0.583885    
## Frequency> 3 times        -1.4192     0.7168  -1.980 0.047698 *  
## Temporary.Stop.NumberYes  -1.0269     0.3879  -2.647 0.008113 ** 
## Age25-60                  -0.2575     0.4559  -0.565 0.572229    
## Age>60                     3.3903     0.8730   3.883 0.000103 ***
## Income(8-15) millions     -1.1220     0.4251  -2.640 0.008300 ** 
## Income(15-25) millions    -0.9255     0.4920  -1.881 0.059960 .  
## Income>25 millions        -3.0608     1.3231  -2.313 0.020706 *  
## Number.of.ChildrenYes     -0.3426     0.3992  -0.858 0.390684    
## Motor.CertificateYes      -1.2884     0.5358  -2.404 0.016196 *  
## Motor.OwningYes           -2.1378     0.4664  -4.584 4.57e-06 ***
## Distance                   0.9014     0.1146   7.867 3.65e-15 ***
## Cost                      -0.9209     0.1199  -7.680 1.59e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 233.87  on 610  degrees of freedom
## AIC: 269.87
## 
## Number of Fisher Scoring iterations: 7
## Way 2: Use function lmr in package "rms" - library(rms) - ml = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Non.Bus.Reason + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Motor.Owning + Number.of.Motors +Distance + Travel.Period + Cost) - ml
# Find Parsimonious model - Tìm mô hình tối ưu
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-5)
head(datMB)
##   Bus.Stop.Condition Central.Area    Purpose Frequency Departure.Time Distance
## 2                Yes           No Work/Study > 3 times        Morning        8
## 3                Yes           No Work/Study > 3 times        Evening        5
## 4                Yes           No Work/Study > 3 times        Morning        5
## 5                Yes           No Work/Study > 3 times        Evening        8
## 8                Yes           No Work/Study > 3 times        Morning       10
## 9                Yes           No Work/Study > 3 times      Afternoon       12
##   Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 2            15                Yes           Yes                   Yes
## 3            15                Yes           Yes                   Yes
## 4            10                Yes           Yes                   Yes
## 5            15                Yes           Yes                    No
## 8            25                Yes           Yes                    No
## 9            30                Yes           Yes                   Yes
##   Mode.Choice.Reason    Weather Weekend    Non.Bus.Reason Cost Bus.Stop.Present
## 2        Comfortable Sunny/cool      No          No Route    8       Don't know
## 3      Accessibility Sunny/cool      No     Uncomfortable    5              Yes
## 4        Comfortable Sunny/cool      No Long waiting time    5              Yes
## 5        Comfortable Sunny/cool      No     Uncomfortable    8       Don't know
## 8        Comfortable Sunny/cool     Yes     Uncomfortable   10              Yes
## 9        Comfortable Sunny/cool      No          No Route   12              Yes
##   Gender   Age       Occupation           Income Number.of.Children
## 2 Female 15-24 Officers/Workers (15-25) millions                Yes
## 3   Male 15-24  Pupils/Students     >25 millions                 No
## 4   Male 15-24  Pupils/Students (15-25) millions                 No
## 5 Female 25-60 Officers/Workers (15-25) millions                Yes
## 8 Female 25-60       Free labor  (8-15) millions                 No
## 9 Female 15-24       Free labor (15-25) millions                Yes
##   Motor.Certificate Motor.Owning Number.of.Motors Bus
## 2               Yes          Yes                3  No
## 3               Yes          Yes                3  No
## 4               Yes          Yes                3  No
## 5               Yes          Yes                2  No
## 8               Yes          Yes                2  No
## 9               Yes          Yes                3  No
dim(datMB)
## [1] 628  25
str(datMB)
## 'data.frame':    628 obs. of  25 variables:
##  $ Bus.Stop.Condition   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 1 1 ...
##  $ Central.Area         : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Purpose              : Factor w/ 4 levels "Work/Study","Picking Children",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : Factor w/ 4 levels "Once","2 times",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : Factor w/ 4 levels "Morning","Afternoon",..: 1 3 1 3 1 2 1 1 1 1 ...
##  $ Distance             : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ Travel.Period        : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ Sidewalk.Clearance   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Lane.Separate        : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Temporary.Stop.Number: Factor w/ 2 levels "No","Yes": 2 2 2 1 1 2 2 1 2 1 ...
##  $ Mode.Choice.Reason   : Factor w/ 6 levels "Safety","Comfortable",..: 2 4 2 2 2 2 2 2 4 3 ...
##  $ Weather              : Factor w/ 2 levels "Sunny/cool","Rainny": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Weekend              : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
##  $ Non.Bus.Reason       : Factor w/ 6 levels "No Route","Uncomfortable",..: 1 2 4 2 2 1 4 4 4 2 ...
##  $ Cost                 : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ Bus.Stop.Present     : Factor w/ 3 levels "No","Yes","Don't know": 3 2 2 3 2 2 2 2 2 2 ...
##  $ Gender               : Factor w/ 2 levels "Female","Male": 1 2 2 1 1 1 2 2 2 2 ...
##  $ Age                  : Factor w/ 3 levels "15-24","25-60",..: 1 1 1 2 2 1 2 1 2 1 ...
##  $ Occupation           : Factor w/ 5 levels "Pupils/Students",..: 2 1 1 2 4 4 2 1 4 4 ...
##  $ Income               : Factor w/ 4 levels "<8 millions",..: 3 4 3 3 2 3 3 2 2 1 ...
##  $ Number.of.Children   : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 2 1 1 2 1 ...
##  $ Motor.Certificate    : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
##  $ Motor.Owning         : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 2 2 ...
##  $ Number.of.Motors     : Factor w/ 5 levels "None","1","2",..: 4 4 4 3 3 4 4 4 3 2 ...
##  $ Bus                  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## all variables, remove 2 variables: Mode.Choice.Reason and Non.Bus.Reason --> 24 variables
xvars <- datMB[,c(1,2,3,4,5,8,9,13,10,12,16,17,18,19,20,21,22,23,24,6,7,15)]
y <- Bus
bma.search <- bic.glm(xvars, y, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search)
## 
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   10  models were selected
##  Best  5  models (cumulative posterior probability =  0.8602 ): 
## 
##                                  p!=0    EV       SD      model 1   
## Intercept                        100     1.38731  0.7215      1.6243
## Bus.Stop.Condition                83.2                              
##                   .Yes                  -0.94588  0.5411     -1.1883
## Central.Area                       0.0                              
##             .Yes                         0.00000  0.0000       .    
## Purpose                            0.0                              
##        .Picking Children                 0.00000  0.0000       .    
##        .Entertainment                    0.00000  0.0000       .    
##        .Others                           0.00000  0.0000       .    
## Frequency                         64.6                              
##          .2 times                        0.95031  0.9546      1.4941
##          .3 times                        0.14306  0.6259      0.1221
##          .> 3 times                     -0.57715  0.6554     -0.9287
## Departure.Time                     0.0                              
##               .Afternoon                 0.00000  0.0000       .    
##               .Evening                   0.00000  0.0000       .    
##               .Others                    0.00000  0.0000       .    
## Sidewalk.Clearance                 0.0                              
##                   .Yes                   0.00000  0.0000       .    
## Lane.Separate                      0.0                              
##              .Yes                        0.00000  0.0000       .    
## Weekend                            0.0                              
##        .Yes                              0.00000  0.0000       .    
## Temporary.Stop.Number            100.0                              
##                      .Yes               -1.34423  0.3662     -1.4018
## Weather                           39.9                              
##        .Rainny                           1.21275  1.6779      3.0692
## Bus.Stop.Present                   0.0                              
##                 .Yes                     0.00000  0.0000       .    
##                 .Don't know              0.00000  0.0000       .    
## Gender                             0.0                              
##       .Male                              0.00000  0.0000       .    
## Age                              100.0                              
##    .25-60                               -0.81142  0.3863     -0.8205
##    .>60                                  2.95947  0.7167      3.0598
## Occupation                         0.0                              
##           .Officers/Workers              0.00000  0.0000       .    
##           .Housewife/Unemployed          0.00000  0.0000       .    
##           .Free labor                    0.00000  0.0000       .    
##           .Others                        0.00000  0.0000       .    
## Income                            11.0                              
##       .(8-15) millions                  -0.13621  0.4088       .    
##       .(15-25) millions                 -0.10695  0.3396       .    
##       .>25 millions                     -0.36119  1.1031       .    
## Number.of.Children                 0.0                              
##                   .Yes                   0.00000  0.0000       .    
## Motor.Certificate                  5.3                              
##                  .Yes                   -0.06683  0.3005       .    
## Motor.Owning                     100.0                              
##             .Yes                        -2.47338  0.4071     -2.4756
## Number.of.Motors                   0.0                              
##                 .1                       0.00000  0.0000       .    
##                 .2                       0.00000  0.0000       .    
##                 .3                       0.00000  0.0000       .    
##                 .>3                      0.00000  0.0000       .    
## Distance                         100.0   0.87811  0.1166      0.8547
## Travel.Period                      0.0   0.00000  0.0000       .    
## Cost                             100.0  -0.91926  0.1217     -0.9098
##                                                                     
## nVar                                                            8   
## BIC                                                       -3719.1690
## post prob                                                     0.343 
##                                  model 2     model 3     model 4     model 5   
## Intercept                            1.4797      1.0001      1.8399      1.1249
## Bus.Stop.Condition                                                             
##                   .Yes              -1.1111     -1.0663     -1.1234       .    
## Central.Area                                                                   
##             .Yes                      .           .           .           .    
## Purpose                                                                        
##        .Picking Children              .           .           .           .    
##        .Entertainment                 .           .           .           .    
##        .Others                        .           .           .           .    
## Frequency                                                                      
##          .2 times                    1.4770       .           .           .    
##          .3 times                    0.3490       .           .           .    
##          .> 3 times                 -0.8570       .           .           .    
## Departure.Time                                                                 
##               .Afternoon              .           .           .           .    
##               .Evening                .           .           .           .    
##               .Others                 .           .           .           .    
## Sidewalk.Clearance                                                             
##                   .Yes                .           .           .           .    
## Lane.Separate                                                                  
##              .Yes                     .           .           .           .    
## Weekend                                                                        
##        .Yes                           .           .           .           .    
## Temporary.Stop.Number                                                          
##                      .Yes           -1.2821     -1.3164     -1.3633     -1.2977
## Weather                                                                        
##        .Rainny                        .           .           .           .    
## Bus.Stop.Present                                                               
##                 .Yes                  .           .           .           .    
##                 .Don't know           .           .           .           .    
## Gender                                                                         
##       .Male                           .           .           .           .    
## Age                                                                            
##    .25-60                           -0.8394     -0.8675     -0.7296     -0.7221
##    .>60                              2.9715      2.9792      3.1317      2.6518
## Occupation                                                                     
##           .Officers/Workers           .           .           .           .    
##           .Housewife/Unemployed       .           .           .           .    
##           .Free labor                 .           .           .           .    
##           .Others                     .           .           .           .    
## Income                                                                         
##       .(8-15) millions                .           .         -1.2668       .    
##       .(15-25) millions               .           .         -0.9489       .    
##       .>25 millions                   .           .         -3.2982       .    
## Number.of.Children                                                             
##                   .Yes                .           .           .           .    
## Motor.Certificate                                                              
##                  .Yes                 .           .           .         -1.2614
## Motor.Owning                                                                   
##             .Yes                    -2.4717     -2.5362     -2.6858     -2.0477
## Number.of.Motors                                                               
##                 .1                    .           .           .           .    
##                 .2                    .           .           .           .    
##                 .3                    .           .           .           .    
##                 .>3                   .           .           .           .    
## Distance                             0.8785      0.9027      0.9739      0.8622
## Travel.Period                         .           .           .           .    
## Cost                                -0.9204     -0.9365     -1.0008     -0.8805
##                                                                                
## nVar                                   7           6           7           6   
## BIC                              -3718.2087  -3717.7152  -3716.3825  -3715.4308
## post prob                            0.212       0.166       0.085       0.053
imageplot.bma(bma.search)

## Remove insignificant variables: remove variables unsignificant in t1 --> 16 variables
xvars1 <- datMB[,c(2,3,4,10,12,16,18,19,20,21,22,23,24,6,7,15)]
y1 <- Bus
bma.search1 <- bic.glm(xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
summary(bma.search1)
## 
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   17  models were selected
##  Best  5  models (cumulative posterior probability =  0.5917 ): 
## 
##                                  p!=0    EV       SD      model 1   
## Intercept                        100     0.97293  0.7048   1.125e+00
## Central.Area                      11.5                              
##             .Yes                         0.08340  0.2609       .    
## Purpose                            0.0                              
##        .Picking Children                 0.00000  0.0000       .    
##        .Entertainment                    0.00000  0.0000       .    
##        .Others                           0.00000  0.0000       .    
## Frequency                         39.0                              
##          .2 times                        0.52127  0.8175       .    
##          .3 times                        0.08643  0.4942       .    
##          .> 3 times                     -0.33598  0.5775       .    
## Temporary.Stop.Number            100.0                              
##                      .Yes               -1.34510  0.3649  -1.298e+00
## Weather                           45.1                              
##        .Rainny                           1.34016  1.6925       .    
## Bus.Stop.Present                   0.0                              
##                 .Yes                     0.00000  0.0000       .    
##                 .Don't know              0.00000  0.0000       .    
## Age                              100.0                              
##    .25-60                               -0.73418  0.3851  -7.221e-01
##    .>60                                  2.62032  0.6432   2.652e+00
## Occupation                         0.0                              
##           .Officers/Workers              0.00000  0.0000       .    
##           .Housewife/Unemployed          0.00000  0.0000       .    
##           .Free labor                    0.00000  0.0000       .    
##           .Others                        0.00000  0.0000       .    
## Income                            12.8                              
##       .(8-15) millions                  -0.15571  0.4305       .    
##       .(15-25) millions                 -0.09866  0.3023       .    
##       .>25 millions                     -0.41298  1.1673       .    
## Number.of.Children                 0.0                              
##                   .Yes                   0.00000  0.0000       .    
## Motor.Certificate                 47.4                              
##                  .Yes                   -0.57227  0.6824  -1.261e+00
## Motor.Owning                     100.0                              
##             .Yes                        -2.24197  0.4188  -2.048e+00
## Number.of.Motors                   0.0                              
##                 .1                       0.00000  0.0000       .    
##                 .2                       0.00000  0.0000       .    
##                 .3                       0.00000  0.0000       .    
##                 .>3                      0.00000  0.0000       .    
## Distance                         100.0   0.84141  0.1117   8.622e-01
## Travel.Period                      0.0   0.00000  0.0000       .    
## Cost                             100.0  -0.86873  0.1162  -8.805e-01
##                                                                     
## nVar                                                         6      
## BIC                                                       -3.715e+03
## post prob                                                  0.163    
##                                  model 2     model 3     model 4     model 5   
## Intercept                         1.193e+00   8.674e-01   9.620e-01   3.862e-01
## Central.Area                                                                   
##             .Yes                      .           .           .           .    
## Purpose                                                                        
##        .Picking Children              .           .           .           .    
##        .Entertainment                 .           .           .           .    
##        .Others                        .           .           .           .    
## Frequency                                                                      
##          .2 times                     .       1.334e+00   1.335e+00       .    
##          .3 times                     .       2.773e-01   7.337e-02       .    
##          .> 3 times                   .      -8.726e-01  -9.415e-01       .    
## Temporary.Stop.Number                                                          
##                      .Yes        -1.387e+00  -1.282e+00  -1.370e+00  -1.307e+00
## Weather                                                                        
##        .Rainny                    3.179e+00       .       2.774e+00       .    
## Bus.Stop.Present                                                               
##                 .Yes                  .           .           .           .    
##                 .Don't know           .           .           .           .    
## Age                                                                            
##    .25-60                        -7.168e-01  -7.735e-01  -7.569e-01  -8.230e-01
##    .>60                           2.692e+00   2.546e+00   2.595e+00   2.563e+00
## Occupation                                                                     
##           .Officers/Workers           .           .           .           .    
##           .Housewife/Unemployed       .           .           .           .    
##           .Free labor                 .           .           .           .    
##           .Others                     .           .           .           .    
## Income                                                                         
##       .(8-15) millions                .           .           .           .    
##       .(15-25) millions               .           .           .           .    
##       .>25 millions                   .           .           .           .    
## Number.of.Children                                                             
##                   .Yes                .           .           .           .    
## Motor.Certificate                                                              
##                  .Yes            -1.272e+00       .           .           .    
## Motor.Owning                                                                   
##             .Yes                 -2.048e+00  -2.361e+00  -2.345e+00  -2.411e+00
## Number.of.Motors                                                               
##                 .1                    .           .           .           .    
##                 .2                    .           .           .           .    
##                 .3                    .           .           .           .    
##                 .>3                   .           .           .           .    
## Distance                          8.347e-01   8.314e-01   8.063e-01   8.568e-01
## Travel.Period                         .           .           .           .    
## Cost                             -8.679e-01  -8.603e-01  -8.480e-01  -8.783e-01
##                                                                                
## nVar                                7           6           7           5      
## BIC                              -3.715e+03  -3.715e+03  -3.714e+03  -3.714e+03
## post prob                         0.149       0.103       0.097       0.080
imageplot.bma(bma.search1)

## Choose Model 1 from bma.search : 7 variables (remove Frequency variables because of p-value>0.05)
mg2 = glm(Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + Weather + Age + Motor.Owning + Distance + Cost, family = binomial, data = datMB)
mg2
## 
## Call:  glm(formula = Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + 
##     Weather + Age + Motor.Owning + Distance + Cost, family = binomial, 
##     data = datMB)
## 
## Coefficients:
##              (Intercept)     Bus.Stop.ConditionYes  Temporary.Stop.NumberYes  
##                   1.0827                   -1.1454                   -1.4357  
##            WeatherRainny                  Age25-60                    Age>60  
##                   3.3460                   -0.8581                    3.0726  
##          Motor.OwningYes                  Distance                      Cost  
##                  -2.5521                    0.8812                   -0.9272  
## 
## Degrees of Freedom: 627 Total (i.e. Null);  619 Residual
## Null Deviance:       582.8 
## Residual Deviance: 269.2     AIC: 287.2
summary(mg2)
## 
## Call:
## glm(formula = Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + 
##     Weather + Age + Motor.Owning + Distance + Cost, family = binomial, 
##     data = datMB)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5543  -0.3473  -0.2047  -0.1105   3.5424  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0827     0.4366   2.480  0.01313 *  
## Bus.Stop.ConditionYes     -1.1454     0.3559  -3.218  0.00129 ** 
## Temporary.Stop.NumberYes  -1.4357     0.3665  -3.917 8.97e-05 ***
## WeatherRainny              3.3460     1.2303   2.720  0.00653 ** 
## Age25-60                  -0.8581     0.3723  -2.305  0.02118 *  
## Age>60                     3.0726     0.6714   4.577 4.73e-06 ***
## Motor.OwningYes           -2.5521     0.3773  -6.763 1.35e-11 ***
## Distance                   0.8812     0.1097   8.034 9.47e-16 ***
## Cost                      -0.9272     0.1168  -7.941 2.00e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 269.20  on 619  degrees of freedom
## AIC: 287.2
## 
## Number of Fisher Scoring iterations: 6
## Choose Model 1 from bma.search1 : 6 variables : Khong chon do Motor.Certificate va Motor.Owning co kha nang tuong quan
mg3 = glm(Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + Motor.Owning + Distance + Cost, family = binomial, data = datMB)
mg3
## 
## Call:  glm(formula = Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + 
##     Motor.Owning + Distance + Cost, family = binomial, data = datMB)
## 
## Coefficients:
##              (Intercept)  Temporary.Stop.NumberYes                  Age25-60  
##                   1.1249                   -1.2977                   -0.7221  
##                   Age>60      Motor.CertificateYes           Motor.OwningYes  
##                   2.6518                   -1.2614                   -2.0477  
##                 Distance                      Cost  
##                   0.8622                   -0.8805  
## 
## Degrees of Freedom: 627 Total (i.e. Null);  620 Residual
## Null Deviance:       582.8 
## Residual Deviance: 278.9     AIC: 294.9
summary(mg3)
## 
## Call:
## glm(formula = Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + 
##     Motor.Owning + Distance + Cost, family = binomial, data = datMB)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6521  -0.3234  -0.2352  -0.1597   3.4706  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.1249     0.4578   2.457 0.014011 *  
## Temporary.Stop.NumberYes  -1.2977     0.3518  -3.689 0.000225 ***
## Age25-60                  -0.7221     0.3726  -1.938 0.052588 .  
## Age>60                     2.6518     0.6239   4.250 2.14e-05 ***
## Motor.CertificateYes      -1.2614     0.4444  -2.839 0.004532 ** 
## Motor.OwningYes           -2.0477     0.3807  -5.378 7.51e-08 ***
## Distance                   0.8622     0.1091   7.900 2.79e-15 ***
## Cost                      -0.8805     0.1135  -7.759 8.56e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 278.94  on 620  degrees of freedom
## AIC: 294.94
## 
## Number of Fisher Scoring iterations: 6
## --> Chọn mg2
## Calculate OR (odd ratio)
exp(mg2$coefficients)
##              (Intercept)    Bus.Stop.ConditionYes Temporary.Stop.NumberYes 
##               2.95276908               0.31809858               0.23795804 
##            WeatherRainny                 Age25-60                   Age>60 
##              28.38966923               0.42398082              21.59839491 
##          Motor.OwningYes                 Distance                     Cost 
##               0.07791836               2.41388678               0.39566691
exp(coef(mg2))
##              (Intercept)    Bus.Stop.ConditionYes Temporary.Stop.NumberYes 
##               2.95276908               0.31809858               0.23795804 
##            WeatherRainny                 Age25-60                   Age>60 
##              28.38966923               0.42398082              21.59839491 
##          Motor.OwningYes                 Distance                     Cost 
##               0.07791836               2.41388678               0.39566691
library(epiDisplay)
## Loading required package: foreign
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: nnet
## 
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
## 
##     alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
## 
##     alpha
logistic.display(mg2)
## 
## Logistic regression predicting Bus : Yes vs No 
##  
##                                  crude OR(95%CI)     adj. OR(95%CI)     
## Bus.Stop.Condition: Yes vs No    0.71 (0.47,1.08)    0.32 (0.16,0.64)   
##                                                                         
## Temporary.Stop.Number: Yes vs No 0.33 (0.21,0.52)    0.24 (0.12,0.49)   
##                                                                         
## Weather: Rainny vs Sunny/cool    22.99 (4.9,107.95)  28.39 (2.55,316.5) 
##                                                                         
## Age: ref.=15-24                                                         
##    25-60                         0.3 (0.18,0.48)     0.42 (0.2,0.88)    
##    >60                           7.47 (3.24,17.2)    21.6 (5.79,80.52)  
##                                                                         
## Motor.Owning: Yes vs No          0.1 (0.06,0.17)     0.08 (0.04,0.16)   
##                                                                         
## Distance (cont. var.)            1.08 (1.04,1.12)    2.41 (1.95,2.99)   
##                                                                         
## Cost (cont. var.)                0.94 (0.89,0.99)    0.4 (0.31,0.5)     
##                                                                         
##                                  P(Wald's test) P(LR-test)
## Bus.Stop.Condition: Yes vs No    0.001          < 0.001   
##                                                           
## Temporary.Stop.Number: Yes vs No < 0.001        < 0.001   
##                                                           
## Weather: Rainny vs Sunny/cool    0.007          0.006     
##                                                           
## Age: ref.=15-24                                 < 0.001   
##    25-60                         0.021                    
##    >60                           < 0.001                  
##                                                           
## Motor.Owning: Yes vs No          < 0.001        < 0.001   
##                                                           
## Distance (cont. var.)            < 0.001        < 0.001   
##                                                           
## Cost (cont. var.)                < 0.001        < 0.001   
##                                                           
## Log-likelihood = -134.5978
## No. of observations = 628
## AIC value = 287.1957
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
## 
##     dotplot
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## Registered S3 method overwritten by 'rms':
##   method       from      
##   print.lrtest epiDisplay
## 
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
## 
##     lrtest
## The following objects are masked from 'package:car':
## 
##     Predict, vif
mg2.c2 = lrm(Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + Weather + Age + Motor.Owning + Distance + Cost, data = datMB)
mg2.c2
## Logistic Regression Model
##  
##  lrm(formula = Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + 
##      Weather + Age + Motor.Owning + Distance + Cost, data = datMB)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           628    LR chi2     313.55    R2       0.650    C       0.909    
##   No           518    d.f.             8    g        2.814    Dxy     0.818    
##   Yes          110    Pr(> chi2) <0.0001    gr      16.680    gamma   0.819    
##  max |deriv| 1e-08                          gp       0.250    tau-a   0.237    
##                                             Brier    0.055                     
##  
##                            Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept                  1.0827 0.4366  2.48  0.0131  
##  Bus.Stop.Condition=Yes    -1.1454 0.3559 -3.22  0.0013  
##  Temporary.Stop.Number=Yes -1.4357 0.3666 -3.92  <0.0001 
##  Weather=Rainny             3.3460 1.2303  2.72  0.0065  
##  Age=25-60                 -0.8581 0.3723 -2.30  0.0212  
##  Age=>60                    3.0726 0.6714  4.58  <0.0001 
##  Motor.Owning=Yes          -2.5521 0.3774 -6.76  <0.0001 
##  Distance                   0.8812 0.1097  8.03  <0.0001 
##  Cost                      -0.9272 0.1168 -7.94  <0.0001 
## 

4. Test models

# Test and compare  models mg2 và mg3
# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình)
deviance(mg2) # 7 variables
## [1] 269.1957
deviance(mg3) # 6 variables
## [1] 278.9441
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê
library(rms)
lrtest(mg2, mg3)
## 
## Model 1: Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + Weather + 
##     Age + Motor.Owning + Distance + Cost
## Model 2: Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + Motor.Owning + 
##     Distance + Cost
## 
##  L.R. Chisq        d.f.           P 
## 9.748373717 1.000000000 0.001794814
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt
AIC(mg2)
## [1] 287.1957
AIC(mg3)
## [1] 294.9441
## Calculate MacFadden's R2 voi packages "pscl"
library(pscl)
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -134.5978398 -291.3752088  313.5547380    0.5380601    0.3930391    0.6500434
pR2(mg3)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML         r2CU 
## -139.4720267 -291.3752088  303.8063643    0.5213319    0.3835439    0.6343392
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(datMB$Bus)
##     Yes
## No    0
## Yes   1
glm.probs.mg2 <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs.mg2[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##           2           3           4           5           8           9 
## 0.011915126 0.013651950 0.013651950 0.021033808 0.019222677 0.009934699 
##          10          13          15          16 
## 0.004642192 0.450046311 0.020735248 0.160686683
glm.pred.mg2 <- rep("No", 628)
glm.pred.mg2[glm.probs.mg2 >= 0.5] = "Yes"
table(glm.pred.mg2, datMB$Bus)
##             
## glm.pred.mg2  No Yes
##          No  513  36
##          Yes   5  74
mean(glm.pred.mg2 == datMB$Bus) # Tỷ lệ dự báo chính xác của mô hình 93,47%
## [1] 0.9347134
round(prop.table(table(datMB$Bus))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là sd xe máy 82.48% < 93.47% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##    No   Yes 
## 82.48 17.52
## Model mg3
contrasts(datMB$Bus)
##     Yes
## No    0
## Yes   1
glm.probs.mg3 <- predict(mg3, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg3[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##          2          3          4          5          8          9         10 
## 0.02586326 0.02728683 0.02728683 0.04508261 0.04352897 0.02407716 0.01227855 
##         13         15         16 
## 0.74456434 0.01419227 0.09469469
glm.pred.mg3 <- rep("No", 628)
glm.pred.mg3[glm.probs.mg3 >= 0.5] = "Yes"
table(glm.pred.mg3, datMB$Bus)
##             
## glm.pred.mg3  No Yes
##          No  514  36
##          Yes   4  74
mean(glm.pred.mg3 == datMB$Bus) # Tỷ lệ dự báo chính xác của mô hình 93,63% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-95,06 = 5,94%
## [1] 0.9363057
round(prop.table(table(datMB$Bus))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không sd xe buýt 82.48% < 93,63% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##    No   Yes 
## 82.48 17.52
# 4.5. Calculate Sensitivity - Độ nhạy 
## Model mg2
513/(513+5) # Đạt 99,03% --> Rất lý tưởng: Mô hình xác định tới 99,03% người sử dụng xe máy
## [1] 0.9903475
## Model mg3
514/(514+4) # Đạt 99,22% --> Rất lý tưởng: Mô hình xác định tới 99,22% người sử dụng xe máy
## [1] 0.992278
74/(74+36) # Đạt 67.27% --> Mô hình xác định được 67.27% người sử dụng xe buýt
## [1] 0.6727273
# 4.6. Calculate Specificity - Độ đặc hiệu
## Model mg2
74/(74+36) # Đạt 67.27% --> Mô hình xác định được 67.27% người sử dụng xe buýt
## [1] 0.6727273
## Model mg3
74/(74+36) # Đạt 67.27% --> Mô hình xác định được 67.27% người sử dụng xe buýt
## [1] 0.6727273
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với Hàm HLgof.test(fit = fitted(mg2), obs = datBus$Bus) trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
   ## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
## The following object is masked from 'package:purrr':
## 
##     lift
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mg2train = train(Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + Weather + Age + Motor.Owning + Distance + Cost, data = datMB, method = "glm", family = "binomial", trControl = ctrl)
summary(mg2train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5543  -0.3473  -0.2047  -0.1105   3.5424  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0827     0.4366   2.480  0.01313 *  
## Bus.Stop.ConditionYes     -1.1454     0.3559  -3.218  0.00129 ** 
## Temporary.Stop.NumberYes  -1.4357     0.3665  -3.917 8.97e-05 ***
## WeatherRainny              3.3460     1.2303   2.720  0.00653 ** 
## `Age25-60`                -0.8581     0.3723  -2.305  0.02118 *  
## `Age>60`                   3.0726     0.6714   4.577 4.73e-06 ***
## Motor.OwningYes           -2.5521     0.3773  -6.763 1.35e-11 ***
## Distance                   0.8812     0.1097   8.034 9.47e-16 ***
## Cost                      -0.9272     0.1168  -7.941 2.00e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 269.20  on 619  degrees of freedom
## AIC: 287.2
## 
## Number of Fisher Scoring iterations: 6
mg3train = train(Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + Motor.Owning + Distance + Cost, data = datMB, method = "glm", family = "binomial", trControl = ctrl)
summary(mg3train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6521  -0.3234  -0.2352  -0.1597   3.4706  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.1249     0.4578   2.457 0.014011 *  
## Temporary.Stop.NumberYes  -1.2977     0.3518  -3.689 0.000225 ***
## `Age25-60`                -0.7221     0.3726  -1.938 0.052588 .  
## `Age>60`                   2.6518     0.6239   4.250 2.14e-05 ***
## Motor.CertificateYes      -1.2614     0.4444  -2.839 0.004532 ** 
## Motor.OwningYes           -2.0477     0.3807  -5.378 7.51e-08 ***
## Distance                   0.8622     0.1091   7.900 2.79e-15 ***
## Cost                      -0.8805     0.1135  -7.759 8.56e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 278.94  on 620  degrees of freedom
## AIC: 294.94
## 
## Number of Fisher Scoring iterations: 6
  ## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg2 <- predict(mg2train, newdata = datMB)
confusionMatrix(data = predmg2, datMB$Bus)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  513  36
##        Yes   5  74
##                                           
##                Accuracy : 0.9347          
##                  95% CI : (0.9125, 0.9527)
##     No Information Rate : 0.8248          
##     P-Value [Acc > NIR] : 4.705e-16       
##                                           
##                   Kappa : 0.7459          
##                                           
##  Mcnemar's Test P-Value : 2.797e-06       
##                                           
##             Sensitivity : 0.9903          
##             Specificity : 0.6727          
##          Pos Pred Value : 0.9344          
##          Neg Pred Value : 0.9367          
##              Prevalence : 0.8248          
##          Detection Rate : 0.8169          
##    Detection Prevalence : 0.8742          
##       Balanced Accuracy : 0.8315          
##                                           
##        'Positive' Class : No              
## 
predmg3 <- predict(mg3train, newdata = datMB)
confusionMatrix(data = predmg3, datMB$Bus)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  514  36
##        Yes   4  74
##                                           
##                Accuracy : 0.9363          
##                  95% CI : (0.9143, 0.9541)
##     No Information Rate : 0.8248          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7511          
##                                           
##  Mcnemar's Test P-Value : 9.509e-07       
##                                           
##             Sensitivity : 0.9923          
##             Specificity : 0.6727          
##          Pos Pred Value : 0.9345          
##          Neg Pred Value : 0.9487          
##              Prevalence : 0.8248          
##          Detection Rate : 0.8185          
##    Detection Prevalence : 0.8758          
##       Balanced Accuracy : 0.8325          
##                                           
##        'Positive' Class : No              
## 
  ## Lấy dataframe với thông tin Accuracy của mô hình
a = mg2train$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg2" # Thêm biến mô hình
a
##      Accuracy     Kappa   Mohinh
## 1   0.8870968 0.6265060 Logitmg2
## 2   0.9354839 0.7422037 Logitmg2
## 3   0.9047619 0.5790646 Logitmg2
## 4   0.9047619 0.6142857 Logitmg2
## 5   0.9206349 0.6645367 Logitmg2
## 6   0.9206349 0.6914789 Logitmg2
## 7   0.9365079 0.7627119 Logitmg2
## 8   0.9206349 0.6914789 Logitmg2
## 9   0.9523810 0.8148874 Logitmg2
## 10  0.9365079 0.7627119 Logitmg2
## 11  0.9365079 0.7627119 Logitmg2
## 12  0.9193548 0.7134935 Logitmg2
## 13  0.9032258 0.6133056 Logitmg2
## 14  0.9523810 0.8148874 Logitmg2
## 15  0.9206349 0.6645367 Logitmg2
## 16  0.9206349 0.6914789 Logitmg2
## 17  0.9365079 0.7627119 Logitmg2
## 18  0.9206349 0.6645367 Logitmg2
## 19  0.9206349 0.6914789 Logitmg2
## 20  0.9365079 0.7627119 Logitmg2
## 21  0.9523810 0.8286491 Logitmg2
## 22  0.9365079 0.7627119 Logitmg2
## 23  0.9193548 0.6637744 Logitmg2
## 24  0.9365079 0.7627119 Logitmg2
## 25  0.9047619 0.5790646 Logitmg2
## 26  0.8888889 0.5303514 Logitmg2
## 27  0.9516129 0.8143713 Logitmg2
## 28  0.9523810 0.8286491 Logitmg2
## 29  0.9523810 0.8286491 Logitmg2
## 30  0.8730159 0.5254237 Logitmg2
## 31  0.9365079 0.7428571 Logitmg2
## 32  0.9365079 0.7627119 Logitmg2
## 33  0.9365079 0.7627119 Logitmg2
## 34  0.9516129 0.8280961 Logitmg2
## 35  0.9206349 0.6645367 Logitmg2
## 36  0.8888889 0.6001813 Logitmg2
## 37  0.9206349 0.6645367 Logitmg2
## 38  0.9516129 0.8143713 Logitmg2
## 39  0.9365079 0.7627119 Logitmg2
## 40  0.9047619 0.5790646 Logitmg2
## 41  0.8730159 0.4857143 Logitmg2
## 42  0.9523810 0.8148874 Logitmg2
## 43  0.9516129 0.8280961 Logitmg2
## 44  0.9206349 0.6645367 Logitmg2
## 45  0.9047619 0.5790646 Logitmg2
## 46  0.9206349 0.6645367 Logitmg2
## 47  0.8888889 0.6278481 Logitmg2
## 48  0.9682540 0.8813559 Logitmg2
## 49  0.9047619 0.6695804 Logitmg2
## 50  0.9193548 0.6906188 Logitmg2
## 51  0.8888889 0.4854142 Logitmg2
## 52  0.9206349 0.6914789 Logitmg2
## 53  0.9523810 0.8148874 Logitmg2
## 54  0.9365079 0.7627119 Logitmg2
## 55  0.9523810 0.8148874 Logitmg2
## 56  0.9047619 0.6440678 Logitmg2
## 57  0.9032258 0.6133056 Logitmg2
## 58  0.9193548 0.6906188 Logitmg2
## 59  0.9047619 0.6695804 Logitmg2
## 60  0.9523810 0.8148874 Logitmg2
## 61  0.9516129 0.8143713 Logitmg2
## 62  0.9523810 0.8286491 Logitmg2
## 63  0.9523810 0.8286491 Logitmg2
## 64  0.9206349 0.6645367 Logitmg2
## 65  0.9032258 0.6133056 Logitmg2
## 66  0.9523810 0.8148874 Logitmg2
## 67  0.9523810 0.8148874 Logitmg2
## 68  0.9047619 0.6142857 Logitmg2
## 69  0.8730159 0.5254237 Logitmg2
## 70  0.9047619 0.5790646 Logitmg2
## 71  0.8870968 0.5668663 Logitmg2
## 72  0.8888889 0.5303514 Logitmg2
## 73  0.9193548 0.6637744 Logitmg2
## 74  0.9523810 0.8405063 Logitmg2
## 75  0.9206349 0.6914789 Logitmg2
## 76  0.9523810 0.8286491 Logitmg2
## 77  0.9523810 0.8148874 Logitmg2
## 78  0.9523810 0.8286491 Logitmg2
## 79  0.9206349 0.6645367 Logitmg2
## 80  0.9047619 0.6142857 Logitmg2
## 81  0.9365079 0.7797203 Logitmg2
## 82  0.9032258 0.6429942 Logitmg2
## 83  0.9047619 0.6142857 Logitmg2
## 84  0.8888889 0.5680705 Logitmg2
## 85  0.9206349 0.6914789 Logitmg2
## 86  1.0000000 1.0000000 Logitmg2
## 87  0.9365079 0.7428571 Logitmg2
## 88  0.9047619 0.6440678 Logitmg2
## 89  0.9206349 0.6914789 Logitmg2
## 90  0.9032258 0.6133056 Logitmg2
## 91  0.9523810 0.8148874 Logitmg2
## 92  0.9193548 0.6637744 Logitmg2
## 93  0.9682540 0.8813559 Logitmg2
## 94  0.9193548 0.7134935 Logitmg2
## 95  0.9206349 0.6645367 Logitmg2
## 96  0.8888889 0.5680705 Logitmg2
## 97  0.9206349 0.6914789 Logitmg2
## 98  0.9047619 0.6142857 Logitmg2
## 99  0.9206349 0.7144152 Logitmg2
## 100 0.9206349 0.6914789 Logitmg2
b = mg3train$resample
b = b[, -3]
b$Mohinh = "Logitmg3"
b
##      Accuracy     Kappa   Mohinh
## 1   0.9838710 0.9426987 Logitmg3
## 2   0.9047619 0.6142857 Logitmg3
## 3   0.9193548 0.6637744 Logitmg3
## 4   0.9523810 0.8148874 Logitmg3
## 5   0.9682540 0.8898601 Logitmg3
## 6   0.9047619 0.6440678 Logitmg3
## 7   0.9365079 0.7428571 Logitmg3
## 8   0.9047619 0.6142857 Logitmg3
## 9   0.8888889 0.5680705 Logitmg3
## 10  0.9365079 0.7428571 Logitmg3
## 11  0.9516129 0.8280961 Logitmg3
## 12  0.9193548 0.7134935 Logitmg3
## 13  0.9206349 0.6645367 Logitmg3
## 14  0.9206349 0.6914789 Logitmg3
## 15  0.8888889 0.5303514 Logitmg3
## 16  0.9523810 0.8286491 Logitmg3
## 17  0.9365079 0.7428571 Logitmg3
## 18  0.9365079 0.7627119 Logitmg3
## 19  0.9365079 0.7428571 Logitmg3
## 20  0.9206349 0.6645367 Logitmg3
## 21  0.9523810 0.8405063 Logitmg3
## 22  0.9523810 0.8286491 Logitmg3
## 23  0.9354839 0.7422037 Logitmg3
## 24  0.8888889 0.4854142 Logitmg3
## 25  0.8888889 0.5303514 Logitmg3
## 26  0.9682540 0.8813559 Logitmg3
## 27  0.9206349 0.6914789 Logitmg3
## 28  0.9677419 0.8809981 Logitmg3
## 29  0.9206349 0.6914789 Logitmg3
## 30  0.9047619 0.6142857 Logitmg3
## 31  0.9047619 0.5790646 Logitmg3
## 32  0.9206349 0.6645367 Logitmg3
## 33  0.8548387 0.5197935 Logitmg3
## 34  0.9193548 0.6637744 Logitmg3
## 35  0.9682540 0.8813559 Logitmg3
## 36  0.9523810 0.8148874 Logitmg3
## 37  0.9206349 0.7144152 Logitmg3
## 38  0.8730159 0.5889070 Logitmg3
## 39  0.9682540 0.8813559 Logitmg3
## 40  0.9206349 0.6914789 Logitmg3
## 41  0.9206349 0.6645367 Logitmg3
## 42  0.9523810 0.8148874 Logitmg3
## 43  0.9047619 0.6142857 Logitmg3
## 44  0.9206349 0.6914789 Logitmg3
## 45  0.9838710 0.9426987 Logitmg3
## 46  0.9677419 0.8894831 Logitmg3
## 47  0.9365079 0.7627119 Logitmg3
## 48  0.8888889 0.5303514 Logitmg3
## 49  0.9206349 0.6645367 Logitmg3
## 50  0.9047619 0.6142857 Logitmg3
## 51  0.9206349 0.6914789 Logitmg3
## 52  0.8870968 0.5292842 Logitmg3
## 53  0.9206349 0.7341772 Logitmg3
## 54  0.8888889 0.4854142 Logitmg3
## 55  0.9682540 0.8813559 Logitmg3
## 56  0.9838710 0.9426987 Logitmg3
## 57  0.9206349 0.6645367 Logitmg3
## 58  0.9206349 0.6645367 Logitmg3
## 59  0.9365079 0.7627119 Logitmg3
## 60  0.9365079 0.7797203 Logitmg3
## 61  0.9365079 0.7797203 Logitmg3
## 62  0.9523810 0.8148874 Logitmg3
## 63  0.9365079 0.7627119 Logitmg3
## 64  0.9516129 0.8143713 Logitmg3
## 65  0.9206349 0.6914789 Logitmg3
## 66  0.9193548 0.6906188 Logitmg3
## 67  0.9047619 0.5790646 Logitmg3
## 68  0.9047619 0.6440678 Logitmg3
## 69  0.9206349 0.6645367 Logitmg3
## 70  0.9523810 0.8148874 Logitmg3
## 71  0.9365079 0.7944535 Logitmg3
## 72  0.9365079 0.7627119 Logitmg3
## 73  0.9047619 0.6142857 Logitmg3
## 74  0.9365079 0.7627119 Logitmg3
## 75  0.9206349 0.6645367 Logitmg3
## 76  0.9354839 0.7422037 Logitmg3
## 77  0.9516129 0.8143713 Logitmg3
## 78  0.9523810 0.8148874 Logitmg3
## 79  0.9047619 0.6142857 Logitmg3
## 80  0.9365079 0.7428571 Logitmg3
## 81  0.9682540 0.8813559 Logitmg3
## 82  0.9523810 0.8148874 Logitmg3
## 83  0.9047619 0.6916803 Logitmg3
## 84  0.8730159 0.5254237 Logitmg3
## 85  0.9523810 0.8148874 Logitmg3
## 86  0.8888889 0.4854142 Logitmg3
## 87  0.9047619 0.5790646 Logitmg3
## 88  0.9677419 0.8809981 Logitmg3
## 89  0.9841270 0.9428830 Logitmg3
## 90  0.9032258 0.5782313 Logitmg3
## 91  0.9365079 0.7627119 Logitmg3
## 92  0.9523810 0.8148874 Logitmg3
## 93  0.9193548 0.6906188 Logitmg3
## 94  0.9523810 0.8148874 Logitmg3
## 95  0.9206349 0.6645367 Logitmg3
## 96  0.9047619 0.6440678 Logitmg3
## 97  0.8888889 0.5303514 Logitmg3
## 98  0.9193548 0.6906188 Logitmg3
## 99  0.9047619 0.6440678 Logitmg3
## 100 0.9682540 0.8813559 Logitmg3
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
##      Accuracy     Kappa   Mohinh
## 1   0.8870968 0.6265060 Logitmg2
## 2   0.9354839 0.7422037 Logitmg2
## 3   0.9047619 0.5790646 Logitmg2
## 4   0.9047619 0.6142857 Logitmg2
## 5   0.9206349 0.6645367 Logitmg2
## 6   0.9206349 0.6914789 Logitmg2
## 7   0.9365079 0.7627119 Logitmg2
## 8   0.9206349 0.6914789 Logitmg2
## 9   0.9523810 0.8148874 Logitmg2
## 10  0.9365079 0.7627119 Logitmg2
## 11  0.9365079 0.7627119 Logitmg2
## 12  0.9193548 0.7134935 Logitmg2
## 13  0.9032258 0.6133056 Logitmg2
## 14  0.9523810 0.8148874 Logitmg2
## 15  0.9206349 0.6645367 Logitmg2
## 16  0.9206349 0.6914789 Logitmg2
## 17  0.9365079 0.7627119 Logitmg2
## 18  0.9206349 0.6645367 Logitmg2
## 19  0.9206349 0.6914789 Logitmg2
## 20  0.9365079 0.7627119 Logitmg2
## 21  0.9523810 0.8286491 Logitmg2
## 22  0.9365079 0.7627119 Logitmg2
## 23  0.9193548 0.6637744 Logitmg2
## 24  0.9365079 0.7627119 Logitmg2
## 25  0.9047619 0.5790646 Logitmg2
## 26  0.8888889 0.5303514 Logitmg2
## 27  0.9516129 0.8143713 Logitmg2
## 28  0.9523810 0.8286491 Logitmg2
## 29  0.9523810 0.8286491 Logitmg2
## 30  0.8730159 0.5254237 Logitmg2
## 31  0.9365079 0.7428571 Logitmg2
## 32  0.9365079 0.7627119 Logitmg2
## 33  0.9365079 0.7627119 Logitmg2
## 34  0.9516129 0.8280961 Logitmg2
## 35  0.9206349 0.6645367 Logitmg2
## 36  0.8888889 0.6001813 Logitmg2
## 37  0.9206349 0.6645367 Logitmg2
## 38  0.9516129 0.8143713 Logitmg2
## 39  0.9365079 0.7627119 Logitmg2
## 40  0.9047619 0.5790646 Logitmg2
## 41  0.8730159 0.4857143 Logitmg2
## 42  0.9523810 0.8148874 Logitmg2
## 43  0.9516129 0.8280961 Logitmg2
## 44  0.9206349 0.6645367 Logitmg2
## 45  0.9047619 0.5790646 Logitmg2
## 46  0.9206349 0.6645367 Logitmg2
## 47  0.8888889 0.6278481 Logitmg2
## 48  0.9682540 0.8813559 Logitmg2
## 49  0.9047619 0.6695804 Logitmg2
## 50  0.9193548 0.6906188 Logitmg2
## 51  0.8888889 0.4854142 Logitmg2
## 52  0.9206349 0.6914789 Logitmg2
## 53  0.9523810 0.8148874 Logitmg2
## 54  0.9365079 0.7627119 Logitmg2
## 55  0.9523810 0.8148874 Logitmg2
## 56  0.9047619 0.6440678 Logitmg2
## 57  0.9032258 0.6133056 Logitmg2
## 58  0.9193548 0.6906188 Logitmg2
## 59  0.9047619 0.6695804 Logitmg2
## 60  0.9523810 0.8148874 Logitmg2
## 61  0.9516129 0.8143713 Logitmg2
## 62  0.9523810 0.8286491 Logitmg2
## 63  0.9523810 0.8286491 Logitmg2
## 64  0.9206349 0.6645367 Logitmg2
## 65  0.9032258 0.6133056 Logitmg2
## 66  0.9523810 0.8148874 Logitmg2
## 67  0.9523810 0.8148874 Logitmg2
## 68  0.9047619 0.6142857 Logitmg2
## 69  0.8730159 0.5254237 Logitmg2
## 70  0.9047619 0.5790646 Logitmg2
## 71  0.8870968 0.5668663 Logitmg2
## 72  0.8888889 0.5303514 Logitmg2
## 73  0.9193548 0.6637744 Logitmg2
## 74  0.9523810 0.8405063 Logitmg2
## 75  0.9206349 0.6914789 Logitmg2
## 76  0.9523810 0.8286491 Logitmg2
## 77  0.9523810 0.8148874 Logitmg2
## 78  0.9523810 0.8286491 Logitmg2
## 79  0.9206349 0.6645367 Logitmg2
## 80  0.9047619 0.6142857 Logitmg2
## 81  0.9365079 0.7797203 Logitmg2
## 82  0.9032258 0.6429942 Logitmg2
## 83  0.9047619 0.6142857 Logitmg2
## 84  0.8888889 0.5680705 Logitmg2
## 85  0.9206349 0.6914789 Logitmg2
## 86  1.0000000 1.0000000 Logitmg2
## 87  0.9365079 0.7428571 Logitmg2
## 88  0.9047619 0.6440678 Logitmg2
## 89  0.9206349 0.6914789 Logitmg2
## 90  0.9032258 0.6133056 Logitmg2
## 91  0.9523810 0.8148874 Logitmg2
## 92  0.9193548 0.6637744 Logitmg2
## 93  0.9682540 0.8813559 Logitmg2
## 94  0.9193548 0.7134935 Logitmg2
## 95  0.9206349 0.6645367 Logitmg2
## 96  0.8888889 0.5680705 Logitmg2
## 97  0.9206349 0.6914789 Logitmg2
## 98  0.9047619 0.6142857 Logitmg2
## 99  0.9206349 0.7144152 Logitmg2
## 100 0.9206349 0.6914789 Logitmg2
## 101 0.9838710 0.9426987 Logitmg3
## 102 0.9047619 0.6142857 Logitmg3
## 103 0.9193548 0.6637744 Logitmg3
## 104 0.9523810 0.8148874 Logitmg3
## 105 0.9682540 0.8898601 Logitmg3
## 106 0.9047619 0.6440678 Logitmg3
## 107 0.9365079 0.7428571 Logitmg3
## 108 0.9047619 0.6142857 Logitmg3
## 109 0.8888889 0.5680705 Logitmg3
## 110 0.9365079 0.7428571 Logitmg3
## 111 0.9516129 0.8280961 Logitmg3
## 112 0.9193548 0.7134935 Logitmg3
## 113 0.9206349 0.6645367 Logitmg3
## 114 0.9206349 0.6914789 Logitmg3
## 115 0.8888889 0.5303514 Logitmg3
## 116 0.9523810 0.8286491 Logitmg3
## 117 0.9365079 0.7428571 Logitmg3
## 118 0.9365079 0.7627119 Logitmg3
## 119 0.9365079 0.7428571 Logitmg3
## 120 0.9206349 0.6645367 Logitmg3
## 121 0.9523810 0.8405063 Logitmg3
## 122 0.9523810 0.8286491 Logitmg3
## 123 0.9354839 0.7422037 Logitmg3
## 124 0.8888889 0.4854142 Logitmg3
## 125 0.8888889 0.5303514 Logitmg3
## 126 0.9682540 0.8813559 Logitmg3
## 127 0.9206349 0.6914789 Logitmg3
## 128 0.9677419 0.8809981 Logitmg3
## 129 0.9206349 0.6914789 Logitmg3
## 130 0.9047619 0.6142857 Logitmg3
## 131 0.9047619 0.5790646 Logitmg3
## 132 0.9206349 0.6645367 Logitmg3
## 133 0.8548387 0.5197935 Logitmg3
## 134 0.9193548 0.6637744 Logitmg3
## 135 0.9682540 0.8813559 Logitmg3
## 136 0.9523810 0.8148874 Logitmg3
## 137 0.9206349 0.7144152 Logitmg3
## 138 0.8730159 0.5889070 Logitmg3
## 139 0.9682540 0.8813559 Logitmg3
## 140 0.9206349 0.6914789 Logitmg3
## 141 0.9206349 0.6645367 Logitmg3
## 142 0.9523810 0.8148874 Logitmg3
## 143 0.9047619 0.6142857 Logitmg3
## 144 0.9206349 0.6914789 Logitmg3
## 145 0.9838710 0.9426987 Logitmg3
## 146 0.9677419 0.8894831 Logitmg3
## 147 0.9365079 0.7627119 Logitmg3
## 148 0.8888889 0.5303514 Logitmg3
## 149 0.9206349 0.6645367 Logitmg3
## 150 0.9047619 0.6142857 Logitmg3
## 151 0.9206349 0.6914789 Logitmg3
## 152 0.8870968 0.5292842 Logitmg3
## 153 0.9206349 0.7341772 Logitmg3
## 154 0.8888889 0.4854142 Logitmg3
## 155 0.9682540 0.8813559 Logitmg3
## 156 0.9838710 0.9426987 Logitmg3
## 157 0.9206349 0.6645367 Logitmg3
## 158 0.9206349 0.6645367 Logitmg3
## 159 0.9365079 0.7627119 Logitmg3
## 160 0.9365079 0.7797203 Logitmg3
## 161 0.9365079 0.7797203 Logitmg3
## 162 0.9523810 0.8148874 Logitmg3
## 163 0.9365079 0.7627119 Logitmg3
## 164 0.9516129 0.8143713 Logitmg3
## 165 0.9206349 0.6914789 Logitmg3
## 166 0.9193548 0.6906188 Logitmg3
## 167 0.9047619 0.5790646 Logitmg3
## 168 0.9047619 0.6440678 Logitmg3
## 169 0.9206349 0.6645367 Logitmg3
## 170 0.9523810 0.8148874 Logitmg3
## 171 0.9365079 0.7944535 Logitmg3
## 172 0.9365079 0.7627119 Logitmg3
## 173 0.9047619 0.6142857 Logitmg3
## 174 0.9365079 0.7627119 Logitmg3
## 175 0.9206349 0.6645367 Logitmg3
## 176 0.9354839 0.7422037 Logitmg3
## 177 0.9516129 0.8143713 Logitmg3
## 178 0.9523810 0.8148874 Logitmg3
## 179 0.9047619 0.6142857 Logitmg3
## 180 0.9365079 0.7428571 Logitmg3
## 181 0.9682540 0.8813559 Logitmg3
## 182 0.9523810 0.8148874 Logitmg3
## 183 0.9047619 0.6916803 Logitmg3
## 184 0.8730159 0.5254237 Logitmg3
## 185 0.9523810 0.8148874 Logitmg3
## 186 0.8888889 0.4854142 Logitmg3
## 187 0.9047619 0.5790646 Logitmg3
## 188 0.9677419 0.8809981 Logitmg3
## 189 0.9841270 0.9428830 Logitmg3
## 190 0.9032258 0.5782313 Logitmg3
## 191 0.9365079 0.7627119 Logitmg3
## 192 0.9523810 0.8148874 Logitmg3
## 193 0.9193548 0.6906188 Logitmg3
## 194 0.9523810 0.8148874 Logitmg3
## 195 0.9206349 0.6645367 Logitmg3
## 196 0.9047619 0.6440678 Logitmg3
## 197 0.8888889 0.5303514 Logitmg3
## 198 0.9193548 0.6906188 Logitmg3
## 199 0.9047619 0.6440678 Logitmg3
## 200 0.9682540 0.8813559 Logitmg3
   ## Phân tích hình ảnh
library(ggplot2)
library(ggthemes)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.8248, color = "blue") + theme_wsj() # Giá trị 0.8248 = 82.48% là tỷ lệ nhóm chiếm ưu thế (không sử dụng xe buýt-sử dụng xe máy)
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 87.01% (đường màu xanh)

h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2

   ## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Accuracy by resamplemod$Mohinh
## t = -1.1014, df = 194.58, p-value = 0.2721
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.011216872  0.003177957
## sample estimates:
## mean in group Logitmg2 mean in group Logitmg3 
##              0.9245110              0.9285305
summary(a)
##     Accuracy          Kappa           Mohinh         
##  Min.   :0.8730   Min.   :0.4854   Length:100        
##  1st Qu.:0.9048   1st Qu.:0.6275   Class :character  
##  Median :0.9206   Median :0.6915   Mode  :character  
##  Mean   :0.9245   Mean   :0.7045                     
##  3rd Qu.:0.9516   3rd Qu.:0.8144                     
##  Max.   :1.0000   Max.   :1.0000
summary(b)
##     Accuracy          Kappa           Mohinh         
##  Min.   :0.8548   Min.   :0.4854   Length:100        
##  1st Qu.:0.9048   1st Qu.:0.6441   Class :character  
##  Median :0.9206   Median :0.7026   Mode  :character  
##  Mean   :0.9285   Mean   :0.7184                     
##  3rd Qu.:0.9524   3rd Qu.:0.8149                     
##  Max.   :0.9841   Max.   :0.9429
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Kappa by resamplemod$Mohinh
## t = -0.89788, df = 193.91, p-value = 0.3704
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04415241  0.01652762
## sample estimates:
## mean in group Logitmg2 mean in group Logitmg3 
##              0.7045402              0.7183526
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
## 
##     ci
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
    ## Mô hình mg2 
mg2train1 <- train(Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + Weather + Age + Motor.Owning + Distance + Cost, data = datMB, method = "glm", family = "binomial")
summary(mg2train1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5543  -0.3473  -0.2047  -0.1105   3.5424  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.0827     0.4366   2.480  0.01313 *  
## Bus.Stop.ConditionYes     -1.1454     0.3559  -3.218  0.00129 ** 
## Temporary.Stop.NumberYes  -1.4357     0.3665  -3.917 8.97e-05 ***
## WeatherRainny              3.3460     1.2303   2.720  0.00653 ** 
## `Age25-60`                -0.8581     0.3723  -2.305  0.02118 *  
## `Age>60`                   3.0726     0.6714   4.577 4.73e-06 ***
## Motor.OwningYes           -2.5521     0.3773  -6.763 1.35e-11 ***
## Distance                   0.8812     0.1097   8.034 9.47e-16 ***
## Cost                      -0.9272     0.1168  -7.941 2.00e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 269.20  on 619  degrees of freedom
## AIC: 287.2
## 
## Number of Fisher Scoring iterations: 6
pred1mg2 <- predict(mg2train1, newdata = datMB, type = "prob")
rmg2 <- roc(datMB$Bus, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.9092
auc(rmg2)
## Area under the curve: 0.9092
plot.roc(rmg2)

ci(rmg2)
## 95% CI: 0.867-0.9514 (DeLong)
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

   ## Mô hình mg3 
mg3train1 <- train(Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + Motor.Owning + Distance + Cost, method = "glm", family = "binomial", data = datMB)
summary(mg3train1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6521  -0.3234  -0.2352  -0.1597   3.4706  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.1249     0.4578   2.457 0.014011 *  
## Temporary.Stop.NumberYes  -1.2977     0.3518  -3.689 0.000225 ***
## `Age25-60`                -0.7221     0.3726  -1.938 0.052588 .  
## `Age>60`                   2.6518     0.6239   4.250 2.14e-05 ***
## Motor.CertificateYes      -1.2614     0.4444  -2.839 0.004532 ** 
## Motor.OwningYes           -2.0477     0.3807  -5.378 7.51e-08 ***
## Distance                   0.8622     0.1091   7.900 2.79e-15 ***
## Cost                      -0.8805     0.1135  -7.759 8.56e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 278.94  on 620  degrees of freedom
## AIC: 294.94
## 
## Number of Fisher Scoring iterations: 6
pred1mg3 <- predict(mg3train1, newdata = datMB, type = "prob")
rmg3 <- roc(datMB$Bus, pred1mg3$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg3$auc
## Area under the curve: 0.9006
plot(rmg3, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

plot(smooth(rmg3), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

   ## Có thể dùng các lệnh đơn giản
auc(rmg3)
## Area under the curve: 0.9006
plot.roc(rmg3)

ci(rmg3)
## 95% CI: 0.8539-0.9472 (DeLong)
      ## Tính hệ số Gini = 2AUC-1
Ginimg3 <- 2*(rmg3$auc)-1
Ginimg3
## [1] 0.8011232
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.8183573

5. Importance of variables in models

library(caret)
varImp(mg2)
##                           Overall
## Bus.Stop.ConditionYes    3.218460
## Temporary.Stop.NumberYes 3.916837
## WeatherRainny            2.719747
## Age25-60                 2.304714
## Age>60                   4.576534
## Motor.OwningYes          6.763419
## Distance                 8.033518
## Cost                     7.941463
plot(varImp(mg2))

varImp(mg3)
##                           Overall
## Temporary.Stop.NumberYes 3.688996
## Age25-60                 1.938286
## Age>60                   4.250078
## Motor.CertificateYes     2.838573
## Motor.OwningYes          5.378406
## Distance                 7.900110
## Cost                     7.759013
plot(varImp(mg3))

6. Probility of bus use - Prohibit model

# Prohibit Models
library(aod)
## 
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
## 
##     rats
      ## Model mg2
prohibitmg2 <- glm(Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + Weather + Age + Motor.Owning + Distance + Cost, family = binomial(link = "probit"), data = datMB)
summary(prohibitmg2)
## 
## Call:
## glm(formula = Bus ~ Bus.Stop.Condition + Temporary.Stop.Number + 
##     Weather + Age + Motor.Owning + Distance + Cost, family = binomial(link = "probit"), 
##     data = datMB)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4479  -0.3810  -0.2122  -0.1005   3.7567  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               0.50335    0.22669   2.220  0.02639 *  
## Bus.Stop.ConditionYes    -0.53362    0.17272  -3.089  0.00201 ** 
## Temporary.Stop.NumberYes -0.65542    0.17624  -3.719  0.00020 ***
## WeatherRainny             1.72021    0.66545   2.585  0.00974 ** 
## Age25-60                 -0.45173    0.17979  -2.513  0.01199 *  
## Age>60                    1.57540    0.35924   4.385 1.16e-05 ***
## Motor.OwningYes          -1.33955    0.19510  -6.866 6.60e-12 ***
## Distance                  0.43489    0.05126   8.484  < 2e-16 ***
## Cost                     -0.45880    0.05449  -8.420  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 279.55  on 619  degrees of freedom
## AIC: 297.55
## 
## Number of Fisher Scoring iterations: 7
     ## Model mg3
prohibitmg3 <- glm(Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + Motor.Owning + Distance + Cost, family = binomial(link = "probit"), data = datMB)
summary(prohibitmg3)
## 
## Call:
## glm(formula = Bus ~ Temporary.Stop.Number + Age + Motor.Certificate + 
##     Motor.Owning + Distance + Cost, family = binomial(link = "probit"), 
##     data = datMB)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5733  -0.3513  -0.2548  -0.1541   3.6886  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               0.59639    0.24722   2.412 0.015849 *  
## Temporary.Stop.NumberYes -0.60283    0.16986  -3.549 0.000387 ***
## Age25-60                 -0.36339    0.17770  -2.045 0.040864 *  
## Age>60                    1.40849    0.34567   4.075 4.61e-05 ***
## Motor.CertificateYes     -0.63545    0.24188  -2.627 0.008610 ** 
## Motor.OwningYes          -1.13928    0.20348  -5.599 2.16e-08 ***
## Distance                  0.42121    0.05073   8.303  < 2e-16 ***
## Cost                     -0.43561    0.05332  -8.170 3.09e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 582.75  on 627  degrees of freedom
## Residual deviance: 289.24  on 620  degrees of freedom
## AIC: 305.24
## 
## Number of Fisher Scoring iterations: 7
# Đánh giá tác động tổng thể Age (Total effect of Age) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chọn xe buýt (thứ tự hệ số ứng với Age là từ 7:9)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 7:9) # Giá trị 107.6 ứng với p-value < 0.05 --> KL: tác động tổng thể về Purpose (mục đich chuyến đi) của người sử dụng lên khả năng chọn xe buýt là tồn tại và có ý nghĩa thống kê
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 107.6, df = 3, P(> X2) = 0.0
# Đánh giá tác động tổng thể Temporary.Stop.Number (Total effect of Temporary.Stop.Number) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chọn xe buýt (thứ tự hệ số ứng với Temporary.Stop.Number là từ 1:2)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 1:2)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 10.6, df = 2, P(> X2) = 0.0051

7. Reason why choose or doesn’t choose bus

t = "E:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 1-Mode choice\\SO LIEU R\\Mode choice in DN.csv"
MC = read.csv(t, header = T)
head(MC)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 1           6                  1            0       3         3              1
## 2           3                  1            0       1         4              1
## 3           3                  1            0       1         4              3
## 4           3                  1            0       1         4              1
## 5           3                  1            0       1         4              3
## 6           4                  1            0       1         4              1
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 1        2            10                  1             1                     0
## 2        8            15                  1             1                     1
## 3        5            15                  1             1                     1
## 4        5            10                  1             1                     1
## 5        8            15                  1             1                     0
## 6       20            30                  1             1                     0
##   Mode.Choice.Reason Weather Weekend Non.Bus.Reason Cost Bus.Stop.Present
## 1                  4       1       0              1   12                1
## 2                  2       3       0              1    8                2
## 3                  4       1       0              2    5                1
## 4                  2       1       0              4    5                1
## 5                  2       3       0              2    8                2
## 6                  5       3       0              2   40                2
##   Gender Age Occupation Income Number.of.Children Motor.Certificate
## 1      0   5          4      2                  2                 0
## 2      0   3          6      3                  1                 1
## 3      1   3          2      4                  0                 1
## 4      1   3          2      3                  0                 1
## 5      0   4          6      3                  1                 1
## 6      1   4          3      4                  2                 1
##   Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## 1               0              0            0          0                  1
## 2               0              1            1          0                  1
## 3               0              0            1          0                  0
## 4               0              1            1          0                  1
## 5               0              0            1          0                  0
## 6               1              1            1          1                  1
##   Number.of.Motors Number.of.Car
## 1                2             1
## 2                3             0
## 3                3             0
## 4                3             0
## 5                2             0
## 6                2             1
str(MC)
## 'data.frame':    847 obs. of  30 variables:
##  $ Travel.Mode          : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ Bus.Stop.Condition   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Central.Area         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Purpose              : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ Distance             : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ Travel.Period        : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ Sidewalk.Clearance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Lane.Separate        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Temporary.Stop.Number: int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Mode.Choice.Reason   : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ Weather              : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ Weekend              : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ Non.Bus.Reason       : int  1 1 2 4 2 2 4 2 1 4 ...
##  $ Cost                 : int  12 8 5 5 8 40 30 10 12 10 ...
##  $ Bus.Stop.Present     : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ Gender               : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ Age                  : int  5 3 3 3 4 4 5 4 3 4 ...
##  $ Occupation           : int  4 6 2 2 6 3 3 7 7 3 ...
##  $ Income               : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ Number.of.Children   : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ Motor.Certificate    : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Certificate      : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Bicycle.Owning       : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ Motor.Owning         : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Owning           : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Number.of.Bicycles   : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ Number.of.Motors     : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ Number.of.Car        : int  1 0 0 0 0 1 1 0 0 0 ...
str(MC$Travel.Mode)
##  int [1:847] 6 3 3 3 3 4 4 3 3 3 ...
# View data (number of rows and colume)
dim(MC) 
## [1] 847  30
# Sumerize Data
summary(MC) 
##   Travel.Mode    Bus.Stop.Condition  Central.Area      Purpose     
##  Min.   :1.000   Min.   :0.0000     Min.   :0.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:0.0000     1st Qu.:0.000   1st Qu.:1.000  
##  Median :3.000   Median :1.0000     Median :0.000   Median :1.000  
##  Mean   :3.646   Mean   :0.5089     Mean   :0.477   Mean   :1.568  
##  3rd Qu.:4.000   3rd Qu.:1.0000     3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :1.0000     Max.   :1.000   Max.   :4.000  
##                                                                    
##    Frequency     Departure.Time     Distance      Travel.Period   
##  Min.   :1.000   Min.   :1.000   Min.   : 0.015   Min.   :  0.00  
##  1st Qu.:4.000   1st Qu.:1.000   1st Qu.: 3.000   1st Qu.: 10.00  
##  Median :4.000   Median :1.000   Median : 5.000   Median : 15.00  
##  Mean   :3.548   Mean   :1.843   Mean   : 6.492   Mean   : 17.42  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 20.00  
##  Max.   :4.000   Max.   :4.000   Max.   :35.000   Max.   :180.00  
##                                                                   
##  Sidewalk.Clearance Lane.Separate    Temporary.Stop.Number Mode.Choice.Reason
##  Min.   :0.0000     Min.   :0.0000   Min.   :0.0000        Min.   :1.000     
##  1st Qu.:1.0000     1st Qu.:1.0000   1st Qu.:0.0000        1st Qu.:2.000     
##  Median :1.0000     Median :1.0000   Median :0.0000        Median :2.000     
##  Mean   :0.8666     Mean   :0.7898   Mean   :0.6824        Mean   :2.713     
##  3rd Qu.:1.0000     3rd Qu.:1.0000   3rd Qu.:1.0000        3rd Qu.:4.000     
##  Max.   :1.0000     Max.   :1.0000   Max.   :3.0000        Max.   :6.000     
##                                                                              
##     Weather         Weekend       Non.Bus.Reason       Cost        
##  Min.   :1.000   Min.   :0.0000   Min.   :1.000   Min.   :  0.000  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:  3.000  
##  Median :1.000   Median :0.0000   Median :2.000   Median :  5.000  
##  Mean   :1.646   Mean   :0.2444   Mean   :2.912   Mean   :  8.685  
##  3rd Qu.:3.000   3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.: 10.000  
##  Max.   :3.000   Max.   :1.0000   Max.   :6.000   Max.   :285.000  
##                                   NA's   :110                      
##  Bus.Stop.Present     Gender            Age         Occupation   
##  Min.   :0.000    Min.   :0.0000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:1.000    1st Qu.:0.0000   1st Qu.:3.00   1st Qu.:2.000  
##  Median :1.000    Median :1.0000   Median :4.00   Median :3.000  
##  Mean   :1.004    Mean   :0.5832   Mean   :3.56   Mean   :4.152  
##  3rd Qu.:1.000    3rd Qu.:1.0000   3rd Qu.:4.00   3rd Qu.:7.000  
##  Max.   :2.000    Max.   :1.0000   Max.   :6.00   Max.   :8.000  
##                                                                  
##      Income      Number.of.Children Motor.Certificate Car.Certificate
##  Min.   :1.000   Min.   :0.0000     Min.   :0.0000    Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:0.0000     1st Qu.:1.0000    1st Qu.:0.000  
##  Median :2.000   Median :1.0000     Median :1.0000    Median :0.000  
##  Mean   :2.205   Mean   :0.7674     Mean   :0.8453    Mean   :0.183  
##  3rd Qu.:3.000   3rd Qu.:1.0000     3rd Qu.:1.0000    3rd Qu.:0.000  
##  Max.   :4.000   Max.   :3.0000     Max.   :1.0000    Max.   :1.000  
##                                                                      
##  Bicycle.Owning    Motor.Owning      Car.Owning     Number.of.Bicycles
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000    
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000    
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :1.0000    
##  Mean   :0.4109   Mean   :0.7851   Mean   :0.1405   Mean   :0.7084    
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:1.0000    
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :3.0000    
##                                                                       
##  Number.of.Motors Number.of.Car   
##  Min.   :0.000    Min.   :0.0000  
##  1st Qu.:2.000    1st Qu.:0.0000  
##  Median :2.000    Median :0.0000  
##  Mean   :2.118    Mean   :0.2456  
##  3rd Qu.:3.000    3rd Qu.:0.0000  
##  Max.   :4.000    Max.   :2.0000  
## 
# Create dat - MotorBus (Bus và xe máy)
datMB <- MC[MC$Travel.Mode %in% c(3,7),]
head(datMB)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 2           3                  1            0       1         4              1
## 3           3                  1            0       1         4              3
## 4           3                  1            0       1         4              1
## 5           3                  1            0       1         4              3
## 8           3                  1            0       1         4              1
## 9           3                  1            0       1         4              2
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 2        8            15                  1             1                     1
## 3        5            15                  1             1                     1
## 4        5            10                  1             1                     1
## 5        8            15                  1             1                     0
## 8       10            25                  1             1                     0
## 9       12            30                  1             1                     1
##   Mode.Choice.Reason Weather Weekend Non.Bus.Reason Cost Bus.Stop.Present
## 2                  2       3       0              1    8                2
## 3                  4       1       0              2    5                1
## 4                  2       1       0              4    5                1
## 5                  2       3       0              2    8                2
## 8                  2       3       1              2   10                1
## 9                  2       1       0              1   12                1
##   Gender Age Occupation Income Number.of.Children Motor.Certificate
## 2      0   3          6      3                  1                 1
## 3      1   3          2      4                  0                 1
## 4      1   3          2      3                  0                 1
## 5      0   4          6      3                  1                 1
## 8      0   4          7      2                  0                 1
## 9      0   3          7      3                  1                 1
##   Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## 2               0              1            1          0                  1
## 3               0              0            1          0                  0
## 4               0              1            1          0                  1
## 5               0              0            1          0                  0
## 8               0              0            1          0                  0
## 9               0              1            1          0                  1
##   Number.of.Motors Number.of.Car
## 2                3             0
## 3                3             0
## 4                3             0
## 5                2             0
## 8                2             0
## 9                3             0
dim(datMB)
## [1] 628  30
datMB$Bus[datMB$Travel.Mode == 3] <- 0
datMB$Bus[datMB$Travel.Mode == 7] <- 1
head(datMB)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 2           3                  1            0       1         4              1
## 3           3                  1            0       1         4              3
## 4           3                  1            0       1         4              1
## 5           3                  1            0       1         4              3
## 8           3                  1            0       1         4              1
## 9           3                  1            0       1         4              2
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## 2        8            15                  1             1                     1
## 3        5            15                  1             1                     1
## 4        5            10                  1             1                     1
## 5        8            15                  1             1                     0
## 8       10            25                  1             1                     0
## 9       12            30                  1             1                     1
##   Mode.Choice.Reason Weather Weekend Non.Bus.Reason Cost Bus.Stop.Present
## 2                  2       3       0              1    8                2
## 3                  4       1       0              2    5                1
## 4                  2       1       0              4    5                1
## 5                  2       3       0              2    8                2
## 8                  2       3       1              2   10                1
## 9                  2       1       0              1   12                1
##   Gender Age Occupation Income Number.of.Children Motor.Certificate
## 2      0   3          6      3                  1                 1
## 3      1   3          2      4                  0                 1
## 4      1   3          2      3                  0                 1
## 5      0   4          6      3                  1                 1
## 8      0   4          7      2                  0                 1
## 9      0   3          7      3                  1                 1
##   Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## 2               0              1            1          0                  1
## 3               0              0            1          0                  0
## 4               0              1            1          0                  1
## 5               0              0            1          0                  0
## 8               0              0            1          0                  0
## 9               0              1            1          0                  1
##   Number.of.Motors Number.of.Car Bus
## 2                3             0   0
## 3                3             0   0
## 4                3             0   0
## 5                2             0   0
## 8                2             0   0
## 9                3             0   0
dim(datMB) 
## [1] 628  31
str(datMB)
## 'data.frame':    628 obs. of  31 variables:
##  $ Travel.Mode          : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ Bus.Stop.Condition   : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ Central.Area         : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ Purpose              : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : int  1 3 1 3 1 2 1 1 1 1 ...
##  $ Distance             : num  8 5 5 8 10 12 10 3 2 4 ...
##  $ Travel.Period        : num  15 15 10 15 25 30 25 5 5 10 ...
##  $ Sidewalk.Clearance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Lane.Separate        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Temporary.Stop.Number: int  1 1 1 0 0 1 1 0 1 0 ...
##  $ Mode.Choice.Reason   : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ Weather              : int  3 1 1 3 3 1 3 1 3 1 ...
##  $ Weekend              : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ Non.Bus.Reason       : int  1 2 4 2 2 1 4 4 4 2 ...
##  $ Cost                 : int  8 5 5 8 10 12 10 3 2 4 ...
##  $ Bus.Stop.Present     : int  2 1 1 2 1 1 1 1 1 1 ...
##  $ Gender               : int  0 1 1 0 0 0 1 1 1 1 ...
##  $ Age                  : int  3 3 3 4 4 3 4 2 4 3 ...
##  $ Occupation           : int  6 2 2 6 7 7 3 1 7 7 ...
##  $ Income               : int  3 4 3 3 2 3 3 2 2 1 ...
##  $ Number.of.Children   : int  1 0 0 1 0 1 0 0 1 0 ...
##  $ Motor.Certificate    : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ Car.Certificate      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bicycle.Owning       : int  1 0 1 0 0 1 0 1 0 0 ...
##  $ Motor.Owning         : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ Car.Owning           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Number.of.Bicycles   : int  1 0 1 0 0 1 0 1 2 0 ...
##  $ Number.of.Motors     : int  3 3 3 2 2 3 3 3 2 1 ...
##  $ Number.of.Car        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bus                  : num  0 0 0 0 0 0 0 0 0 0 ...
# Vẽ biểu đồ reason why choose bus or doesn't choose bus
library(magrittr)
library(tidyverse)
library(ggplot2)
library(car)
## Lấy 1 phần dữ liệu gồm Bus, Mode.Choice.Reason, Non.Bus.Reason
dat1 <- datMB[,c(12,15,31)]
head(dat1)
##   Mode.Choice.Reason Non.Bus.Reason Bus
## 2                  2              1   0
## 3                  4              2   0
## 4                  2              4   0
## 5                  2              2   0
## 8                  2              2   0
## 9                  2              1   0
dim(dat1)
## [1] 628   3
## Dữ liệu sử dụng xe buýt
datBusonly <- subset(dat1, Bus == 1)
dim(datBusonly)
## [1] 110   3
datBusonly$Bus.Choice.Reason <- datBusonly$Mode.Choice.Reason
head(datBusonly)
##     Mode.Choice.Reason Non.Bus.Reason Bus Bus.Choice.Reason
## 60                   1             NA   1                 1
## 68                   3             NA   1                 3
## 128                  1             NA   1                 1
## 247                  2             NA   1                 2
## 280                  2             NA   1                 2
## 344                  1             NA   1                 1
str(datBusonly)
## 'data.frame':    110 obs. of  4 variables:
##  $ Mode.Choice.Reason: int  1 3 1 2 2 1 2 1 1 3 ...
##  $ Non.Bus.Reason    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Bus               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Bus.Choice.Reason : int  1 3 1 2 2 1 2 1 1 3 ...
## reformat variables in factor variables
attach(datBusonly)
## The following objects are masked from datMB (pos = 39):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
## The following objects are masked from datMB (pos = 40):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
datBusonly = within(datBusonly, {
  Bus.Choice.Reason = factor(Bus.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "others"))
  } )
head(datBusonly)
##     Mode.Choice.Reason Non.Bus.Reason Bus Bus.Choice.Reason
## 60                   1             NA   1            Safety
## 68                   3             NA   1         Low price
## 128                  1             NA   1            Safety
## 247                  2             NA   1       Comfortable
## 280                  2             NA   1       Comfortable
## 344                  1             NA   1            Safety
str(datBusonly)
## 'data.frame':    110 obs. of  4 variables:
##  $ Mode.Choice.Reason: int  1 3 1 2 2 1 2 1 1 3 ...
##  $ Non.Bus.Reason    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Bus               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Bus.Choice.Reason : Factor w/ 5 levels "Safety","Comfortable",..: 1 3 1 2 2 1 2 1 1 3 ...
dim(datBusonly)
## [1] 110   4
## Reasons choose bus
p.bus = ggplot(datBusonly, aes(x = Bus.Choice.Reason, fill = Bus.Choice.Reason))
p.bus = p.bus +  geom_bar(width = 0.8, col = "blue")
p.bus = p.bus + theme(legend.position = "none")
p.bus = p.bus + coord_flip()
p.bus 

# Dữ liệu không sử dụng xe buýt - sử dụng xe máy
datMotoronly <- subset(dat1, Bus == 0)
head(datMotoronly)
##   Mode.Choice.Reason Non.Bus.Reason Bus
## 2                  2              1   0
## 3                  4              2   0
## 4                  2              4   0
## 5                  2              2   0
## 8                  2              2   0
## 9                  2              1   0
str(datMotoronly)
## 'data.frame':    518 obs. of  3 variables:
##  $ Mode.Choice.Reason: int  2 4 2 2 2 2 2 2 4 3 ...
##  $ Non.Bus.Reason    : int  1 2 4 2 2 1 4 4 4 2 ...
##  $ Bus               : num  0 0 0 0 0 0 0 0 0 0 ...
attach(datMotoronly)
## The following objects are masked from datBusonly:
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
## 
## The following objects are masked from datMB (pos = 40):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
## The following objects are masked from datMB (pos = 41):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
datMotoronly = within(datMotoronly, {
  Non.Bus.Reason = factor(Non.Bus.Reason, labels = c("No Route", "Inconvenient", "Unsafety", "Long waiting time", "Unreliability", "others"))
  } )
str(datMotoronly)
## 'data.frame':    518 obs. of  3 variables:
##  $ Mode.Choice.Reason: int  2 4 2 2 2 2 2 2 4 3 ...
##  $ Non.Bus.Reason    : Factor w/ 6 levels "No Route","Inconvenient",..: 1 2 4 2 2 1 4 4 4 2 ...
##  $ Bus               : num  0 0 0 0 0 0 0 0 0 0 ...
dim(datMotoronly)
## [1] 518   3
## Reasons motor users not choose bus
p.nonbus = ggplot(datMotoronly, aes(x = Non.Bus.Reason, fill = Non.Bus.Reason))
p.nonbus = p.nonbus +  geom_bar(width = 0.8, col = "blue")
p.nonbus = p.nonbus + theme(legend.position = "none")
p.nonbus = p.nonbus + coord_flip()
p.nonbus

## Reasons choose motor
datMotoronly$Motor.Choice.Reason <- datMotoronly$Mode.Choice.Reason
head(datMotoronly)
##   Mode.Choice.Reason    Non.Bus.Reason Bus Motor.Choice.Reason
## 2                  2          No Route   0                   2
## 3                  4      Inconvenient   0                   4
## 4                  2 Long waiting time   0                   2
## 5                  2      Inconvenient   0                   2
## 8                  2      Inconvenient   0                   2
## 9                  2          No Route   0                   2
str(datMotoronly)
## 'data.frame':    518 obs. of  4 variables:
##  $ Mode.Choice.Reason : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ Non.Bus.Reason     : Factor w/ 6 levels "No Route","Inconvenient",..: 1 2 4 2 2 1 4 4 4 2 ...
##  $ Bus                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Motor.Choice.Reason: int  2 4 2 2 2 2 2 2 4 3 ...
dim(datMotoronly)
## [1] 518   4
attach(datMotoronly)
## The following objects are masked from datMotoronly (pos = 3):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
## The following objects are masked from datBusonly:
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
## The following objects are masked from datMB (pos = 41):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
## The following objects are masked from datMB (pos = 42):
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
datMotoronly = within(datMotoronly, {
  Motor.Choice.Reason = factor(Motor.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
  } )
head(datMotoronly)
##   Mode.Choice.Reason    Non.Bus.Reason Bus Motor.Choice.Reason
## 2                  2          No Route   0         Comfortable
## 3                  4      Inconvenient   0       Accessibility
## 4                  2 Long waiting time   0         Comfortable
## 5                  2      Inconvenient   0         Comfortable
## 8                  2      Inconvenient   0         Comfortable
## 9                  2          No Route   0         Comfortable
str(datMotoronly)
## 'data.frame':    518 obs. of  4 variables:
##  $ Mode.Choice.Reason : int  2 4 2 2 2 2 2 2 4 3 ...
##  $ Non.Bus.Reason     : Factor w/ 6 levels "No Route","Inconvenient",..: 1 2 4 2 2 1 4 4 4 2 ...
##  $ Bus                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Motor.Choice.Reason: Factor w/ 6 levels "Safety","Comfortable",..: 2 4 2 2 2 2 2 2 4 3 ...
dim(datMotoronly)
## [1] 518   4
p.motor = ggplot(datMotoronly, aes(x = Motor.Choice.Reason, fill = Motor.Choice.Reason))
p.motor = p.motor +  geom_bar(width = 0.8, col = "blue")
p.motor = p.motor + theme(legend.position = "none")
p.motor = p.motor + coord_flip()
p.motor