1. Read and Create Data

t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 1-Mode choice\\SO LIEU R\\Bus choice in DN.csv"
MC = read.csv(t, header = T)
head(MC)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency
## 1           6                  1            0       3         3
## 2           3                  1            0       1         4
## 3           3                  1            0       1         4
## 4           3                  1            0       1         4
## 5           3                  1            0       1         4
## 6           4                  1            0       1         4
##   Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1              1        2            10                  1             1
## 2              1        8            15                  1             1
## 3              3        5            15                  1             1
## 4              1        5            10                  1             1
## 5              3        8            15                  1             1
## 6              1       20            30                  1             1
##   Temporary.Stop.Number Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## 1                     0                  4       1       0              1
## 2                     1                  2       3       0              1
## 3                     1                  4       1       0              2
## 4                     1                  2       1       0              4
## 5                     0                  2       3       0              2
## 6                     0                  5       3       0              2
##   Cost Bus.Stop.Present Gender Age Occupation Income Number.of.Children
## 1   12                1      0   5          4      2                  2
## 2    8                2      0   3          6      3                  1
## 3    5                1      1   3          2      4                  0
## 4    5                1      1   3          2      3                  0
## 5    8                2      0   4          6      3                  1
## 6   40                2      1   4          3      4                  2
##   Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1                 0               0              0            0          0
## 2                 1               0              1            1          0
## 3                 1               0              0            1          0
## 4                 1               0              1            1          0
## 5                 1               0              0            1          0
## 6                 1               1              1            1          1
##   Number.of.Bicycles Number.of.Motors Number.of.Car
## 1                  1                2             1
## 2                  1                3             0
## 3                  0                3             0
## 4                  1                3             0
## 5                  0                2             0
## 6                  1                2             1
head(MC$Age)
## [1] 5 3 3 3 4 4
str(MC$Travel.Mode)
##  int [1:809] 6 3 3 3 3 4 4 3 3 3 ...
# View data (number of rows and colume)
dim(MC) 
## [1] 809  30
# Sumerize Data
summary(MC) 
##   Travel.Mode    Bus.Stop.Condition  Central.Area      Purpose     
##  Min.   :1.000   Min.   :0.0000     Min.   :0.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:0.0000     1st Qu.:0.000   1st Qu.:1.000  
##  Median :3.000   Median :1.0000     Median :0.000   Median :1.000  
##  Mean   :3.663   Mean   :0.5117     Mean   :0.471   Mean   :1.576  
##  3rd Qu.:4.000   3rd Qu.:1.0000     3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :1.0000     Max.   :1.000   Max.   :4.000  
##                                                                    
##    Frequency     Departure.Time     Distance      Travel.Period   
##  Min.   :1.000   Min.   :1.000   Min.   : 0.015   Min.   :  0.00  
##  1st Qu.:4.000   1st Qu.:1.000   1st Qu.: 3.000   1st Qu.: 10.00  
##  Median :4.000   Median :1.000   Median : 5.000   Median : 15.00  
##  Mean   :3.555   Mean   :1.832   Mean   : 6.654   Mean   : 17.55  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 20.00  
##  Max.   :4.000   Max.   :4.000   Max.   :35.000   Max.   :180.00  
##                                                                   
##  Sidewalk.Clearance Lane.Separate    Temporary.Stop.Number
##  Min.   :0.000      Min.   :0.0000   Min.   :0.0000       
##  1st Qu.:1.000      1st Qu.:1.0000   1st Qu.:0.0000       
##  Median :1.000      Median :1.0000   Median :0.0000       
##  Mean   :0.864      Mean   :0.7886   Mean   :0.6922       
##  3rd Qu.:1.000      3rd Qu.:1.0000   3rd Qu.:1.0000       
##  Max.   :1.000      Max.   :1.0000   Max.   :3.0000       
##                                                           
##  Mode.Choice.Reason    Weather        Weekend       Non.Bus.Reason 
##  Min.   :1.000      Min.   :1.00   Min.   :0.0000   Min.   :1.000  
##  1st Qu.:2.000      1st Qu.:1.00   1st Qu.:0.0000   1st Qu.:2.000  
##  Median :2.000      Median :1.00   Median :0.0000   Median :2.000  
##  Mean   :2.714      Mean   :1.64   Mean   :0.2435   Mean   :2.882  
##  3rd Qu.:4.000      3rd Qu.:3.00   3rd Qu.:0.0000   3rd Qu.:4.000  
##  Max.   :6.000      Max.   :3.00   Max.   :1.0000   Max.   :6.000  
##                                                     NA's   :106    
##       Cost         Bus.Stop.Present     Gender           Age       
##  Min.   :  0.000   Min.   :0.000    Min.   :0.000   Min.   :1.000  
##  1st Qu.:  3.000   1st Qu.:1.000    1st Qu.:0.000   1st Qu.:3.000  
##  Median :  5.000   Median :1.000    Median :1.000   Median :4.000  
##  Mean   :  8.754   Mean   :1.005    Mean   :0.581   Mean   :3.677  
##  3rd Qu.: 10.000   3rd Qu.:1.000    3rd Qu.:1.000   3rd Qu.:4.000  
##  Max.   :285.000   Max.   :2.000    Max.   :1.000   Max.   :6.000  
##                                                                    
##    Occupation        Income      Number.of.Children Motor.Certificate
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000     Min.   :0.0000   
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:0.0000     1st Qu.:1.0000   
##  Median :3.000   Median :2.000   Median :1.0000     Median :1.0000   
##  Mean   :4.298   Mean   :2.206   Mean   :0.7651     Mean   :0.8838   
##  3rd Qu.:7.000   3rd Qu.:3.000   3rd Qu.:1.0000     3rd Qu.:1.0000   
##  Max.   :8.000   Max.   :4.000   Max.   :3.0000     Max.   :1.0000   
##                                                                      
##  Car.Certificate  Bicycle.Owning    Motor.Owning      Car.Owning    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :1.0000   Median :0.0000  
##  Mean   :0.1916   Mean   :0.3956   Mean   :0.8183   Mean   :0.1471  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##  Number.of.Bicycles Number.of.Motors Number.of.Car   
##  Min.   :0.0000     Min.   :0.00     Min.   :0.0000  
##  1st Qu.:0.0000     1st Qu.:2.00     1st Qu.:0.0000  
##  Median :1.0000     Median :2.00     Median :0.0000  
##  Mean   :0.6873     Mean   :2.13     Mean   :0.2447  
##  3rd Qu.:1.0000     3rd Qu.:3.00     3rd Qu.:0.0000  
##  Max.   :3.0000     Max.   :4.00     Max.   :2.0000  
## 
# Create new variable - Bus 
MC$Bus[MC$Travel.Mode < 7] <- 0
MC$Bus[MC$Travel.Mode == 7] <- 1
# Đổi biến Age
MC$Age[MC$Age == 2] <- 1
MC$Age[MC$Age == 3] <- 1
MC$Age[MC$Age == 4] <- 2
MC$Age[MC$Age == 5] <- 2
MC$Age[MC$Age == 6] <- 3
# Đổi biến Occupation
MC$Occupation[MC$Occupation == 2] <- 1
MC$Occupation[MC$Occupation == 3] <- 2
MC$Occupation[MC$Occupation == 6] <- 2
MC$Occupation[MC$Occupation == 4] <- 3
MC$Occupation[MC$Occupation == 5] <- 3
MC$Occupation[MC$Occupation == 7] <- 4
MC$Occupation[MC$Occupation == 8] <- 5
# Đổi biến Temporary Stop Number
MC$Temporary.Stop.Number[MC$Temporary.Stop.Number == 3] <- 2
head(MC)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency
## 1           6                  1            0       3         3
## 2           3                  1            0       1         4
## 3           3                  1            0       1         4
## 4           3                  1            0       1         4
## 5           3                  1            0       1         4
## 6           4                  1            0       1         4
##   Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1              1        2            10                  1             1
## 2              1        8            15                  1             1
## 3              3        5            15                  1             1
## 4              1        5            10                  1             1
## 5              3        8            15                  1             1
## 6              1       20            30                  1             1
##   Temporary.Stop.Number Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## 1                     0                  4       1       0              1
## 2                     1                  2       3       0              1
## 3                     1                  4       1       0              2
## 4                     1                  2       1       0              4
## 5                     0                  2       3       0              2
## 6                     0                  5       3       0              2
##   Cost Bus.Stop.Present Gender Age Occupation Income Number.of.Children
## 1   12                1      0   2          3      2                  2
## 2    8                2      0   1          2      3                  1
## 3    5                1      1   1          1      4                  0
## 4    5                1      1   1          1      3                  0
## 5    8                2      0   2          2      3                  1
## 6   40                2      1   2          2      4                  2
##   Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1                 0               0              0            0          0
## 2                 1               0              1            1          0
## 3                 1               0              0            1          0
## 4                 1               0              1            1          0
## 5                 1               0              0            1          0
## 6                 1               1              1            1          1
##   Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 1                  1                2             1   0
## 2                  1                3             0   0
## 3                  0                3             0   0
## 4                  1                3             0   0
## 5                  0                2             0   0
## 6                  1                2             1   0
dim(MC)
## [1] 809  31
str(MC)
## 'data.frame':    809 obs. of  31 variables:
##  $ Travel.Mode          : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ Bus.Stop.Condition   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Central.Area         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Purpose              : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ Distance             : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ Travel.Period        : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ Sidewalk.Clearance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Lane.Separate        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Temporary.Stop.Number: num  0 1 1 1 0 0 0 0 1 1 ...
##  $ Mode.Choice.Reason   : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ Weather              : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ Weekend              : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ Non.Bus.Reason       : int  1 1 2 4 2 2 4 2 1 4 ...
##  $ Cost                 : int  12 8 5 5 8 40 30 10 12 10 ...
##  $ Bus.Stop.Present     : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ Gender               : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ Age                  : num  2 1 1 1 2 2 2 2 1 2 ...
##  $ Occupation           : num  3 2 1 1 2 2 2 4 4 2 ...
##  $ Income               : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ Number.of.Children   : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ Motor.Certificate    : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Certificate      : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Bicycle.Owning       : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ Motor.Owning         : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Owning           : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Number.of.Bicycles   : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ Number.of.Motors     : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ Number.of.Car        : int  1 0 0 0 0 1 1 0 0 0 ...
##  $ Bus                  : num  0 0 0 0 0 0 0 0 0 0 ...
# reformat variables in factor variables
attach(MC)
MC = within(MC, {
  Travel.Mode = factor(Travel.Mode, labels = c("Walk", "Bicycle", "Motorbike", "Car", "Hichhiking", "App-based Motor/Car", "Bus"))
  Bus.Stop.Condition = factor(Bus.Stop.Condition,labels = c("No", "Yes"))
  Central.Area = factor(Central.Area, labels = c("No", "Yes"))
  Purpose = factor(Purpose, labels = c("Work/Study", "Picking Children", "Entertainment", "Others"))
  Frequency = factor(Frequency, labels = c("Once", "2 times", "3 times", "> 3 times"))
  Departure.Time = factor(Departure.Time, labels = c("Morning", "Afternoon", "Evening", "Others"))
  Sidewalk.Clearance = factor(Sidewalk.Clearance, labels = c("No", "Yes"))
  Lane.Separate = factor(Lane.Separate, labels = c("No", "Yes"))
  Temporary.Stop.Number = factor(Temporary.Stop.Number, labels = c("None", "1 stop", ">=2 stops"))
  Mode.Choice.Reason = factor(Mode.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
  Weather = factor(Weather, labels = c("Sunny", "Rainny", "Cool"))
  Weekend = factor(Weekend, labels = c("No", "Yes"))
  Non.Bus.Reason = factor(Non.Bus.Reason, labels = c("No Route", "Uncomfortable", "Unsafety", "Long waiting time", "Unreliability", "others"))
  Bus.Stop.Present = factor(Bus.Stop.Present, labels = c("No", "Yes", "Don't know"))
  Gender = factor(Gender, labels = c("Female", "Male"))
  Age = factor(Age, labels = c("15-24", "25-60", ">60"))
  Occupation = factor(Occupation, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
  Number.of.Children = factor(Number.of.Children, labels = c("None", "1 child", "2 children", ">= 3 children"))
  Motor.Certificate = factor(Motor.Certificate, labels = c("No", "Yes"))
  Car.Certificate = factor(Car.Certificate, labels = c("No", "Yes"))
  Bicycle.Owning = factor(Bicycle.Owning, labels = c("No", "Yes"))
  Motor.Owning = factor(Motor.Owning, labels = c("No", "Yes"))
  Car.Owning = factor(Car.Owning, labels = c("No", "Yes"))
  Number.of.Bicycles = factor(Number.of.Bicycles, labels = c("None", "1", "2", ">=3"))
  Number.of.Motors = factor(Number.of.Motors, labels = c("None", "1", "2", "3", ">3"))
  Number.of.Car = factor(Number.of.Car, labels = c("None", "1", ">=2"))
  Income = factor(Income, labels = c("<8 millions", "(8-15) millions", "(15-25) millions",  ">25 millions"))
  Bus = factor(Bus, labels = c("No", "Yes"))
  Distance = as.numeric(Distance)
  Travel.Period = as.numeric(Travel.Period)
  Cost = as.numeric(Cost)
    } )
head(MC)
##           Travel.Mode Bus.Stop.Condition Central.Area       Purpose
## 1 App-based Motor/Car                Yes           No Entertainment
## 2           Motorbike                Yes           No    Work/Study
## 3           Motorbike                Yes           No    Work/Study
## 4           Motorbike                Yes           No    Work/Study
## 5           Motorbike                Yes           No    Work/Study
## 6                 Car                Yes           No    Work/Study
##   Frequency Departure.Time Distance Travel.Period Sidewalk.Clearance
## 1   3 times        Morning        2            10                Yes
## 2 > 3 times        Morning        8            15                Yes
## 3 > 3 times        Evening        5            15                Yes
## 4 > 3 times        Morning        5            10                Yes
## 5 > 3 times        Evening        8            15                Yes
## 6 > 3 times        Morning       20            30                Yes
##   Lane.Separate Temporary.Stop.Number Mode.Choice.Reason Weather Weekend
## 1           Yes                  None      Accessibility   Sunny      No
## 2           Yes                1 stop        Comfortable    Cool      No
## 3           Yes                1 stop      Accessibility   Sunny      No
## 4           Yes                1 stop        Comfortable   Sunny      No
## 5           Yes                  None        Comfortable    Cool      No
## 6           Yes                  None        Reliability    Cool      No
##      Non.Bus.Reason Cost Bus.Stop.Present Gender   Age
## 1          No Route   12              Yes Female 25-60
## 2          No Route    8       Don't know Female 15-24
## 3     Uncomfortable    5              Yes   Male 15-24
## 4 Long waiting time    5              Yes   Male 15-24
## 5     Uncomfortable    8       Don't know Female 25-60
## 6     Uncomfortable   40       Don't know   Male 25-60
##             Occupation           Income Number.of.Children
## 1 Housewife/Unemployed  (8-15) millions         2 children
## 2     Officers/Workers (15-25) millions            1 child
## 3      Pupils/Students     >25 millions               None
## 4      Pupils/Students (15-25) millions               None
## 5     Officers/Workers (15-25) millions            1 child
## 6     Officers/Workers     >25 millions         2 children
##   Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1                No              No             No           No         No
## 2               Yes              No            Yes          Yes         No
## 3               Yes              No             No          Yes         No
## 4               Yes              No            Yes          Yes         No
## 5               Yes              No             No          Yes         No
## 6               Yes             Yes            Yes          Yes        Yes
##   Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 1                  1                2             1  No
## 2                  1                3          None  No
## 3               None                3          None  No
## 4                  1                3          None  No
## 5               None                2          None  No
## 6                  1                2             1  No
str(MC)
## 'data.frame':    809 obs. of  31 variables:
##  $ Travel.Mode          : Factor w/ 7 levels "Walk","Bicycle",..: 6 3 3 3 3 4 4 3 3 3 ...
##  $ Bus.Stop.Condition   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Central.Area         : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Purpose              : Factor w/ 4 levels "Work/Study","Picking Children",..: 3 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : Factor w/ 4 levels "Once","2 times",..: 3 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : Factor w/ 4 levels "Morning","Afternoon",..: 1 1 3 1 3 1 1 1 2 1 ...
##  $ Distance             : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ Travel.Period        : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ Sidewalk.Clearance   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Lane.Separate        : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Temporary.Stop.Number: Factor w/ 3 levels "None","1 stop",..: 1 2 2 2 1 1 1 1 2 2 ...
##  $ Mode.Choice.Reason   : Factor w/ 6 levels "Safety","Comfortable",..: 4 2 4 2 2 5 5 2 2 2 ...
##  $ Weather              : Factor w/ 3 levels "Sunny","Rainny",..: 1 3 1 1 3 3 3 3 1 3 ...
##  $ Weekend              : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ Non.Bus.Reason       : Factor w/ 6 levels "No Route","Uncomfortable",..: 1 1 2 4 2 2 4 2 1 4 ...
##  $ Cost                 : num  12 8 5 5 8 40 30 10 12 10 ...
##  $ Bus.Stop.Present     : Factor w/ 3 levels "No","Yes","Don't know": 2 3 2 2 3 3 3 2 2 2 ...
##  $ Gender               : Factor w/ 2 levels "Female","Male": 1 1 2 2 1 2 2 1 1 2 ...
##  $ Age                  : Factor w/ 3 levels "15-24","25-60",..: 2 1 1 1 2 2 2 2 1 2 ...
##  $ Occupation           : Factor w/ 5 levels "Pupils/Students",..: 3 2 1 1 2 2 2 4 4 2 ...
##  $ Income               : Factor w/ 4 levels "<8 millions",..: 2 3 4 3 3 4 4 2 3 3 ...
##  $ Number.of.Children   : Factor w/ 4 levels "None","1 child",..: 3 2 1 1 2 3 3 1 2 1 ...
##  $ Motor.Certificate    : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ Car.Certificate      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
##  $ Bicycle.Owning       : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 1 2 1 ...
##  $ Motor.Owning         : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ Car.Owning           : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
##  $ Number.of.Bicycles   : Factor w/ 4 levels "None","1","2",..: 2 2 1 2 1 2 1 1 2 1 ...
##  $ Number.of.Motors     : Factor w/ 5 levels "None","1","2",..: 3 4 4 4 3 3 3 3 4 4 ...
##  $ Number.of.Car        : Factor w/ 3 levels "None","1",">=2": 2 1 1 1 1 2 2 1 1 1 ...
##  $ Bus                  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
# Create new data - datBus (Bus và tất cả các loại phương tiện còn lại)
datBus <- MC[, c(2:31)]
head(datBus)
##   Bus.Stop.Condition Central.Area       Purpose Frequency Departure.Time
## 1                Yes           No Entertainment   3 times        Morning
## 2                Yes           No    Work/Study > 3 times        Morning
## 3                Yes           No    Work/Study > 3 times        Evening
## 4                Yes           No    Work/Study > 3 times        Morning
## 5                Yes           No    Work/Study > 3 times        Evening
## 6                Yes           No    Work/Study > 3 times        Morning
##   Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1        2            10                Yes           Yes
## 2        8            15                Yes           Yes
## 3        5            15                Yes           Yes
## 4        5            10                Yes           Yes
## 5        8            15                Yes           Yes
## 6       20            30                Yes           Yes
##   Temporary.Stop.Number Mode.Choice.Reason Weather Weekend
## 1                  None      Accessibility   Sunny      No
## 2                1 stop        Comfortable    Cool      No
## 3                1 stop      Accessibility   Sunny      No
## 4                1 stop        Comfortable   Sunny      No
## 5                  None        Comfortable    Cool      No
## 6                  None        Reliability    Cool      No
##      Non.Bus.Reason Cost Bus.Stop.Present Gender   Age
## 1          No Route   12              Yes Female 25-60
## 2          No Route    8       Don't know Female 15-24
## 3     Uncomfortable    5              Yes   Male 15-24
## 4 Long waiting time    5              Yes   Male 15-24
## 5     Uncomfortable    8       Don't know Female 25-60
## 6     Uncomfortable   40       Don't know   Male 25-60
##             Occupation           Income Number.of.Children
## 1 Housewife/Unemployed  (8-15) millions         2 children
## 2     Officers/Workers (15-25) millions            1 child
## 3      Pupils/Students     >25 millions               None
## 4      Pupils/Students (15-25) millions               None
## 5     Officers/Workers (15-25) millions            1 child
## 6     Officers/Workers     >25 millions         2 children
##   Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1                No              No             No           No         No
## 2               Yes              No            Yes          Yes         No
## 3               Yes              No             No          Yes         No
## 4               Yes              No            Yes          Yes         No
## 5               Yes              No             No          Yes         No
## 6               Yes             Yes            Yes          Yes        Yes
##   Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 1                  1                2             1  No
## 2                  1                3          None  No
## 3               None                3          None  No
## 4                  1                3          None  No
## 5               None                2          None  No
## 6                  1                2             1  No
str(datBus)
## 'data.frame':    809 obs. of  30 variables:
##  $ Bus.Stop.Condition   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Central.Area         : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Purpose              : Factor w/ 4 levels "Work/Study","Picking Children",..: 3 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : Factor w/ 4 levels "Once","2 times",..: 3 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : Factor w/ 4 levels "Morning","Afternoon",..: 1 1 3 1 3 1 1 1 2 1 ...
##  $ Distance             : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ Travel.Period        : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ Sidewalk.Clearance   : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Lane.Separate        : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Temporary.Stop.Number: Factor w/ 3 levels "None","1 stop",..: 1 2 2 2 1 1 1 1 2 2 ...
##  $ Mode.Choice.Reason   : Factor w/ 6 levels "Safety","Comfortable",..: 4 2 4 2 2 5 5 2 2 2 ...
##  $ Weather              : Factor w/ 3 levels "Sunny","Rainny",..: 1 3 1 1 3 3 3 3 1 3 ...
##  $ Weekend              : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ Non.Bus.Reason       : Factor w/ 6 levels "No Route","Uncomfortable",..: 1 1 2 4 2 2 4 2 1 4 ...
##  $ Cost                 : num  12 8 5 5 8 40 30 10 12 10 ...
##  $ Bus.Stop.Present     : Factor w/ 3 levels "No","Yes","Don't know": 2 3 2 2 3 3 3 2 2 2 ...
##  $ Gender               : Factor w/ 2 levels "Female","Male": 1 1 2 2 1 2 2 1 1 2 ...
##  $ Age                  : Factor w/ 3 levels "15-24","25-60",..: 2 1 1 1 2 2 2 2 1 2 ...
##  $ Occupation           : Factor w/ 5 levels "Pupils/Students",..: 3 2 1 1 2 2 2 4 4 2 ...
##  $ Income               : Factor w/ 4 levels "<8 millions",..: 2 3 4 3 3 4 4 2 3 3 ...
##  $ Number.of.Children   : Factor w/ 4 levels "None","1 child",..: 3 2 1 1 2 3 3 1 2 1 ...
##  $ Motor.Certificate    : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ Car.Certificate      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
##  $ Bicycle.Owning       : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 1 2 1 ...
##  $ Motor.Owning         : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
##  $ Car.Owning           : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
##  $ Number.of.Bicycles   : Factor w/ 4 levels "None","1","2",..: 2 2 1 2 1 2 1 1 2 1 ...
##  $ Number.of.Motors     : Factor w/ 5 levels "None","1","2",..: 3 4 4 4 3 3 3 3 4 4 ...
##  $ Number.of.Car        : Factor w/ 3 levels "None","1",">=2": 2 1 1 1 1 2 2 1 1 1 ...
##  $ Bus                  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
dim(datBus)
## [1] 809  30

2. Descriptive analyse

# Descriptive analyse with package "table 1" - sort by Bus variable
  ## install.packages("table1", dependencies = T)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1 (~ Bus.Stop.Condition + Central.Area + Purpose + Frequency + Departure.Time + Sidewalk.Clearance + Lane.Separate + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Weekend + Non.Bus.Reason + Bus.Stop.Present + Gender + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Bicycle.Owning + Motor.Owning + Car.Owning + Number.of.Bicycles + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost| Bus , data = datBus)
No
(n=703)
Yes
(n=106)
Overall
(n=809)
Bus.Stop.Condition
No 334 (47.5%) 61 (57.5%) 395 (48.8%)
Yes 369 (52.5%) 45 (42.5%) 414 (51.2%)
Central.Area
No 387 (55.0%) 41 (38.7%) 428 (52.9%)
Yes 316 (45.0%) 65 (61.3%) 381 (47.1%)
Purpose
Work/Study 463 (65.9%) 77 (72.6%) 540 (66.7%)
Picking Children 85 (12.1%) 9 (8.5%) 94 (11.6%)
Entertainment 140 (19.9%) 13 (12.3%) 153 (18.9%)
Others 15 (2.1%) 7 (6.6%) 22 (2.7%)
Frequency
Once 42 (6.0%) 13 (12.3%) 55 (6.8%)
2 times 44 (6.3%) 18 (17.0%) 62 (7.7%)
3 times 52 (7.4%) 19 (17.9%) 71 (8.8%)
> 3 times 565 (80.4%) 56 (52.8%) 621 (76.8%)
Departure.Time
Morning 425 (60.5%) 71 (67.0%) 496 (61.3%)
Afternoon 55 (7.8%) 11 (10.4%) 66 (8.2%)
Evening 117 (16.6%) 17 (16.0%) 134 (16.6%)
Others 106 (15.1%) 7 (6.6%) 113 (14.0%)
Sidewalk.Clearance
No 95 (13.5%) 15 (14.2%) 110 (13.6%)
Yes 608 (86.5%) 91 (85.8%) 699 (86.4%)
Lane.Separate
No 151 (21.5%) 20 (18.9%) 171 (21.1%)
Yes 552 (78.5%) 86 (81.1%) 638 (78.9%)
Temporary.Stop.Number
None 349 (49.6%) 77 (72.6%) 426 (52.7%)
1 stop 255 (36.3%) 10 (9.4%) 265 (32.8%)
>=2 stops 99 (14.1%) 19 (17.9%) 118 (14.6%)
Mode.Choice.Reason
Safety 84 (11.9%) 31 (29.2%) 115 (14.2%)
Comfortable 324 (46.1%) 39 (36.8%) 363 (44.9%)
Low price 40 (5.7%) 33 (31.1%) 73 (9.0%)
Accessibility 177 (25.2%) 1 (0.9%) 178 (22.0%)
Reliability 56 (8.0%) 0 (0%) 56 (6.9%)
others 22 (3.1%) 2 (1.9%) 24 (3.0%)
Weather
Sunny 469 (66.7%) 72 (67.9%) 541 (66.9%)
Rainny 10 (1.4%) 8 (7.5%) 18 (2.2%)
Cool 224 (31.9%) 26 (24.5%) 250 (30.9%)
Weekend
No 528 (75.1%) 84 (79.2%) 612 (75.6%)
Yes 175 (24.9%) 22 (20.8%) 197 (24.4%)
Non.Bus.Reason
No Route 159 (22.6%) 0 (0%) 159 (19.7%)
Uncomfortable 218 (31.0%) 0 (0%) 218 (26.9%)
Unsafety 18 (2.6%) 0 (0%) 18 (2.2%)
Long waiting time 204 (29.0%) 0 (0%) 204 (25.2%)
Unreliability 63 (9.0%) 0 (0%) 63 (7.8%)
others 41 (5.8%) 0 (0%) 41 (5.1%)
Missing 0 (0%) 106 (100%) 106 (13.1%)
Bus.Stop.Present
No 64 (9.1%) 17 (16.0%) 81 (10.0%)
Yes 559 (79.5%) 84 (79.2%) 643 (79.5%)
Don't know 80 (11.4%) 5 (4.7%) 85 (10.5%)
Gender
Female 297 (42.2%) 42 (39.6%) 339 (41.9%)
Male 406 (57.8%) 64 (60.4%) 470 (58.1%)
Age
15-24 296 (42.1%) 60 (56.6%) 356 (44.0%)
25-60 389 (55.3%) 26 (24.5%) 415 (51.3%)
>60 18 (2.6%) 20 (18.9%) 38 (4.7%)
Occupation
Pupils/Students 234 (33.3%) 50 (47.2%) 284 (35.1%)
Officers/Workers 204 (29.0%) 14 (13.2%) 218 (26.9%)
Housewife/Unemployed 42 (6.0%) 12 (11.3%) 54 (6.7%)
Free labor 172 (24.5%) 12 (11.3%) 184 (22.7%)
Others 51 (7.3%) 18 (17.0%) 69 (8.5%)
Income
<8 millions 151 (21.5%) 48 (45.3%) 199 (24.6%)
(8-15) millions 297 (42.2%) 29 (27.4%) 326 (40.3%)
(15-25) millions 176 (25.0%) 26 (24.5%) 202 (25.0%)
>25 millions 79 (11.2%) 3 (2.8%) 82 (10.1%)
Number.of.Children
None 295 (42.0%) 63 (59.4%) 358 (44.3%)
1 child 285 (40.5%) 23 (21.7%) 308 (38.1%)
2 children 100 (14.2%) 18 (17.0%) 118 (14.6%)
>= 3 children 23 (3.3%) 2 (1.9%) 25 (3.1%)
Motor.Certificate
No 59 (8.4%) 35 (33.0%) 94 (11.6%)
Yes 644 (91.6%) 71 (67.0%) 715 (88.4%)
Car.Certificate
No 554 (78.8%) 100 (94.3%) 654 (80.8%)
Yes 149 (21.2%) 6 (5.7%) 155 (19.2%)
Bicycle.Owning
No 429 (61.0%) 60 (56.6%) 489 (60.4%)
Yes 274 (39.0%) 46 (43.4%) 320 (39.6%)
Motor.Owning
No 99 (14.1%) 48 (45.3%) 147 (18.2%)
Yes 604 (85.9%) 58 (54.7%) 662 (81.8%)
Car.Owning
No 588 (83.6%) 102 (96.2%) 690 (85.3%)
Yes 115 (16.4%) 4 (3.8%) 119 (14.7%)
Number.of.Bicycles
None 326 (46.4%) 38 (35.8%) 364 (45.0%)
1 287 (40.8%) 58 (54.7%) 345 (42.6%)
2 80 (11.4%) 9 (8.5%) 89 (11.0%)
>=3 10 (1.4%) 1 (0.9%) 11 (1.4%)
Number.of.Motors
None 19 (2.7%) 13 (12.3%) 32 (4.0%)
1 102 (14.5%) 26 (24.5%) 128 (15.8%)
2 356 (50.6%) 48 (45.3%) 404 (49.9%)
3 177 (25.2%) 16 (15.1%) 193 (23.9%)
>3 49 (7.0%) 3 (2.8%) 52 (6.4%)
Number.of.Car
None 533 (75.8%) 97 (91.5%) 630 (77.9%)
1 152 (21.6%) 8 (7.5%) 160 (19.8%)
>=2 18 (2.6%) 1 (0.9%) 19 (2.3%)
Distance
Mean (SD) 6.36 (5.52) 8.59 (4.77) 6.65 (5.48)
Median [Min, Max] 5.00 [0.0150, 35.0] 8.00 [0.500, 25.0] 5.00 [0.0150, 35.0]
Travel.Period
Mean (SD) 16.9 (15.6) 21.6 (12.5) 17.5 (15.3)
Median [Min, Max] 15.0 [0.00, 180] 20.0 [3.00, 60.0] 15.0 [0.00, 180]
Cost
Mean (SD) 9.28 (17.2) 5.24 (1.07) 8.75 (16.1)
Median [Min, Max] 5.00 [0.00, 285] 5.00 [5.00, 10.0] 5.00 [0.00, 285]
# Descriptive analyse with package "compareGroups" - sort by Bus variable (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
library(compareGroups)
## Loading required package: SNPassoc
## Loading required package: haplo.stats
## Loading required package: survival
## Loading required package: mvtnorm
## Loading required package: parallel
## Registered S3 method overwritten by 'SNPassoc':
##   method            from       
##   summary.haplo.glm haplo.stats
t = compareGroups(Bus ~ Bus.Stop.Condition + Central.Area + Purpose + Frequency + Departure.Time + Sidewalk.Clearance + Lane.Separate + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Weekend + Non.Bus.Reason + Bus.Stop.Present + Gender + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Bicycle.Owning + Motor.Owning + Car.Owning + Number.of.Bicycles + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, data = datBus)
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## [1] "Error in binom.test(nn[i, j], n.ij, conf.level = conf.level) : \n  'n' must be a positive integer >= 'x'\n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in binom.test(nn[i, j], n.ij, conf.level = conf.level): 'n' must be a positive integer >= 'x'>
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in compareGroups.fit(X = X, y = y, include.label = include.label, :
## Variables 'Non.Bus.Reason' have been removed since some errors occurred
createTable(t)
## 
## --------Summary descriptives table by 'Bus'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=703       N=106              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Bus.Stop.Condition:                                0.068   
##     No                   334 (47.5%) 61 (57.5%)            
##     Yes                  369 (52.5%) 45 (42.5%)            
## Central.Area:                                      0.002   
##     No                   387 (55.0%) 41 (38.7%)            
##     Yes                  316 (45.0%) 65 (61.3%)            
## Purpose:                                           0.014   
##     Work/Study           463 (65.9%) 77 (72.6%)            
##     Picking Children     85 (12.1%)   9 (8.49%)            
##     Entertainment        140 (19.9%) 13 (12.3%)            
##     Others               15 (2.13%)   7 (6.60%)            
## Frequency:                                        <0.001   
##     Once                 42 (5.97%)  13 (12.3%)            
##     2 times              44 (6.26%)  18 (17.0%)            
##     3 times              52 (7.40%)  19 (17.9%)            
##     > 3 times            565 (80.4%) 56 (52.8%)            
## Departure.Time:                                    0.105   
##     Morning              425 (60.5%) 71 (67.0%)            
##     Afternoon            55 (7.82%)  11 (10.4%)            
##     Evening              117 (16.6%) 17 (16.0%)            
##     Others               106 (15.1%)  7 (6.60%)            
## Sidewalk.Clearance:                                0.979   
##     No                   95 (13.5%)  15 (14.2%)            
##     Yes                  608 (86.5%) 91 (85.8%)            
## Lane.Separate:                                     0.627   
##     No                   151 (21.5%) 20 (18.9%)            
##     Yes                  552 (78.5%) 86 (81.1%)            
## Temporary.Stop.Number:                            <0.001   
##     None                 349 (49.6%) 77 (72.6%)            
##     1 stop               255 (36.3%) 10 (9.43%)            
##     >=2 stops            99 (14.1%)  19 (17.9%)            
## Mode.Choice.Reason:                                  .     
##     Safety               84 (11.9%)  31 (29.2%)            
##     Comfortable          324 (46.1%) 39 (36.8%)            
##     Low price            40 (5.69%)  33 (31.1%)            
##     Accessibility        177 (25.2%)  1 (0.94%)            
##     Reliability          56 (7.97%)   0 (0.00%)            
##     others               22 (3.13%)   2 (1.89%)            
## Weather:                                           0.001   
##     Sunny                469 (66.7%) 72 (67.9%)            
##     Rainny               10 (1.42%)   8 (7.55%)            
##     Cool                 224 (31.9%) 26 (24.5%)            
## Weekend:                                           0.421   
##     No                   528 (75.1%) 84 (79.2%)            
##     Yes                  175 (24.9%) 22 (20.8%)            
## Bus.Stop.Present:                                  0.016   
##     No                   64 (9.10%)  17 (16.0%)            
##     Yes                  559 (79.5%) 84 (79.2%)            
##     Don't know           80 (11.4%)   5 (4.72%)            
## Gender:                                            0.685   
##     Female               297 (42.2%) 42 (39.6%)            
##     Male                 406 (57.8%) 64 (60.4%)            
## Age:                                              <0.001   
##     15-24                296 (42.1%) 60 (56.6%)            
##     25-60                389 (55.3%) 26 (24.5%)            
##     >60                  18 (2.56%)  20 (18.9%)            
## Occupation:                                       <0.001   
##     Pupils/Students      234 (33.3%) 50 (47.2%)            
##     Officers/Workers     204 (29.0%) 14 (13.2%)            
##     Housewife/Unemployed 42 (5.97%)  12 (11.3%)            
##     Free labor           172 (24.5%) 12 (11.3%)            
##     Others               51 (7.25%)  18 (17.0%)            
## Income:                                           <0.001   
##     <8 millions          151 (21.5%) 48 (45.3%)            
##     (8-15) millions      297 (42.2%) 29 (27.4%)            
##     (15-25) millions     176 (25.0%) 26 (24.5%)            
##     >25 millions         79 (11.2%)   3 (2.83%)            
## Number.of.Children:                                0.001   
##     None                 295 (42.0%) 63 (59.4%)            
##     1 child              285 (40.5%) 23 (21.7%)            
##     2 children           100 (14.2%) 18 (17.0%)            
##     >= 3 children        23 (3.27%)   2 (1.89%)            
## Motor.Certificate:                                <0.001   
##     No                   59 (8.39%)  35 (33.0%)            
##     Yes                  644 (91.6%) 71 (67.0%)            
## Car.Certificate:                                  <0.001   
##     No                   554 (78.8%) 100 (94.3%)           
##     Yes                  149 (21.2%)  6 (5.66%)            
## Bicycle.Owning:                                    0.447   
##     No                   429 (61.0%) 60 (56.6%)            
##     Yes                  274 (39.0%) 46 (43.4%)            
## Motor.Owning:                                     <0.001   
##     No                   99 (14.1%)  48 (45.3%)            
##     Yes                  604 (85.9%) 58 (54.7%)            
## Car.Owning:                                        0.001   
##     No                   588 (83.6%) 102 (96.2%)           
##     Yes                  115 (16.4%)  4 (3.77%)            
## Number.of.Bicycles:                                0.065   
##     None                 326 (46.4%) 38 (35.8%)            
##     1                    287 (40.8%) 58 (54.7%)            
##     2                    80 (11.4%)   9 (8.49%)            
##     >=3                  10 (1.42%)   1 (0.94%)            
## Number.of.Motors:                                 <0.001   
##     None                 19 (2.70%)  13 (12.3%)            
##     1                    102 (14.5%) 26 (24.5%)            
##     2                    356 (50.6%) 48 (45.3%)            
##     3                    177 (25.2%) 16 (15.1%)            
##     >3                   49 (6.97%)   3 (2.83%)            
## Number.of.Car:                                     0.001   
##     None                 533 (75.8%) 97 (91.5%)            
##     1                    152 (21.6%)  8 (7.55%)            
##     >=2                  18 (2.56%)   1 (0.94%)            
## Distance                 6.36 (5.52) 8.59 (4.77)  <0.001   
## Travel.Period            16.9 (15.6) 21.6 (12.5)   0.001   
## Cost                     9.28 (17.2) 5.24 (1.07)  <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05: Bus.Stop.Condition, Departure.Time, Sidewalk.Clearance, Lane.Separate, Mode.Choice.Reason, Non.Bus.Reason, Weekend, Gender, Bicycle.Owning, Number.of.Bicycles
t1 = compareGroups(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, data = datBus)
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect

## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
createTable(t1)
## 
## --------Summary descriptives table by 'Bus'---------
## 
## __________________________________________________________ 
##                              No          Yes     p.overall 
##                             N=703       N=106              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Central.Area:                                      0.002   
##     No                   387 (55.0%) 41 (38.7%)            
##     Yes                  316 (45.0%) 65 (61.3%)            
## Purpose:                                           0.014   
##     Work/Study           463 (65.9%) 77 (72.6%)            
##     Picking Children     85 (12.1%)   9 (8.49%)            
##     Entertainment        140 (19.9%) 13 (12.3%)            
##     Others               15 (2.13%)   7 (6.60%)            
## Frequency:                                        <0.001   
##     Once                 42 (5.97%)  13 (12.3%)            
##     2 times              44 (6.26%)  18 (17.0%)            
##     3 times              52 (7.40%)  19 (17.9%)            
##     > 3 times            565 (80.4%) 56 (52.8%)            
## Temporary.Stop.Number:                            <0.001   
##     None                 349 (49.6%) 77 (72.6%)            
##     1 stop               255 (36.3%) 10 (9.43%)            
##     >=2 stops            99 (14.1%)  19 (17.9%)            
## Weather:                                           0.001   
##     Sunny                469 (66.7%) 72 (67.9%)            
##     Rainny               10 (1.42%)   8 (7.55%)            
##     Cool                 224 (31.9%) 26 (24.5%)            
## Bus.Stop.Present:                                  0.016   
##     No                   64 (9.10%)  17 (16.0%)            
##     Yes                  559 (79.5%) 84 (79.2%)            
##     Don't know           80 (11.4%)   5 (4.72%)            
## Age:                                              <0.001   
##     15-24                296 (42.1%) 60 (56.6%)            
##     25-60                389 (55.3%) 26 (24.5%)            
##     >60                  18 (2.56%)  20 (18.9%)            
## Occupation:                                       <0.001   
##     Pupils/Students      234 (33.3%) 50 (47.2%)            
##     Officers/Workers     204 (29.0%) 14 (13.2%)            
##     Housewife/Unemployed 42 (5.97%)  12 (11.3%)            
##     Free labor           172 (24.5%) 12 (11.3%)            
##     Others               51 (7.25%)  18 (17.0%)            
## Income:                                           <0.001   
##     <8 millions          151 (21.5%) 48 (45.3%)            
##     (8-15) millions      297 (42.2%) 29 (27.4%)            
##     (15-25) millions     176 (25.0%) 26 (24.5%)            
##     >25 millions         79 (11.2%)   3 (2.83%)            
## Number.of.Children:                                0.001   
##     None                 295 (42.0%) 63 (59.4%)            
##     1 child              285 (40.5%) 23 (21.7%)            
##     2 children           100 (14.2%) 18 (17.0%)            
##     >= 3 children        23 (3.27%)   2 (1.89%)            
## Motor.Certificate:                                <0.001   
##     No                   59 (8.39%)  35 (33.0%)            
##     Yes                  644 (91.6%) 71 (67.0%)            
## Car.Certificate:                                  <0.001   
##     No                   554 (78.8%) 100 (94.3%)           
##     Yes                  149 (21.2%)  6 (5.66%)            
## Motor.Owning:                                     <0.001   
##     No                   99 (14.1%)  48 (45.3%)            
##     Yes                  604 (85.9%) 58 (54.7%)            
## Car.Owning:                                        0.001   
##     No                   588 (83.6%) 102 (96.2%)           
##     Yes                  115 (16.4%)  4 (3.77%)            
## Number.of.Motors:                                 <0.001   
##     None                 19 (2.70%)  13 (12.3%)            
##     1                    102 (14.5%) 26 (24.5%)            
##     2                    356 (50.6%) 48 (45.3%)            
##     3                    177 (25.2%) 16 (15.1%)            
##     >3                   49 (6.97%)   3 (2.83%)            
## Number.of.Car:                                     0.001   
##     None                 533 (75.8%) 97 (91.5%)            
##     1                    152 (21.6%)  8 (7.55%)            
##     >=2                  18 (2.56%)   1 (0.94%)            
## Distance                 6.36 (5.52) 8.59 (4.77)  <0.001   
## Travel.Period            16.9 (15.6) 21.6 (12.5)   0.001   
## Cost                     9.28 (17.2) 5.24 (1.07)  <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

3. Descriptive analyse by graphs

library(magrittr)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
datBus %>%
  group_by(Bus.Stop.Condition, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Bus.Stop.Condition)) +
  geom_col(position = "fill") +
  xlab("Bus") +
  ylab("Bus Stop Condition") +
  ggtitle("Proportion of Bus Choice ~ Bus Stop Condition")

datBus %>%
  group_by(Central.Area, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Central.Area)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Central Area") +
  ggtitle("Proportion of Bus Choice ~ Central Area")

datBus %>%
  group_by(Purpose, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Purpose)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Purpose of travelling") +
  ggtitle("Proportion of Bus Choice ~ Purpose of Travelling")

datBus %>%
  group_by(Frequency, Purpose) %>%
  count() %>% 
  ggplot(aes(Purpose, n, fill = Frequency)) +
  geom_col(position = "fill") +
  xlab("Purpose") +
  ylab("Frequency") +
  ggtitle("Proportion of Purpose ~ Frequency")

datBus %>%
  group_by(Frequency, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Frequency)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Frequency of Travelling") +
  ggtitle("Proportion of Bus Choice ~ Frequency of Travelling")

datBus %>%
  group_by(Departure.Time, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Departure.Time)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Departure time of travel") +
  ggtitle("Proportion of Bus Choice ~ Departure Time of Travel")

datBus %>%
  group_by(Sidewalk.Clearance, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Sidewalk.Clearance)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Sidewalk Clearance of Roads") +
  ggtitle("Proportion of Bus Choice ~ Sidewalk Clearance of Roads")

datBus %>%
  group_by(Lane.Separate, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Lane.Separate)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Lane Separate") +
  ggtitle("Proportion of Bus Choice ~ Lane Separate of Roads")

datBus %>%
  group_by(Temporary.Stop.Number, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Temporary.Stop.Number)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("The number of temporary stops") +
  ggtitle("Proportion of Bus Choice ~ The number of temporary stops") 

datBus %>%
  group_by(Mode.Choice.Reason, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Mode.Choice.Reason)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("The reason of motor mode choice") +
  ggtitle("Proportion of Bus Choice ~ Motor Choice Reason")

datBus %>%
  group_by(Weather, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Weather)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Weather condition") +
  ggtitle("Proportion of Bus Choice ~ Weather condition")

datBus %>%
  group_by(Weekend, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Weekend)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Weekend") +
  ggtitle("Proportion of Bus Choice ~ Weekend")

datBus %>%
  group_by(Bus.Stop.Present, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Bus.Stop.Present)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("The presence of bus stop") +
  ggtitle("Proportion of Bus Choice ~ The presence of bus stop")

datBus %>%
  group_by(Gender, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Gender)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Gender") +
  ggtitle("Proportion of Bus Choice ~ Gender") 

datBus %>%
  group_by(Age, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Age)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Age") +
  ggtitle("Proportion of Bus Choice ~ Age")

datBus %>%
  group_by(Occupation, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Occupation)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Occupation") +
  ggtitle("Proportion of Bus Choice ~ Occupation")

datBus %>%
  group_by(Income, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Income)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Income") +
  ggtitle("Proportion of Bus Choice ~ Income")

datBus %>%
  group_by(Number.of.Children, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Number.of.Children)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Number of Children") +
  ggtitle("Proportion of Bus Choice ~ Number of Children in family")

datBus %>%
  group_by(Motor.Certificate, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Motor.Certificate)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Motor Certificate") +
  ggtitle("Proportion of Bus Choice ~ Motor certificate")

datBus %>%
  group_by(Car.Certificate, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Car.Certificate)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Car Certificate") +
  ggtitle("Proportion of Bus Choice ~ Car certificate")

datBus %>%
  group_by(Bicycle.Owning, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Bicycle.Owning)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Bicycle Owning") +
  ggtitle("Proportion of Bus Choice ~ Bicycle Owning")

datBus %>%
  group_by(Motor.Owning, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Motor.Owning)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Motor Owning") +
  ggtitle("Proportion of Bus Choice ~ Motor Owning")

datBus %>%
  group_by(Car.Owning, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Car.Owning)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Car Owning") +
  ggtitle("Proportion of Bus Choice ~ Car Owning")

datBus %>%
  group_by(Number.of.Bicycles, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Number.of.Bicycles)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Number of Bicycle") +
  ggtitle("Proportion of Bus Choice ~ Number of Bicycle")

datBus %>%
  group_by(Number.of.Motors, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Number.of.Motors)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Number of Motors") +
  ggtitle("Proportion of Bus Choice ~ Number of Motors")

datBus %>%
  group_by(Number.of.Car, Bus) %>%
  count() %>% 
  ggplot(aes(Bus, n, fill = Number.of.Car)) +
  geom_col(position = "fill") +
  xlab("Bus Choice") +
  ylab("Number of Cars") +
  ggtitle("Proportion of Bus Choice ~ Number of Cars")

## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(datBus, aes(x=Distance, fill = Bus, color = Bus)) +
  geom_histogram (position = "dodge") +
  xlab("Distance") +
  ylab("Count") +
  ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(datBus, aes(x=Travel.Period, fill = Bus, color = Bus)) +
  geom_histogram (position = "dodge") +
  xlab("Travel Period") +
  ylab("Count") +
  ggtitle("Histogram of Travel Period")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(datBus, aes(x=Cost, fill = Bus, color = Bus)) +
  geom_histogram (position = "dodge") +
  xlab("Cost") +
  ylab("Count") +
  ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Correlation in continuous variables
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
CVBus = data.frame(datBus$Bus, datBus$Distance, datBus$Travel.Period, datBus$Cost)
pairs.panels(CVBus)

## Statistic Analysis by boxplot (continuous variales)
datBus %>%
  group_by(Distance, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Distance, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Distance") +
  ggtitle("Boxplot of Bus Choice ~ Distance")

datBus %>%
  group_by(Cost, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Cost, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Cost") +
  ggtitle("Boxplot of Bus Choice ~ Cost")

datBus %>%
  group_by(Travel.Period, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Travel.Period, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Bus Choice ~ Travel Period")

### Boxplot of Distance, Travel Period and Cost ~ Gender
datBus %>%
  group_by(Distance, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Distance, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus Choice") +
  ylab("Distance") +
  ggtitle("Boxplot of Distance ~ Bus Choice")

datBus %>%
  group_by(Cost, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Cost, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus") +
  ylab("Cost") +
  ggtitle("Boxplot of Cost ~ Bus")

datBus %>%
  group_by(Travel.Period, Bus) %>%
  count() %>% 
  ggplot(aes(x = Bus, y = Travel.Period, fill = Bus)) +
  geom_boxplot() +
  xlab("Bus") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Travel Period ~ Bus")

### Boxplot of Distance, Travel Period and Cost ~ Occupation
datBus %>%
  group_by(Distance, Occupation) %>%
  count() %>% 
  ggplot(aes(x = Occupation, y = Distance, fill = Occupation)) +
  geom_boxplot() +
  xlab("Occupation") +
  ylab("Distance") +
  ggtitle("Boxplot of Distance ~ Occupation")

datBus %>%
  group_by(Cost, Occupation) %>%
  count() %>% 
  ggplot(aes(x = Occupation, y = Cost, fill = Occupation)) +
  geom_boxplot() +
  xlab("Occupation") +
  ylab("Cost") +
  ggtitle("Boxplot of Cost ~ Occupation")

datBus %>%
  group_by(Travel.Period, Occupation) %>%
  count() %>% 
  ggplot(aes(x = Occupation, y = Travel.Period, fill = Occupation)) +
  geom_boxplot() +
  xlab("Occupation") +
  ylab("Travel Period") +
  ggtitle("Boxplot of Travel Period ~ Occupation")

summary(datBus)
##  Bus.Stop.Condition Central.Area             Purpose        Frequency  
##  No :395            No :428      Work/Study      :540   Once     : 55  
##  Yes:414            Yes:381      Picking Children: 94   2 times  : 62  
##                                  Entertainment   :153   3 times  : 71  
##                                  Others          : 22   > 3 times:621  
##                                                                        
##                                                                        
##                                                                        
##    Departure.Time    Distance      Travel.Period    Sidewalk.Clearance
##  Morning  :496    Min.   : 0.015   Min.   :  0.00   No :110           
##  Afternoon: 66    1st Qu.: 3.000   1st Qu.: 10.00   Yes:699           
##  Evening  :134    Median : 5.000   Median : 15.00                     
##  Others   :113    Mean   : 6.654   Mean   : 17.55                     
##                   3rd Qu.:10.000   3rd Qu.: 20.00                     
##                   Max.   :35.000   Max.   :180.00                     
##                                                                       
##  Lane.Separate Temporary.Stop.Number     Mode.Choice.Reason   Weather   
##  No :171       None     :426         Safety       :115      Sunny :541  
##  Yes:638       1 stop   :265         Comfortable  :363      Rainny: 18  
##                >=2 stops:118         Low price    : 73      Cool  :250  
##                                      Accessibility:178                  
##                                      Reliability  : 56                  
##                                      others       : 24                  
##                                                                         
##  Weekend             Non.Bus.Reason      Cost           Bus.Stop.Present
##  No :612   No Route         :159    Min.   :  0.000   No        : 81    
##  Yes:197   Uncomfortable    :218    1st Qu.:  3.000   Yes       :643    
##            Unsafety         : 18    Median :  5.000   Don't know: 85    
##            Long waiting time:204    Mean   :  8.754                     
##            Unreliability    : 63    3rd Qu.: 10.000                     
##            others           : 41    Max.   :285.000                     
##            NA's             :106                                        
##     Gender       Age                     Occupation 
##  Female:339   15-24:356   Pupils/Students     :284  
##  Male  :470   25-60:415   Officers/Workers    :218  
##               >60  : 38   Housewife/Unemployed: 54  
##                           Free labor          :184  
##                           Others              : 69  
##                                                     
##                                                     
##               Income        Number.of.Children Motor.Certificate
##  <8 millions     :199   None         :358      No : 94          
##  (8-15) millions :326   1 child      :308      Yes:715          
##  (15-25) millions:202   2 children   :118                       
##  >25 millions    : 82   >= 3 children: 25                       
##                                                                 
##                                                                 
##                                                                 
##  Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
##  No :654         No :489        No :147      No :690    None:364          
##  Yes:155         Yes:320        Yes:662      Yes:119    1   :345          
##                                                         2   : 89          
##                                                         >=3 : 11          
##                                                                           
##                                                                           
##                                                                           
##  Number.of.Motors Number.of.Car  Bus     
##  None: 32         None:630      No :703  
##  1   :128         1   :160      Yes:106  
##  2   :404         >=2 : 19               
##  3   :193                                
##  >3  : 52                                
##                                          
## 

3. Binary Model

# Model with multi-variables (remove variables with p-value>0,05 in t1) --> 19 variables
mg = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, family = binomial, data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mg
## 
## Call:  glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Occupation + Income + 
##     Number.of.Children + Motor.Certificate + Car.Certificate + 
##     Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + 
##     Distance + Travel.Period + Cost, family = binomial, data = datBus)
## 
## Coefficients:
##                     (Intercept)                  Central.AreaYes  
##                        0.693170                         1.153570  
##         PurposePicking Children             PurposeEntertainment  
##                       -0.846264                        -2.128209  
##                   PurposeOthers                 Frequency2 times  
##                       -0.241582                         1.663772  
##                Frequency3 times               Frequency> 3 times  
##                        0.551286                        -1.081913  
##     Temporary.Stop.Number1 stop   Temporary.Stop.Number>=2 stops  
##                       -2.088256                         0.007966  
##                   WeatherRainny                      WeatherCool  
##                        1.875750                        -0.560382  
##             Bus.Stop.PresentYes       Bus.Stop.PresentDon't know  
##                       -0.648223                        -1.082582  
##                        Age25-60                           Age>60  
##                       -0.681496                         2.254365  
##      OccupationOfficers/Workers   OccupationHousewife/Unemployed  
##                        0.203182                         2.166113  
##            OccupationFree labor                 OccupationOthers  
##                       -0.369467                         1.387882  
##           Income(8-15) millions           Income(15-25) millions  
##                       -0.937300                        -0.285344  
##              Income>25 millions        Number.of.Children1 child  
##                       -2.316472                        -0.666788  
##    Number.of.Children2 children  Number.of.Children>= 3 children  
##                       -0.302515                        -0.920554  
##            Motor.CertificateYes               Car.CertificateYes  
##                       -1.343825                         0.038586  
##                 Motor.OwningYes                    Car.OwningYes  
##                       -0.746040                         0.036269  
##               Number.of.Motors1                Number.of.Motors2  
##                        0.618812                         0.222040  
##               Number.of.Motors3               Number.of.Motors>3  
##                       -0.523223                        -0.510007  
##                  Number.of.Car1                 Number.of.Car>=2  
##                       -1.162398                        -1.072850  
##                        Distance                    Travel.Period  
##                        0.688046                         0.009410  
##                            Cost  
##                       -0.618352  
## 
## Degrees of Freedom: 808 Total (i.e. Null);  770 Residual
## Null Deviance:       628.3 
## Residual Deviance: 254.3     AIC: 332.3
summary(mg) 
## 
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Occupation + Income + 
##     Number.of.Children + Motor.Certificate + Car.Certificate + 
##     Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + 
##     Distance + Travel.Period + Cost, family = binomial, data = datBus)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5799  -0.2733  -0.1226  -0.0225   3.5052  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      0.693170   1.122728   0.617 0.536972    
## Central.AreaYes                  1.153570   0.369569   3.121 0.001800 ** 
## PurposePicking Children         -0.846264   0.775170  -1.092 0.274959    
## PurposeEntertainment            -2.128209   0.613702  -3.468 0.000525 ***
## PurposeOthers                   -0.241582   0.938275  -0.257 0.796812    
## Frequency2 times                 1.663772   0.810579   2.053 0.040114 *  
## Frequency3 times                 0.551286   0.817643   0.674 0.500160    
## Frequency> 3 times              -1.081913   0.669912  -1.615 0.106309    
## Temporary.Stop.Number1 stop     -2.088256   0.549980  -3.797 0.000146 ***
## Temporary.Stop.Number>=2 stops   0.007966   0.491717   0.016 0.987075    
## WeatherRainny                    1.875750   1.022332   1.835 0.066539 .  
## WeatherCool                     -0.560382   0.483314  -1.159 0.246270    
## Bus.Stop.PresentYes             -0.648223   0.566922  -1.143 0.252869    
## Bus.Stop.PresentDon't know      -1.082582   0.864050  -1.253 0.210236    
## Age25-60                        -0.681496   0.689868  -0.988 0.323219    
## Age>60                           2.254365   0.977059   2.307 0.021038 *  
## OccupationOfficers/Workers       0.203182   0.735185   0.276 0.782265    
## OccupationHousewife/Unemployed   2.166113   1.018430   2.127 0.033427 *  
## OccupationFree labor            -0.369467   0.702315  -0.526 0.598840    
## OccupationOthers                 1.387882   0.899394   1.543 0.122799    
## Income(8-15) millions           -0.937300   0.434752  -2.156 0.031088 *  
## Income(15-25) millions          -0.285344   0.510462  -0.559 0.576167    
## Income>25 millions              -2.316472   1.157201  -2.002 0.045307 *  
## Number.of.Children1 child       -0.666788   0.428135  -1.557 0.119370    
## Number.of.Children2 children    -0.302515   0.608433  -0.497 0.619045    
## Number.of.Children>= 3 children -0.920554   1.460357  -0.630 0.528457    
## Motor.CertificateYes            -1.343825   0.544182  -2.469 0.013532 *  
## Car.CertificateYes               0.038586   0.945690   0.041 0.967454    
## Motor.OwningYes                 -0.746040   0.527259  -1.415 0.157086    
## Car.OwningYes                    0.036269   1.448918   0.025 0.980030    
## Number.of.Motors1                0.618812   0.808973   0.765 0.444310    
## Number.of.Motors2                0.222040   0.802489   0.277 0.782019    
## Number.of.Motors3               -0.523223   0.881065  -0.594 0.552610    
## Number.of.Motors>3              -0.510007   1.380742  -0.369 0.711851    
## Number.of.Car1                  -1.162398   0.858510  -1.354 0.175745    
## Number.of.Car>=2                -1.072850   2.226960  -0.482 0.629980    
## Distance                         0.688046   0.098829   6.962 3.36e-12 ***
## Travel.Period                    0.009410   0.015030   0.626 0.531270    
## Cost                            -0.618352   0.094949  -6.512 7.39e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 254.29  on 770  degrees of freedom
## AIC: 332.29
## 
## Number of Fisher Scoring iterations: 9
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6) --> 12 variables
mg1 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
mg1
## 
## Call:  glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + 
##     Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
## 
## Coefficients:
##                    (Intercept)                 Central.AreaYes  
##                       1.802519                        0.905130  
##        PurposePicking Children            PurposeEntertainment  
##                      -1.087577                       -2.484922  
##                  PurposeOthers                Frequency2 times  
##                      -1.010247                        0.386235  
##               Frequency3 times              Frequency> 3 times  
##                       0.128561                       -1.882813  
##    Temporary.Stop.Number1 stop  Temporary.Stop.Number>=2 stops  
##                      -1.754352                       -0.050375  
##                  WeatherRainny                     WeatherCool  
##                       2.362667                        0.009817  
##            Bus.Stop.PresentYes      Bus.Stop.PresentDon't know  
##                      -0.576247                       -0.808657  
##                       Age25-60                          Age>60  
##                      -0.180929                        2.886523  
##          Income(8-15) millions          Income(15-25) millions  
##                      -0.609947                       -0.447394  
##             Income>25 millions            Motor.CertificateYes  
##                      -2.362587                       -1.332178  
##                Motor.OwningYes                   Travel.Period  
##                      -0.455052                        0.036497  
##                           Cost  
##                      -0.092782  
## 
## Degrees of Freedom: 808 Total (i.e. Null);  786 Residual
## Null Deviance:       628.3 
## Residual Deviance: 382.3     AIC: 428.3
summary(mg1)
## 
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + 
##     Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6136  -0.4119  -0.2060  -0.0914   3.2337  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     1.802519   0.677629   2.660 0.007813 ** 
## Central.AreaYes                 0.905130   0.292539   3.094 0.001974 ** 
## PurposePicking Children        -1.087577   0.597356  -1.821 0.068660 .  
## PurposeEntertainment           -2.484922   0.483074  -5.144 2.69e-07 ***
## PurposeOthers                  -1.010247   0.761896  -1.326 0.184851    
## Frequency2 times                0.386235   0.572849   0.674 0.500162    
## Frequency3 times                0.128561   0.575826   0.223 0.823330    
## Frequency> 3 times             -1.882813   0.479622  -3.926 8.65e-05 ***
## Temporary.Stop.Number1 stop    -1.754352   0.412500  -4.253 2.11e-05 ***
## Temporary.Stop.Number>=2 stops -0.050375   0.377640  -0.133 0.893881    
## WeatherRainny                   2.362667   0.718862   3.287 0.001014 ** 
## WeatherCool                     0.009817   0.345467   0.028 0.977330    
## Bus.Stop.PresentYes            -0.576247   0.416923  -1.382 0.166928    
## Bus.Stop.PresentDon't know     -0.808657   0.674309  -1.199 0.230435    
## Age25-60                       -0.180929   0.327045  -0.553 0.580111    
## Age>60                          2.886523   0.573149   5.036 4.75e-07 ***
## Income(8-15) millions          -0.609947   0.322454  -1.892 0.058547 .  
## Income(15-25) millions         -0.447394   0.373637  -1.197 0.231150    
## Income>25 millions             -2.362587   0.789739  -2.992 0.002775 ** 
## Motor.CertificateYes           -1.332178   0.391963  -3.399 0.000677 ***
## Motor.OwningYes                -0.455052   0.355347  -1.281 0.200340    
## Travel.Period                   0.036497   0.009735   3.749 0.000177 ***
## Cost                           -0.092782   0.031029  -2.990 0.002788 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 382.27  on 786  degrees of freedom
## AIC: 428.27
## 
## Number of Fisher Scoring iterations: 7
## Way 2: Use function lmr in package "rms" - library(rms) - ml = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost) - ml

# Find Parsimonious model - Tìm mô hình tối ưu
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.4-9)
xvars <- datBus[, c(2,3,4,10,12,16,18,19,20,21,22,23,25,26,28,29,6,7,15)]
y <- Bus
bma.search <- bic.glm(xvars, y, strict = F, OR = 20, glm.family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(bma.search)
## 
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   7  models were selected
##  Best  5  models (cumulative posterior probability =  0.9455 ): 
## 
##                                   p!=0    EV       SD       model 1   
## Intercept                         100    -1.41592  0.66406     -1.7933
## Central.Area                      100.0                               
##             .Yes                          1.13384  0.31484      1.1049
## Purpose                            18.3                               
##        .Picking Children                 -0.12694  0.38890       .    
##        .Entertainment                    -0.38154  0.83803       .    
##        .Others                           -0.05927  0.34303       .    
## Frequency                          51.5                               
##          .2 times                         0.47757  0.66588       .    
##          .3 times                         0.17333  0.50950       .    
##          .> 3 times                      -0.55581  0.70787       .    
## Temporary.Stop.Number             100.0                               
##                      .1 stop             -1.80988  0.45388     -1.8490
##                      .>=2 stops          -0.45764  0.44603     -0.5760
## Weather                            17.9                               
##        .Rainny                            0.26507  0.68785       .    
##        .Cool                             -0.16431  0.39171       .    
## Bus.Stop.Present                    0.0                               
##                 .Yes                      0.00000  0.00000       .    
##                 .Don't know               0.00000  0.00000       .    
## Age                               100.0                               
##    .25-60                                -0.59843  0.35579     -0.6181
##    .>60                                   2.38357  0.61606      2.1969
## Occupation                          0.0                               
##           .Officers/Workers               0.00000  0.00000       .    
##           .Housewife/Unemployed           0.00000  0.00000       .    
##           .Free labor                     0.00000  0.00000       .    
##           .Others                         0.00000  0.00000       .    
## Income                              0.0                               
##       .(8-15) millions                    0.00000  0.00000       .    
##       .(15-25) millions                   0.00000  0.00000       .    
##       .>25 millions                       0.00000  0.00000       .    
## Number.of.Children                  0.0                               
##                   .1 child                0.00000  0.00000       .    
##                   .2 children             0.00000  0.00000       .    
##                   .>= 3 children          0.00000  0.00000       .    
## Motor.Certificate                   5.5                               
##                  .Yes                    -0.03976  0.19314       .    
## Car.Certificate                     0.0                               
##                .Yes                       0.00000  0.00000       .    
## Motor.Owning                      100.0                               
##             .Yes                         -1.58958  0.35390     -1.6317
## Car.Owning                          0.0                               
##           .Yes                            0.00000  0.00000       .    
## Number.of.Motors                    0.0                               
##                 .1                        0.00000  0.00000       .    
##                 .2                        0.00000  0.00000       .    
##                 .3                        0.00000  0.00000       .    
##                 .>3                       0.00000  0.00000       .    
## Number.of.Car                       0.0                               
##              .1                           0.00000  0.00000       .    
##              .>=2                         0.00000  0.00000       .    
## Distance                          100.0   0.68250  0.08231      0.6873
## Travel.Period                       0.0   0.00000  0.00000       .    
## Cost                              100.0  -0.60391  0.08271     -0.6075
##                                                                       
## nVar                                                              6   
## BIC                                                         -5020.4280
## post prob                                                       0.409 
##                                   model 2     model 3     model 4   
## Intercept                            -1.3985     -0.6710     -1.2074
## Central.Area                                                        
##             .Yes                      1.1290      1.1376      1.2050
## Purpose                                                             
##        .Picking Children               .         -0.6985       .    
##        .Entertainment                  .         -2.0828       .    
##        .Others                         .         -0.3124       .    
## Frequency                                                           
##          .2 times                     0.8604      1.0177      0.9266
##          .3 times                     0.4297      0.1439      0.4171
##          .> 3 times                  -0.8089     -1.5182     -0.9287
## Temporary.Stop.Number                                               
##                      .1 stop         -1.8603     -1.7650     -1.6729
##                      .>=2 stops      -0.5737     -0.2161     -0.2698
## Weather                                                             
##        .Rainny                         .           .          1.1680
##        .Cool                           .           .         -1.0502
## Bus.Stop.Present                                                    
##                 .Yes                   .           .           .    
##                 .Don't know            .           .           .    
## Age                                                                 
##    .25-60                            -0.6070     -0.5824     -0.5700
##    .>60                               2.2247      2.8801      2.5260
## Occupation                                                          
##           .Officers/Workers            .           .           .    
##           .Housewife/Unemployed        .           .           .    
##           .Free labor                  .           .           .    
##           .Others                      .           .           .    
## Income                                                              
##       .(8-15) millions                 .           .           .    
##       .(15-25) millions                .           .           .    
##       .>25 millions                    .           .           .    
## Number.of.Children                                                  
##                   .1 child             .           .           .    
##                   .2 children          .           .           .    
##                   .>= 3 children       .           .           .    
## Motor.Certificate                                                   
##                  .Yes                  .           .           .    
## Car.Certificate                                                     
##                .Yes                    .           .           .    
## Motor.Owning                                                        
##             .Yes                     -1.5595     -1.6229     -1.5423
## Car.Owning                                                          
##           .Yes                         .           .           .    
## Number.of.Motors                                                    
##                 .1                     .           .           .    
##                 .2                     .           .           .    
##                 .3                     .           .           .    
##                 .>3                    .           .           .    
## Number.of.Car                                                       
##              .1                        .           .           .    
##              .>=2                      .           .           .    
## Distance                              0.6700      0.6682      0.7037
## Travel.Period                          .           .           .    
## Cost                                 -0.5982     -0.5742     -0.6380
##                                                                     
## nVar                                    7           8           8   
## BIC                               -5019.0262  -5018.4877  -5017.6613
## post prob                             0.203       0.155       0.102 
##                                   model 5   
## Intercept                            -1.7367
## Central.Area                                
##             .Yes                      1.2087
## Purpose                                     
##        .Picking Children               .    
##        .Entertainment                  .    
##        .Others                         .    
## Frequency                                   
##          .2 times                      .    
##          .3 times                      .    
##          .> 3 times                    .    
## Temporary.Stop.Number                       
##                      .1 stop         -1.7093
##                      .>=2 stops      -0.3905
## Weather                                     
##        .Rainny                        1.8979
##        .Cool                         -0.7401
## Bus.Stop.Present                            
##                 .Yes                   .    
##                 .Don't know            .    
## Age                                         
##    .25-60                            -0.5820
##    .>60                               2.4384
## Occupation                                  
##           .Officers/Workers            .    
##           .Housewife/Unemployed        .    
##           .Free labor                  .    
##           .Others                      .    
## Income                                      
##       .(8-15) millions                 .    
##       .(15-25) millions                .    
##       .>25 millions                    .    
## Number.of.Children                          
##                   .1 child             .    
##                   .2 children          .    
##                   .>= 3 children       .    
## Motor.Certificate                           
##                  .Yes                  .    
## Car.Certificate                             
##                .Yes                    .    
## Motor.Owning                                
##             .Yes                     -1.6291
## Car.Owning                                  
##           .Yes                         .    
## Number.of.Motors                            
##                 .1                     .    
##                 .2                     .    
##                 .3                     .    
##                 .>3                    .    
## Number.of.Car                               
##              .1                        .    
##              .>=2                      .    
## Distance                              0.7075
## Travel.Period                          .    
## Cost                                 -0.6365
##                                             
## nVar                                    7   
## BIC                               -5017.0791
## post prob                             0.077
imageplot.bma(bma.search)

## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6)
library(BMA)
xvars1 <- datBus[, c(2,3,4,10,12,16,18,20,22,25,7,15)]
y1 <- Bus
bma.search1 <- bic.glm(xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(bma.search1)
## 
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial",     strict = F, OR = 20)
## 
## 
##   5  models were selected
##  Best  5  models (cumulative posterior probability =  1 ): 
## 
##                                  p!=0    EV       SD        model 1   
## Intercept                        100     0.90900  0.591728   7.761e-01
## Central.Area                      83.4                                
##             .Yes                         0.69811  0.402090   8.031e-01
## Purpose                          100.0                                
##        .Picking Children                -1.04356  0.568641  -1.029e+00
##        .Entertainment                   -2.39775  0.456826  -2.369e+00
##        .Others                          -0.98505  0.746565  -9.703e-01
## Frequency                        100.0                                
##          .2 times                        0.42454  0.559143   4.687e-01
##          .3 times                        0.16370  0.548297   2.054e-01
##          .> 3 times                     -1.84363  0.465893  -1.801e+00
## Temporary.Stop.Number            100.0                                
##                      .1 stop            -1.74026  0.399078  -1.749e+00
##                      .>=2 stops          0.11400  0.351863   1.309e-01
## Weather                           24.1                                
##        .Rainny                           0.51796  0.974498       .    
##        .Cool                            -0.04228  0.179591       .    
## Bus.Stop.Present                   0.0                                
##                 .Yes                     0.00000  0.000000       .    
##                 .Don't know              0.00000  0.000000       .    
## Age                              100.0                                
##    .25-60                               -0.42415  0.308633  -4.506e-01
##    .>60                                  2.71003  0.561479   2.663e+00
## Income                             4.5                                
##       .(8-15) millions                  -0.03090  0.157895       .    
##       .(15-25) millions                 -0.02235  0.128148       .    
##       .>25 millions                     -0.10611  0.518043       .    
## Motor.Certificate                100.0                                
##                  .Yes                   -1.57939  0.332623  -1.590e+00
## Motor.Owning                       3.0                                
##             .Yes                        -0.01707  0.113970       .    
## Travel.Period                    100.0   0.04064  0.009331   4.184e-02
## Cost                             100.0  -0.09959  0.028185  -9.852e-02
##                                                                       
## nVar                                                           8      
## BIC                                                         -4.905e+03
## post prob                                                    0.548    
##                                  model 2     model 3     model 4   
## Intercept                         8.100e-01   1.370e+00   1.278e+00
## Central.Area                                                       
##             .Yes                  9.122e-01       .       8.527e-01
## Purpose                                                            
##        .Picking Children         -1.000e+00  -1.137e+00  -1.115e+00
##        .Entertainment            -2.469e+00  -2.378e+00  -2.453e+00
##        .Others                   -1.102e+00  -9.134e-01  -7.229e-01
## Frequency                                                          
##          .2 times                 2.975e-01   4.681e-01   4.199e-01
##          .3 times                 1.350e-01   8.485e-02   1.495e-01
##          .> 3 times              -1.911e+00  -1.873e+00  -1.893e+00
## Temporary.Stop.Number                                              
##                      .1 stop     -1.676e+00  -1.784e+00  -1.824e+00
##                      .>=2 stops   7.599e-02   1.285e-01   7.867e-02
## Weather                                                            
##        .Rainny                    2.136e+00       .           .    
##        .Cool                     -1.811e-01       .           .    
## Bus.Stop.Present                                                   
##                 .Yes                  .           .           .    
##                 .Don't know           .           .           .    
## Age                                                                
##    .25-60                        -4.025e-01  -4.205e-01  -2.879e-01
##    .>60                           2.850e+00   2.661e+00   2.712e+00
## Income                                                             
##       .(8-15) millions                .           .      -6.929e-01
##       .(15-25) millions               .           .      -5.012e-01
##       .>25 millions                   .           .      -2.380e+00
## Motor.Certificate                                                  
##                  .Yes            -1.583e+00  -1.585e+00  -1.608e+00
## Motor.Owning                                                       
##             .Yes                      .           .           .    
## Travel.Period                     4.071e-02   3.670e-02   4.105e-02
## Cost                             -1.051e-01  -9.813e-02  -9.229e-02
##                                                                    
## nVar                                9           7           9      
## BIC                              -4.903e+03  -4.903e+03  -4.900e+03
## post prob                         0.212       0.166       0.045    
##                                  model 5   
## Intercept                         9.432e-01
## Central.Area                               
##             .Yes                  8.984e-01
## Purpose                                    
##        .Picking Children         -9.919e-01
##        .Entertainment            -2.443e+00
##        .Others                   -1.217e+00
## Frequency                                  
##          .2 times                 2.794e-01
##          .3 times                 6.074e-02
##          .> 3 times              -1.911e+00
## Temporary.Stop.Number                      
##                      .1 stop     -1.670e+00
##                      .>=2 stops   4.538e-02
## Weather                                    
##        .Rainny                    2.214e+00
##        .Cool                     -1.329e-01
## Bus.Stop.Present                           
##                 .Yes                  .    
##                 .Don't know           .    
## Age                                        
##    .25-60                        -3.158e-01
##    .>60                           2.842e+00
## Income                                     
##       .(8-15) millions                .    
##       .(15-25) millions               .    
##       .>25 millions                   .    
## Motor.Certificate                          
##                  .Yes            -1.277e+00
## Motor.Owning                               
##             .Yes                 -5.745e-01
## Travel.Period                     3.922e-02
## Cost                             -9.891e-02
##                                            
## nVar                                10     
## BIC                              -4.899e+03
## post prob                         0.030
imageplot.bma(bma.search1)

## Choose Model 1 from bma.search1 : remove 4 variables (Weather (12), Bus.Stop.Present (16), Motor.Owning (25), Income (20)) and modelling again --> 8 variables
mg2 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, family = binomial, data = datBus)
mg2
## 
## Call:  glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Age + Motor.Certificate + Travel.Period + Cost, family = binomial, 
##     data = datBus)
## 
## Coefficients:
##                    (Intercept)                 Central.AreaYes  
##                        0.77606                         0.80313  
##        PurposePicking Children            PurposeEntertainment  
##                       -1.02909                        -2.36946  
##                  PurposeOthers                Frequency2 times  
##                       -0.97031                         0.46866  
##               Frequency3 times              Frequency> 3 times  
##                        0.20536                        -1.80115  
##    Temporary.Stop.Number1 stop  Temporary.Stop.Number>=2 stops  
##                       -1.74861                         0.13088  
##                       Age25-60                          Age>60  
##                       -0.45058                         2.66331  
##           Motor.CertificateYes                   Travel.Period  
##                       -1.59018                         0.04184  
##                           Cost  
##                       -0.09852  
## 
## Degrees of Freedom: 808 Total (i.e. Null);  794 Residual
## Null Deviance:       628.3 
## Residual Deviance: 411.5     AIC: 441.5
summary(mg2)
## 
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Age + Motor.Certificate + Travel.Period + Cost, family = binomial, 
##     data = datBus)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4647  -0.4365  -0.2316  -0.1143   3.1417  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     0.77606    0.54706   1.419 0.156015    
## Central.AreaYes                 0.80313    0.27165   2.957 0.003111 ** 
## PurposePicking Children        -1.02909    0.56419  -1.824 0.068150 .  
## PurposeEntertainment           -2.36946    0.44919  -5.275 1.33e-07 ***
## PurposeOthers                  -0.97031    0.73516  -1.320 0.186878    
## Frequency2 times                0.46866    0.55317   0.847 0.396874    
## Frequency3 times                0.20536    0.54499   0.377 0.706315    
## Frequency> 3 times             -1.80115    0.46243  -3.895 9.82e-05 ***
## Temporary.Stop.Number1 stop    -1.74861    0.39604  -4.415 1.01e-05 ***
## Temporary.Stop.Number>=2 stops  0.13088    0.34701   0.377 0.706052    
## Age25-60                       -0.45058    0.30490  -1.478 0.139470    
## Age>60                          2.66331    0.55312   4.815 1.47e-06 ***
## Motor.CertificateYes           -1.59017    0.32545  -4.886 1.03e-06 ***
## Travel.Period                   0.04184    0.00925   4.524 6.08e-06 ***
## Cost                           -0.09852    0.02766  -3.562 0.000368 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 411.48  on 794  degrees of freedom
## AIC: 441.48
## 
## Number of Fisher Scoring iterations: 7
## Calculate OR (odd ratio)
exp(mg2$coefficients)
##                    (Intercept)                Central.AreaYes 
##                     2.17288728                     2.23251131 
##        PurposePicking Children           PurposeEntertainment 
##                     0.35733306                     0.09353129 
##                  PurposeOthers               Frequency2 times 
##                     0.37896411                     1.59784912 
##               Frequency3 times             Frequency> 3 times 
##                     1.22796259                     0.16510910 
##    Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops 
##                     0.17401624                     1.13982907 
##                       Age25-60                         Age>60 
##                     0.63725954                    14.34371919 
##           Motor.CertificateYes                  Travel.Period 
##                     0.20388992                     1.04273033 
##                           Cost 
##                     0.90617421
exp(coef(mg2))
##                    (Intercept)                Central.AreaYes 
##                     2.17288728                     2.23251131 
##        PurposePicking Children           PurposeEntertainment 
##                     0.35733306                     0.09353129 
##                  PurposeOthers               Frequency2 times 
##                     0.37896411                     1.59784912 
##               Frequency3 times             Frequency> 3 times 
##                     1.22796259                     0.16510910 
##    Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops 
##                     0.17401624                     1.13982907 
##                       Age25-60                         Age>60 
##                     0.63725954                    14.34371919 
##           Motor.CertificateYes                  Travel.Period 
##                     0.20388992                     1.04273033 
##                           Cost 
##                     0.90617421
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 3.6.3
## Loading required package: foreign
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: nnet
## Registered S3 method overwritten by 'epiDisplay':
##   method       from
##   print.lrtest rms
## 
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
## 
##     alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
## 
##     alpha
logistic.display(mg2)
## 
## Logistic regression predicting Bus : Yes vs No 
##  
##                                  crude OR(95%CI)    adj. OR(95%CI)     
## Central.Area: Yes vs No          1.94 (1.28,2.95)   2.23 (1.31,3.8)    
##                                                                        
## Purpose: ref.=Work/Study                                               
##    Picking Children              0.64 (0.31,1.32)   0.36 (0.12,1.08)   
##    Entertainment                 0.56 (0.3,1.04)    0.09 (0.04,0.23)   
##    Others                        2.81 (1.11,7.1)    0.38 (0.09,1.6)    
##                                                                        
## Frequency: ref.=Once                                                   
##    2 times                       1.32 (0.58,3.03)   1.6 (0.54,4.72)    
##    3 times                       1.18 (0.52,2.66)   1.23 (0.42,3.57)   
##    > 3 times                     0.32 (0.16,0.63)   0.17 (0.07,0.41)   
##                                                                        
## Temporary.Stop.Number: ref.=None                                       
##    1 stop                        0.18 (0.09,0.35)   0.17 (0.08,0.38)   
##    >=2 stops                     0.87 (0.5,1.51)    1.14 (0.58,2.25)   
##                                                                        
## Age: ref.=15-24                                                        
##    25-60                         0.33 (0.2,0.54)    0.64 (0.35,1.16)   
##    >60                           5.48 (2.74,10.98)  14.34 (4.85,42.41) 
##                                                                        
## Motor.Certificate: Yes vs No     0.19 (0.11,0.3)    0.2 (0.11,0.39)    
##                                                                        
## Travel.Period (cont. var.)       1.01 (1,1.03)      1.04 (1.02,1.06)   
##                                                                        
## Cost (cont. var.)                0.94 (0.9,0.98)    0.91 (0.86,0.96)   
##                                                                        
##                                  P(Wald's test) P(LR-test)
## Central.Area: Yes vs No          0.003          0.003     
##                                                           
## Purpose: ref.=Work/Study                        < 0.001   
##    Picking Children              0.068                    
##    Entertainment                 < 0.001                  
##    Others                        0.187                    
##                                                           
## Frequency: ref.=Once                            < 0.001   
##    2 times                       0.397                    
##    3 times                       0.706                    
##    > 3 times                     < 0.001                  
##                                                           
## Temporary.Stop.Number: ref.=None                < 0.001   
##    1 stop                        < 0.001                  
##    >=2 stops                     0.706                    
##                                                           
## Age: ref.=15-24                                 < 0.001   
##    25-60                         0.139                    
##    >60                           < 0.001                  
##                                                           
## Motor.Certificate: Yes vs No     < 0.001        < 0.001   
##                                                           
## Travel.Period (cont. var.)       < 0.001        < 0.001   
##                                                           
## Cost (cont. var.)                < 0.001        < 0.001   
##                                                           
## Log-likelihood = -205.7404
## No. of observations = 809
## AIC value = 441.4808
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
## 
##     dotplot
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
## 
##     lrtest
## The following objects are masked from 'package:car':
## 
##     Predict, vif
mg2.c2 = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, data = datBus)
mg2.c2
## Logistic Regression Model
##  
##  lrm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##      Age + Motor.Certificate + Travel.Period + Cost, data = datBus)
##  
##                        Model Likelihood     Discrimination    Rank Discrim.    
##                           Ratio Test           Indexes           Indexes       
##  Obs           809    LR chi2     216.84    R2       0.435    C       0.882    
##   No           703    d.f.            14    g        2.277    Dxy     0.764    
##   Yes          106    Pr(> chi2) <0.0001    gr       9.752    gamma   0.765    
##  max |deriv| 2e-05                          gp       0.173    tau-a   0.174    
##                                             Brier    0.074                     
##  
##                                  Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept                        0.7761 0.5471  1.42  0.1560  
##  Central.Area=Yes                 0.8031 0.2716  2.96  0.0031  
##  Purpose=Picking Children        -1.0291 0.5642 -1.82  0.0682  
##  Purpose=Entertainment           -2.3695 0.4492 -5.27  <0.0001 
##  Purpose=Others                  -0.9703 0.7352 -1.32  0.1869  
##  Frequency=2 times                0.4687 0.5532  0.85  0.3969  
##  Frequency=3 times                0.2054 0.5450  0.38  0.7063  
##  Frequency=> 3 times             -1.8011 0.4624 -3.89  <0.0001 
##  Temporary.Stop.Number=1 stop    -1.7486 0.3960 -4.42  <0.0001 
##  Temporary.Stop.Number=>=2 stops  0.1309 0.3470  0.38  0.7061  
##  Age=25-60                       -0.4506 0.3049 -1.48  0.1395  
##  Age=>60                          2.6633 0.5531  4.82  <0.0001 
##  Motor.Certificate=Yes           -1.5902 0.3255 -4.89  <0.0001 
##  Travel.Period                    0.0418 0.0092  4.52  <0.0001 
##  Cost                            -0.0985 0.0277 -3.56  0.0004  
## 
mg1.c2 = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, data = datBus)
mg1.c2
## Logistic Regression Model
##  
##  lrm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##      Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + 
##      Motor.Owning + Travel.Period + Cost, data = datBus)
##  
##                        Model Likelihood     Discrimination    Rank Discrim.    
##                           Ratio Test           Indexes           Indexes       
##  Obs           809    LR chi2     246.05    R2       0.486    C       0.892    
##   No           703    d.f.            22    g        2.520    Dxy     0.783    
##   Yes          106    Pr(> chi2) <0.0001    gr      12.430    gamma   0.784    
##  max |deriv| 1e-09                          gp       0.182    tau-a   0.179    
##                                             Brier    0.067                     
##  
##                                  Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept                        1.8025 0.6776  2.66  0.0078  
##  Central.Area=Yes                 0.9051 0.2925  3.09  0.0020  
##  Purpose=Picking Children        -1.0876 0.5974 -1.82  0.0687  
##  Purpose=Entertainment           -2.4849 0.4831 -5.14  <0.0001 
##  Purpose=Others                  -1.0102 0.7619 -1.33  0.1849  
##  Frequency=2 times                0.3862 0.5729  0.67  0.5002  
##  Frequency=3 times                0.1286 0.5758  0.22  0.8233  
##  Frequency=> 3 times             -1.8828 0.4796 -3.93  <0.0001 
##  Temporary.Stop.Number=1 stop    -1.7544 0.4125 -4.25  <0.0001 
##  Temporary.Stop.Number=>=2 stops -0.0504 0.3776 -0.13  0.8939  
##  Weather=Rainny                   2.3627 0.7189  3.29  0.0010  
##  Weather=Cool                     0.0098 0.3455  0.03  0.9773  
##  Bus.Stop.Present=Yes            -0.5762 0.4169 -1.38  0.1669  
##  Bus.Stop.Present=Don't know     -0.8087 0.6743 -1.20  0.2304  
##  Age=25-60                       -0.1809 0.3270 -0.55  0.5801  
##  Age=>60                          2.8865 0.5731  5.04  <0.0001 
##  Income=(8-15) millions          -0.6099 0.3225 -1.89  0.0585  
##  Income=(15-25) millions         -0.4474 0.3736 -1.20  0.2312  
##  Income=>25 millions             -2.3626 0.7897 -2.99  0.0028  
##  Motor.Certificate=Yes           -1.3322 0.3920 -3.40  0.0007  
##  Motor.Owning=Yes                -0.4551 0.3553 -1.28  0.2003  
##  Travel.Period                    0.0365 0.0097  3.75  0.0002  
##  Cost                            -0.0928 0.0310 -2.99  0.0028  
## 

4. Test models

# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình)
deviance(mg)
## [1] 254.2948
deviance(mg1)
## [1] 382.2724
deviance(mg2)
## [1] 411.4808
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê
library(rms)
lrtest(mg2, mg)
lrtest(mg,mg2)
lrtest(mg2, mg1)
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt
AIC(mg)
## [1] 332.2948
AIC(mg1)
## [1] 428.2724
AIC(mg2)
## [1] 441.4808
## Calculate MacFadden's R2 voi packages "pscl"
library(pscl)
## Warning: package 'pscl' was built under R version 3.6.3
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML 
## -205.7403930 -314.1608850  216.8409840    0.3451114    0.2351196 
##         r2CU 
##    0.4353563
pR2(mg)
## fitting null model for pseudo-r2
##          llh      llhNull           G2     McFadden         r2ML 
## -127.1473808 -314.1608850  374.0270083    0.5952794    0.3701871 
##         r2CU 
##    0.6854523
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(datBus$Bus)
##     Yes
## No    0
## Yes   1
glm.probs <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##           1           2           3           4           5           6 
## 0.068978792 0.010724685 0.014359875 0.011680718 0.038184381 0.003167892 
##           7           8           9          10 
## 0.005570387 0.047199701 0.013507900 0.008546719
glm.pred <- rep("No", 809)
glm.pred[glm.probs >= 0.5] = "Yes"
table(glm.pred, datBus$Bus)
##         
## glm.pred  No Yes
##      No  688  61
##      Yes  15  45
mean(glm.pred == datBus$Bus) # Tỷ lệ dự báo chính xác của mô hình 90,61%
## [1] 0.9060569
round(prop.table(table(datBus$Bus))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không sd xe buýt 86.9% < 90.61% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##   No  Yes 
## 86.9 13.1
## Model mg
contrasts(datBus$Bus)
##     Yes
## No    0
## Yes   1
glm.probs.mg <- predict(mg, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên 
##            1            2            3            4            5 
## 2.808039e-04 1.131239e-03 5.179213e-04 3.754305e-03 9.648223e-03 
##            6            7            8            9           10 
## 7.014613e-09 9.920555e-08 1.075833e-02 2.621728e-03 2.171477e-03
glm.pred.mg <- rep("No", 809)
glm.pred.mg[glm.probs.mg >= 0.5] = "Yes"
table(glm.pred.mg, datBus$Bus)
##            
## glm.pred.mg  No Yes
##         No  691  31
##         Yes  12  75
mean(glm.pred.mg == datBus$Bus) # Tỷ lệ dự báo chính xác của mô hình 94,68% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-94,68 = 5,32%
## [1] 0.946848
round(prop.table(table(datBus$Bus))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không sd xe buýt 86.9% < 94.68% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
## 
##   No  Yes 
## 86.9 13.1
# 4.5. Calculate Sensitivity - Độ nhạy
712/(712+26) # Đạt 96,48% --> Rất lý tưởng: Mô hình xác định tới 96,48% người không sử dụng xe buýt
## [1] 0.9647696
# 4.6. Calculate Specificity - Độ đặc hiệu
49/(61+49) # Đạt 44,55% --> Mô hình có khả năng dự báo 44,55% người sử dụng xe buýt
## [1] 0.4454545
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với Hàm HLgof.test(fit = fitted(mg2), obs = datBus$Bus) trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
   ## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
## Warning: package 'caret' was built under R version 3.6.3
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
## The following object is masked from 'package:survival':
## 
##     cluster
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mgtrain = train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, data = datBus, method = "glm", family = "binomial", trControl = ctrl)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mgtrain)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5799  -0.2733  -0.1226  -0.0225   3.5052  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)                        0.693170   1.122728   0.617 0.536972
## Central.AreaYes                    1.153570   0.369569   3.121 0.001800
## `PurposePicking Children`         -0.846264   0.775170  -1.092 0.274959
## PurposeEntertainment              -2.128209   0.613702  -3.468 0.000525
## PurposeOthers                     -0.241582   0.938275  -0.257 0.796812
## `Frequency2 times`                 1.663772   0.810579   2.053 0.040114
## `Frequency3 times`                 0.551286   0.817643   0.674 0.500160
## `Frequency> 3 times`              -1.081913   0.669912  -1.615 0.106309
## `Temporary.Stop.Number1 stop`     -2.088256   0.549980  -3.797 0.000146
## `Temporary.Stop.Number>=2 stops`   0.007966   0.491717   0.016 0.987075
## WeatherRainny                      1.875750   1.022332   1.835 0.066539
## WeatherCool                       -0.560382   0.483314  -1.159 0.246270
## Bus.Stop.PresentYes               -0.648223   0.566922  -1.143 0.252869
## `Bus.Stop.PresentDon't know`      -1.082582   0.864050  -1.253 0.210236
## `Age25-60`                        -0.681496   0.689868  -0.988 0.323219
## `Age>60`                           2.254365   0.977059   2.307 0.021038
## `OccupationOfficers/Workers`       0.203182   0.735185   0.276 0.782265
## `OccupationHousewife/Unemployed`   2.166113   1.018430   2.127 0.033427
## `OccupationFree labor`            -0.369467   0.702315  -0.526 0.598840
## OccupationOthers                   1.387882   0.899394   1.543 0.122799
## `Income(8-15) millions`           -0.937300   0.434752  -2.156 0.031088
## `Income(15-25) millions`          -0.285344   0.510462  -0.559 0.576167
## `Income>25 millions`              -2.316472   1.157201  -2.002 0.045307
## `Number.of.Children1 child`       -0.666788   0.428135  -1.557 0.119370
## `Number.of.Children2 children`    -0.302515   0.608433  -0.497 0.619045
## `Number.of.Children>= 3 children` -0.920554   1.460357  -0.630 0.528457
## Motor.CertificateYes              -1.343825   0.544182  -2.469 0.013532
## Car.CertificateYes                 0.038586   0.945690   0.041 0.967454
## Motor.OwningYes                   -0.746040   0.527259  -1.415 0.157086
## Car.OwningYes                      0.036269   1.448918   0.025 0.980030
## Number.of.Motors1                  0.618812   0.808973   0.765 0.444310
## Number.of.Motors2                  0.222040   0.802489   0.277 0.782019
## Number.of.Motors3                 -0.523223   0.881065  -0.594 0.552610
## `Number.of.Motors>3`              -0.510007   1.380742  -0.369 0.711851
## Number.of.Car1                    -1.162398   0.858510  -1.354 0.175745
## `Number.of.Car>=2`                -1.072850   2.226960  -0.482 0.629980
## Distance                           0.688046   0.098829   6.962 3.36e-12
## Travel.Period                      0.009410   0.015030   0.626 0.531270
## Cost                              -0.618352   0.094949  -6.512 7.39e-11
##                                      
## (Intercept)                          
## Central.AreaYes                   ** 
## `PurposePicking Children`            
## PurposeEntertainment              ***
## PurposeOthers                        
## `Frequency2 times`                *  
## `Frequency3 times`                   
## `Frequency> 3 times`                 
## `Temporary.Stop.Number1 stop`     ***
## `Temporary.Stop.Number>=2 stops`     
## WeatherRainny                     .  
## WeatherCool                          
## Bus.Stop.PresentYes                  
## `Bus.Stop.PresentDon't know`         
## `Age25-60`                           
## `Age>60`                          *  
## `OccupationOfficers/Workers`         
## `OccupationHousewife/Unemployed`  *  
## `OccupationFree labor`               
## OccupationOthers                     
## `Income(8-15) millions`           *  
## `Income(15-25) millions`             
## `Income>25 millions`              *  
## `Number.of.Children1 child`          
## `Number.of.Children2 children`       
## `Number.of.Children>= 3 children`    
## Motor.CertificateYes              *  
## Car.CertificateYes                   
## Motor.OwningYes                      
## Car.OwningYes                        
## Number.of.Motors1                    
## Number.of.Motors2                    
## Number.of.Motors3                    
## `Number.of.Motors>3`                 
## Number.of.Car1                       
## `Number.of.Car>=2`                   
## Distance                          ***
## Travel.Period                        
## Cost                              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 254.29  on 770  degrees of freedom
## AIC: 332.29
## 
## Number of Fisher Scoring iterations: 9
mg2train = train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, data = datBus, method = "glm", family = "binomial", trControl = ctrl)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mg2train)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4647  -0.4365  -0.2316  -0.1143   3.1417  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       0.77606    0.54706   1.419 0.156015    
## Central.AreaYes                   0.80313    0.27165   2.957 0.003111 ** 
## `PurposePicking Children`        -1.02909    0.56419  -1.824 0.068150 .  
## PurposeEntertainment             -2.36946    0.44919  -5.275 1.33e-07 ***
## PurposeOthers                    -0.97031    0.73516  -1.320 0.186878    
## `Frequency2 times`                0.46866    0.55317   0.847 0.396874    
## `Frequency3 times`                0.20536    0.54499   0.377 0.706315    
## `Frequency> 3 times`             -1.80115    0.46243  -3.895 9.82e-05 ***
## `Temporary.Stop.Number1 stop`    -1.74861    0.39604  -4.415 1.01e-05 ***
## `Temporary.Stop.Number>=2 stops`  0.13088    0.34701   0.377 0.706052    
## `Age25-60`                       -0.45058    0.30490  -1.478 0.139470    
## `Age>60`                          2.66331    0.55312   4.815 1.47e-06 ***
## Motor.CertificateYes             -1.59017    0.32545  -4.886 1.03e-06 ***
## Travel.Period                     0.04184    0.00925   4.524 6.08e-06 ***
## Cost                             -0.09852    0.02766  -3.562 0.000368 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 411.48  on 794  degrees of freedom
## AIC: 441.48
## 
## Number of Fisher Scoring iterations: 7
  ## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg <- predict(mgtrain, newdata = datBus)
confusionMatrix(data = predmg, datBus$Bus)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  691  31
##        Yes  12  75
##                                           
##                Accuracy : 0.9468          
##                  95% CI : (0.9291, 0.9613)
##     No Information Rate : 0.869           
##     P-Value [Acc > NIR] : 1.955e-13       
##                                           
##                   Kappa : 0.7474          
##                                           
##  Mcnemar's Test P-Value : 0.006052        
##                                           
##             Sensitivity : 0.9829          
##             Specificity : 0.7075          
##          Pos Pred Value : 0.9571          
##          Neg Pred Value : 0.8621          
##              Prevalence : 0.8690          
##          Detection Rate : 0.8541          
##    Detection Prevalence : 0.8925          
##       Balanced Accuracy : 0.8452          
##                                           
##        'Positive' Class : No              
## 
predmg2 <- predict(mg2train, newdata = datBus)
confusionMatrix(data = predmg2, datBus$Bus)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  No Yes
##        No  688  61
##        Yes  15  45
##                                           
##                Accuracy : 0.9061          
##                  95% CI : (0.8838, 0.9253)
##     No Information Rate : 0.869           
##     P-Value [Acc > NIR] : 0.0006901       
##                                           
##                   Kappa : 0.4943          
##                                           
##  Mcnemar's Test P-Value : 2.445e-07       
##                                           
##             Sensitivity : 0.9787          
##             Specificity : 0.4245          
##          Pos Pred Value : 0.9186          
##          Neg Pred Value : 0.7500          
##              Prevalence : 0.8690          
##          Detection Rate : 0.8504          
##    Detection Prevalence : 0.9258          
##       Balanced Accuracy : 0.7016          
##                                           
##        'Positive' Class : No              
## 
  ## Lấy dataframe với thông tin Accuracy của mô hình
a = mgtrain$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg" # Thêm biến mô hình
a
##      Accuracy     Kappa  Mohinh
## 1   0.9250000 0.6250000 Logitmg
## 2   0.9382716 0.7028613 Logitmg
## 3   0.9625000 0.8032787 Logitmg
## 4   0.9146341 0.6178429 Logitmg
## 5   0.9125000 0.6164384 Logitmg
## 6   0.9146341 0.5451664 Logitmg
## 7   0.9259259 0.6255778 Logitmg
## 8   0.9259259 0.6273006 Logitmg
## 9   0.9135802 0.5445783 Logitmg
## 10  0.9506173 0.7721519 Logitmg
## 11  0.9750000 0.8750000 Logitmg
## 12  0.9259259 0.6582278 Logitmg
## 13  0.9125000 0.6164384 Logitmg
## 14  0.9259259 0.6582278 Logitmg
## 15  0.9500000 0.7241379 Logitmg
## 16  0.8888889 0.3461883 Logitmg
## 17  0.9634146 0.8219971 Logitmg
## 18  0.9259259 0.6582278 Logitmg
## 19  0.9146341 0.5846599 Logitmg
## 20  0.9135802 0.5445783 Logitmg
## 21  0.9753086 0.8860759 Logitmg
## 22  0.9259259 0.6577465 Logitmg
## 23  0.9506173 0.7721519 Logitmg
## 24  0.9012346 0.3851992 Logitmg
## 25  0.9012346 0.5030675 Logitmg
## 26  0.9629630 0.8217168 Logitmg
## 27  0.9135802 0.6454034 Logitmg
## 28  0.9135802 0.5445783 Logitmg
## 29  0.9125000 0.5409836 Logitmg
## 30  0.9012346 0.5007704 Logitmg
## 31  0.9382716 0.6725950 Logitmg
## 32  0.9259259 0.6255778 Logitmg
## 33  0.8765432 0.5114596 Logitmg
## 34  0.9506173 0.7515337 Logitmg
## 35  0.9629630 0.8480300 Logitmg
## 36  0.9135802 0.6454034 Logitmg
## 37  0.9750000 0.8750000 Logitmg
## 38  0.9135802 0.4968944 Logitmg
## 39  0.9500000 0.7241379 Logitmg
## 40  0.9390244 0.7033285 Logitmg
## 41  0.9506173 0.7515337 Logitmg
## 42  0.9375000 0.7014925 Logitmg
## 43  0.9375000 0.7014925 Logitmg
## 44  0.9012346 0.5030675 Logitmg
## 45  0.9382716 0.7028613 Logitmg
## 46  0.9625000 0.8032787 Logitmg
## 47  0.9012346 0.5443038 Logitmg
## 48  0.9512195 0.7725381 Logitmg
## 49  0.9629630 0.8359217 Logitmg
## 50  0.9024390 0.5450763 Logitmg
## 51  0.9512195 0.7518911 Logitmg
## 52  0.9382716 0.7028613 Logitmg
## 53  0.9625000 0.8032787 Logitmg
## 54  0.9250000 0.6571429 Logitmg
## 55  0.9146341 0.6178429 Logitmg
## 56  0.9012346 0.5030675 Logitmg
## 57  0.9500000 0.7894737 Logitmg
## 58  0.9375000 0.6363636 Logitmg
## 59  0.8765432 0.3788344 Logitmg
## 60  0.9390244 0.7270306 Logitmg
## 61  0.9125000 0.5409836 Logitmg
## 62  0.9250000 0.6250000 Logitmg
## 63  0.9024390 0.4542429 Logitmg
## 64  0.9250000 0.6571429 Logitmg
## 65  0.9506173 0.7721519 Logitmg
## 66  0.9390244 0.7033285 Logitmg
## 67  0.9012346 0.5443038 Logitmg
## 68  0.9382716 0.6725950 Logitmg
## 69  0.9382716 0.7028613 Logitmg
## 70  0.9382716 0.7265361 Logitmg
## 71  0.9250000 0.6842105 Logitmg
## 72  0.9506173 0.7721519 Logitmg
## 73  0.9390244 0.7033285 Logitmg
## 74  0.8765432 0.5114596 Logitmg
## 75  0.9259259 0.6582278 Logitmg
## 76  0.9382716 0.6746988 Logitmg
## 77  0.9382716 0.6725950 Logitmg
## 78  0.9125000 0.4909091 Logitmg
## 79  0.9506173 0.7721519 Logitmg
## 80  0.9382716 0.7019868 Logitmg
## 81  0.9506173 0.7896104 Logitmg
## 82  0.9135802 0.6171506 Logitmg
## 83  0.8658537 0.3473227 Logitmg
## 84  0.9506173 0.7503852 Logitmg
## 85  0.9506173 0.7515337 Logitmg
## 86  0.9500000 0.7500000 Logitmg
## 87  0.9382716 0.7265361 Logitmg
## 88  0.9375000 0.7014925 Logitmg
## 89  0.9024390 0.4542429 Logitmg
## 90  0.9500000 0.7500000 Logitmg
## 91  0.9012346 0.5443038 Logitmg
## 92  0.9024390 0.5037821 Logitmg
## 93  0.8888889 0.4144578 Logitmg
## 94  0.9250000 0.6571429 Logitmg
## 95  0.9625000 0.8356164 Logitmg
## 96  0.9506173 0.7503852 Logitmg
## 97  0.9125000 0.6455696 Logitmg
## 98  0.9506173 0.7515337 Logitmg
## 99  0.9259259 0.6582278 Logitmg
## 100 0.9146341 0.4973730 Logitmg
b = mg2train$resample
b = b[, -3]
b$Mohinh = "Logitmg2"
b
##      Accuracy      Kappa   Mohinh
## 1   0.8875000 0.40983607 Logitmg2
## 2   0.8888889 0.26586103 Logitmg2
## 3   0.8641975 0.20940550 Logitmg2
## 4   0.8641975 0.20940550 Logitmg2
## 5   0.9512195 0.77253814 Logitmg2
## 6   0.8888889 0.46515040 Logitmg2
## 7   0.9012346 0.50306748 Logitmg2
## 8   0.9000000 0.44827586 Logitmg2
## 9   0.8888889 0.41067098 Logitmg2
## 10  0.9382716 0.67469880 Logitmg2
## 11  0.8625000 0.27868852 Logitmg2
## 12  0.9250000 0.58620690 Logitmg2
## 13  0.9000000 0.38461538 Logitmg2
## 14  0.8902439 0.35376532 Logitmg2
## 15  0.9382716 0.63677130 Logitmg2
## 16  0.8414634 0.34278668 Logitmg2
## 17  0.8518519 0.31645570 Logitmg2
## 18  0.9259259 0.59021922 Logitmg2
## 19  0.9135802 0.54457831 Logitmg2
## 20  0.8888889 0.35314996 Logitmg2
## 21  0.9125000 0.42857143 Logitmg2
## 22  0.9012346 0.45362563 Logitmg2
## 23  0.8518519 0.31645570 Logitmg2
## 24  0.8902439 0.27788650 Logitmg2
## 25  0.9000000 0.44827586 Logitmg2
## 26  0.8750000 0.42857143 Logitmg2
## 27  0.9629630 0.82171680 Logitmg2
## 28  0.9012346 0.44897959 Logitmg2
## 29  0.8765432 0.43037975 Logitmg2
## 30  0.9024390 0.54507628 Logitmg2
## 31  0.9135802 0.54457831 Logitmg2
## 32  0.8658537 0.28526149 Logitmg2
## 33  0.8888889 0.41445783 Logitmg2
## 34  0.9375000 0.67213115 Logitmg2
## 35  0.9012346 0.44897959 Logitmg2
## 36  0.9259259 0.62730061 Logitmg2
## 37  0.9125000 0.42857143 Logitmg2
## 38  0.8375000 0.28767123 Logitmg2
## 39  0.8765432 0.31703204 Logitmg2
## 40  0.9024390 0.50378215 Logitmg2
## 41  0.8658537 0.39946738 Logitmg2
## 42  0.9000000 0.38461538 Logitmg2
## 43  0.8641975 0.20940550 Logitmg2
## 44  0.9259259 0.59021922 Logitmg2
## 45  0.9382716 0.70286134 Logitmg2
## 46  0.8750000 0.31034483 Logitmg2
## 47  0.9259259 0.58673469 Logitmg2
## 48  0.8780488 0.37972769 Logitmg2
## 49  0.8888889 0.41445783 Logitmg2
## 50  0.9000000 0.44827586 Logitmg2
## 51  0.9135802 0.49689441 Logitmg2
## 52  0.9250000 0.58620690 Logitmg2
## 53  0.8888889 0.41445783 Logitmg2
## 54  0.9259259 0.59021922 Logitmg2
## 55  0.8518519 0.25460123 Logitmg2
## 56  0.8902439 0.35376532 Logitmg2
## 57  0.9135802 0.58278146 Logitmg2
## 58  0.8641975 0.27970897 Logitmg2
## 59  0.9012346 0.57922078 Logitmg2
## 60  0.9125000 0.42857143 Logitmg2
## 61  0.9146341 0.49737303 Logitmg2
## 62  0.8875000 0.34545455 Logitmg2
## 63  0.8875000 0.34545455 Logitmg2
## 64  0.9012346 0.50306748 Logitmg2
## 65  0.8750000 0.37500000 Logitmg2
## 66  0.9012346 0.50306748 Logitmg2
## 67  0.9146341 0.61784288 Logitmg2
## 68  0.8658537 0.11741683 Logitmg2
## 69  0.9000000 0.50000000 Logitmg2
## 70  0.8888889 0.54409006 Logitmg2
## 71  0.8750000 0.31034483 Logitmg2
## 72  0.8765432 0.31703204 Logitmg2
## 73  0.9135802 0.58400587 Logitmg2
## 74  0.9000000 0.38461538 Logitmg2
## 75  0.9375000 0.63636364 Logitmg2
## 76  0.8750000 0.37500000 Logitmg2
## 77  0.9024390 0.50378215 Logitmg2
## 78  0.8780488 0.43134535 Logitmg2
## 79  0.8888889 0.27750248 Logitmg2
## 80  0.9146341 0.61784288 Logitmg2
## 81  0.9000000 0.54285714 Logitmg2
## 82  0.9250000 0.58620690 Logitmg2
## 83  0.9268293 0.62783661 Logitmg2
## 84  0.8536585 0.09057301 Logitmg2
## 85  0.8641975 0.34629494 Logitmg2
## 86  0.9259259 0.59021922 Logitmg2
## 87  0.8875000 0.40983607 Logitmg2
## 88  0.8888889 0.46515040 Logitmg2
## 89  0.9012346 0.45362563 Logitmg2
## 90  0.9012346 0.38519924 Logitmg2
## 91  0.8888889 0.46357616 Logitmg2
## 92  0.9012346 0.57922078 Logitmg2
## 93  0.9250000 0.53846154 Logitmg2
## 94  0.8518519 0.18043845 Logitmg2
## 95  0.8888889 0.35314996 Logitmg2
## 96  0.9135802 0.49689441 Logitmg2
## 97  0.9012346 0.54430380 Logitmg2
## 98  0.9250000 0.53846154 Logitmg2
## 99  0.9259259 0.53889943 Logitmg2
## 100 0.9146341 0.61784288 Logitmg2
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
##      Accuracy      Kappa   Mohinh
## 1   0.9250000 0.62500000  Logitmg
## 2   0.9382716 0.70286134  Logitmg
## 3   0.9625000 0.80327869  Logitmg
## 4   0.9146341 0.61784288  Logitmg
## 5   0.9125000 0.61643836  Logitmg
## 6   0.9146341 0.54516640  Logitmg
## 7   0.9259259 0.62557781  Logitmg
## 8   0.9259259 0.62730061  Logitmg
## 9   0.9135802 0.54457831  Logitmg
## 10  0.9506173 0.77215190  Logitmg
## 11  0.9750000 0.87500000  Logitmg
## 12  0.9259259 0.65822785  Logitmg
## 13  0.9125000 0.61643836  Logitmg
## 14  0.9259259 0.65822785  Logitmg
## 15  0.9500000 0.72413793  Logitmg
## 16  0.8888889 0.34618834  Logitmg
## 17  0.9634146 0.82199711  Logitmg
## 18  0.9259259 0.65822785  Logitmg
## 19  0.9146341 0.58465991  Logitmg
## 20  0.9135802 0.54457831  Logitmg
## 21  0.9753086 0.88607595  Logitmg
## 22  0.9259259 0.65774648  Logitmg
## 23  0.9506173 0.77215190  Logitmg
## 24  0.9012346 0.38519924  Logitmg
## 25  0.9012346 0.50306748  Logitmg
## 26  0.9629630 0.82171680  Logitmg
## 27  0.9135802 0.64540338  Logitmg
## 28  0.9135802 0.54457831  Logitmg
## 29  0.9125000 0.54098361  Logitmg
## 30  0.9012346 0.50077042  Logitmg
## 31  0.9382716 0.67259499  Logitmg
## 32  0.9259259 0.62557781  Logitmg
## 33  0.8765432 0.51145959  Logitmg
## 34  0.9506173 0.75153374  Logitmg
## 35  0.9629630 0.84803002  Logitmg
## 36  0.9135802 0.64540338  Logitmg
## 37  0.9750000 0.87500000  Logitmg
## 38  0.9135802 0.49689441  Logitmg
## 39  0.9500000 0.72413793  Logitmg
## 40  0.9390244 0.70332851  Logitmg
## 41  0.9506173 0.75153374  Logitmg
## 42  0.9375000 0.70149254  Logitmg
## 43  0.9375000 0.70149254  Logitmg
## 44  0.9012346 0.50306748  Logitmg
## 45  0.9382716 0.70286134  Logitmg
## 46  0.9625000 0.80327869  Logitmg
## 47  0.9012346 0.54430380  Logitmg
## 48  0.9512195 0.77253814  Logitmg
## 49  0.9629630 0.83592167  Logitmg
## 50  0.9024390 0.54507628  Logitmg
## 51  0.9512195 0.75189107  Logitmg
## 52  0.9382716 0.70286134  Logitmg
## 53  0.9625000 0.80327869  Logitmg
## 54  0.9250000 0.65714286  Logitmg
## 55  0.9146341 0.61784288  Logitmg
## 56  0.9012346 0.50306748  Logitmg
## 57  0.9500000 0.78947368  Logitmg
## 58  0.9375000 0.63636364  Logitmg
## 59  0.8765432 0.37883436  Logitmg
## 60  0.9390244 0.72703063  Logitmg
## 61  0.9125000 0.54098361  Logitmg
## 62  0.9250000 0.62500000  Logitmg
## 63  0.9024390 0.45424293  Logitmg
## 64  0.9250000 0.65714286  Logitmg
## 65  0.9506173 0.77215190  Logitmg
## 66  0.9390244 0.70332851  Logitmg
## 67  0.9012346 0.54430380  Logitmg
## 68  0.9382716 0.67259499  Logitmg
## 69  0.9382716 0.70286134  Logitmg
## 70  0.9382716 0.72653612  Logitmg
## 71  0.9250000 0.68421053  Logitmg
## 72  0.9506173 0.77215190  Logitmg
## 73  0.9390244 0.70332851  Logitmg
## 74  0.8765432 0.51145959  Logitmg
## 75  0.9259259 0.65822785  Logitmg
## 76  0.9382716 0.67469880  Logitmg
## 77  0.9382716 0.67259499  Logitmg
## 78  0.9125000 0.49090909  Logitmg
## 79  0.9506173 0.77215190  Logitmg
## 80  0.9382716 0.70198675  Logitmg
## 81  0.9506173 0.78961039  Logitmg
## 82  0.9135802 0.61715057  Logitmg
## 83  0.8658537 0.34732272  Logitmg
## 84  0.9506173 0.75038521  Logitmg
## 85  0.9506173 0.75153374  Logitmg
## 86  0.9500000 0.75000000  Logitmg
## 87  0.9382716 0.72653612  Logitmg
## 88  0.9375000 0.70149254  Logitmg
## 89  0.9024390 0.45424293  Logitmg
## 90  0.9500000 0.75000000  Logitmg
## 91  0.9012346 0.54430380  Logitmg
## 92  0.9024390 0.50378215  Logitmg
## 93  0.8888889 0.41445783  Logitmg
## 94  0.9250000 0.65714286  Logitmg
## 95  0.9625000 0.83561644  Logitmg
## 96  0.9506173 0.75038521  Logitmg
## 97  0.9125000 0.64556962  Logitmg
## 98  0.9506173 0.75153374  Logitmg
## 99  0.9259259 0.65822785  Logitmg
## 100 0.9146341 0.49737303  Logitmg
## 101 0.8875000 0.40983607 Logitmg2
## 102 0.8888889 0.26586103 Logitmg2
## 103 0.8641975 0.20940550 Logitmg2
## 104 0.8641975 0.20940550 Logitmg2
## 105 0.9512195 0.77253814 Logitmg2
## 106 0.8888889 0.46515040 Logitmg2
## 107 0.9012346 0.50306748 Logitmg2
## 108 0.9000000 0.44827586 Logitmg2
## 109 0.8888889 0.41067098 Logitmg2
## 110 0.9382716 0.67469880 Logitmg2
## 111 0.8625000 0.27868852 Logitmg2
## 112 0.9250000 0.58620690 Logitmg2
## 113 0.9000000 0.38461538 Logitmg2
## 114 0.8902439 0.35376532 Logitmg2
## 115 0.9382716 0.63677130 Logitmg2
## 116 0.8414634 0.34278668 Logitmg2
## 117 0.8518519 0.31645570 Logitmg2
## 118 0.9259259 0.59021922 Logitmg2
## 119 0.9135802 0.54457831 Logitmg2
## 120 0.8888889 0.35314996 Logitmg2
## 121 0.9125000 0.42857143 Logitmg2
## 122 0.9012346 0.45362563 Logitmg2
## 123 0.8518519 0.31645570 Logitmg2
## 124 0.8902439 0.27788650 Logitmg2
## 125 0.9000000 0.44827586 Logitmg2
## 126 0.8750000 0.42857143 Logitmg2
## 127 0.9629630 0.82171680 Logitmg2
## 128 0.9012346 0.44897959 Logitmg2
## 129 0.8765432 0.43037975 Logitmg2
## 130 0.9024390 0.54507628 Logitmg2
## 131 0.9135802 0.54457831 Logitmg2
## 132 0.8658537 0.28526149 Logitmg2
## 133 0.8888889 0.41445783 Logitmg2
## 134 0.9375000 0.67213115 Logitmg2
## 135 0.9012346 0.44897959 Logitmg2
## 136 0.9259259 0.62730061 Logitmg2
## 137 0.9125000 0.42857143 Logitmg2
## 138 0.8375000 0.28767123 Logitmg2
## 139 0.8765432 0.31703204 Logitmg2
## 140 0.9024390 0.50378215 Logitmg2
## 141 0.8658537 0.39946738 Logitmg2
## 142 0.9000000 0.38461538 Logitmg2
## 143 0.8641975 0.20940550 Logitmg2
## 144 0.9259259 0.59021922 Logitmg2
## 145 0.9382716 0.70286134 Logitmg2
## 146 0.8750000 0.31034483 Logitmg2
## 147 0.9259259 0.58673469 Logitmg2
## 148 0.8780488 0.37972769 Logitmg2
## 149 0.8888889 0.41445783 Logitmg2
## 150 0.9000000 0.44827586 Logitmg2
## 151 0.9135802 0.49689441 Logitmg2
## 152 0.9250000 0.58620690 Logitmg2
## 153 0.8888889 0.41445783 Logitmg2
## 154 0.9259259 0.59021922 Logitmg2
## 155 0.8518519 0.25460123 Logitmg2
## 156 0.8902439 0.35376532 Logitmg2
## 157 0.9135802 0.58278146 Logitmg2
## 158 0.8641975 0.27970897 Logitmg2
## 159 0.9012346 0.57922078 Logitmg2
## 160 0.9125000 0.42857143 Logitmg2
## 161 0.9146341 0.49737303 Logitmg2
## 162 0.8875000 0.34545455 Logitmg2
## 163 0.8875000 0.34545455 Logitmg2
## 164 0.9012346 0.50306748 Logitmg2
## 165 0.8750000 0.37500000 Logitmg2
## 166 0.9012346 0.50306748 Logitmg2
## 167 0.9146341 0.61784288 Logitmg2
## 168 0.8658537 0.11741683 Logitmg2
## 169 0.9000000 0.50000000 Logitmg2
## 170 0.8888889 0.54409006 Logitmg2
## 171 0.8750000 0.31034483 Logitmg2
## 172 0.8765432 0.31703204 Logitmg2
## 173 0.9135802 0.58400587 Logitmg2
## 174 0.9000000 0.38461538 Logitmg2
## 175 0.9375000 0.63636364 Logitmg2
## 176 0.8750000 0.37500000 Logitmg2
## 177 0.9024390 0.50378215 Logitmg2
## 178 0.8780488 0.43134535 Logitmg2
## 179 0.8888889 0.27750248 Logitmg2
## 180 0.9146341 0.61784288 Logitmg2
## 181 0.9000000 0.54285714 Logitmg2
## 182 0.9250000 0.58620690 Logitmg2
## 183 0.9268293 0.62783661 Logitmg2
## 184 0.8536585 0.09057301 Logitmg2
## 185 0.8641975 0.34629494 Logitmg2
## 186 0.9259259 0.59021922 Logitmg2
## 187 0.8875000 0.40983607 Logitmg2
## 188 0.8888889 0.46515040 Logitmg2
## 189 0.9012346 0.45362563 Logitmg2
## 190 0.9012346 0.38519924 Logitmg2
## 191 0.8888889 0.46357616 Logitmg2
## 192 0.9012346 0.57922078 Logitmg2
## 193 0.9250000 0.53846154 Logitmg2
## 194 0.8518519 0.18043845 Logitmg2
## 195 0.8888889 0.35314996 Logitmg2
## 196 0.9135802 0.49689441 Logitmg2
## 197 0.9012346 0.54430380 Logitmg2
## 198 0.9250000 0.53846154 Logitmg2
## 199 0.9259259 0.53889943 Logitmg2
## 200 0.9146341 0.61784288 Logitmg2
   ## Phân tích hình ảnh
library(ggplot2)
library(ggthemes)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.8701, color = "blue") + theme_wsj() # Giá trị 0.8701 = 87.01% là tỷ lệ nhóm chiếm ưu thế (không sử dụng xe buýt)
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 87.01% (đường màu xanh)

h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2

   ## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Accuracy by resamplemod$Mohinh
## t = 9.3916, df = 197.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02551030 0.03907123
## sample estimates:
##  mean in group Logitmg mean in group Logitmg2 
##              0.9293400              0.8970493
summary(a)
##     Accuracy          Kappa           Mohinh         
##  Min.   :0.8659   Min.   :0.3462   Length:100        
##  1st Qu.:0.9136   1st Qu.:0.5450   Class :character  
##  Median :0.9259   Median :0.6582   Mode  :character  
##  Mean   :0.9293   Mean   :0.6545                     
##  3rd Qu.:0.9502   3rd Qu.:0.7504                     
##  Max.   :0.9753   Max.   :0.8861
summary(b)
##     Accuracy          Kappa            Mohinh         
##  Min.   :0.8375   Min.   :0.09057   Length:100        
##  1st Qu.:0.8780   1st Qu.:0.35315   Class :character  
##  Median :0.9000   Median :0.44828   Mode  :character  
##  Mean   :0.8970   Mean   :0.45014                     
##  3rd Qu.:0.9136   3rd Qu.:0.54470                     
##  Max.   :0.9630   Max.   :0.82172
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
## 
##  Welch Two Sample t-test
## 
## data:  resamplemod$Kappa by resamplemod$Mohinh
## t = 11.036, df = 194.82, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1678119 0.2408437
## sample estimates:
##  mean in group Logitmg mean in group Logitmg2 
##              0.6544642              0.4501364
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Warning: package 'pROC' was built under R version 3.6.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
## 
##     ci
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
   ## Mô hình mg 
mgtrain1 <- train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, method = "glm", family = "binomial", data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mgtrain1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5799  -0.2733  -0.1226  -0.0225   3.5052  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)
## (Intercept)                        0.693170   1.122728   0.617 0.536972
## Central.AreaYes                    1.153570   0.369569   3.121 0.001800
## `PurposePicking Children`         -0.846264   0.775170  -1.092 0.274959
## PurposeEntertainment              -2.128209   0.613702  -3.468 0.000525
## PurposeOthers                     -0.241582   0.938275  -0.257 0.796812
## `Frequency2 times`                 1.663772   0.810579   2.053 0.040114
## `Frequency3 times`                 0.551286   0.817643   0.674 0.500160
## `Frequency> 3 times`              -1.081913   0.669912  -1.615 0.106309
## `Temporary.Stop.Number1 stop`     -2.088256   0.549980  -3.797 0.000146
## `Temporary.Stop.Number>=2 stops`   0.007966   0.491717   0.016 0.987075
## WeatherRainny                      1.875750   1.022332   1.835 0.066539
## WeatherCool                       -0.560382   0.483314  -1.159 0.246270
## Bus.Stop.PresentYes               -0.648223   0.566922  -1.143 0.252869
## `Bus.Stop.PresentDon't know`      -1.082582   0.864050  -1.253 0.210236
## `Age25-60`                        -0.681496   0.689868  -0.988 0.323219
## `Age>60`                           2.254365   0.977059   2.307 0.021038
## `OccupationOfficers/Workers`       0.203182   0.735185   0.276 0.782265
## `OccupationHousewife/Unemployed`   2.166113   1.018430   2.127 0.033427
## `OccupationFree labor`            -0.369467   0.702315  -0.526 0.598840
## OccupationOthers                   1.387882   0.899394   1.543 0.122799
## `Income(8-15) millions`           -0.937300   0.434752  -2.156 0.031088
## `Income(15-25) millions`          -0.285344   0.510462  -0.559 0.576167
## `Income>25 millions`              -2.316472   1.157201  -2.002 0.045307
## `Number.of.Children1 child`       -0.666788   0.428135  -1.557 0.119370
## `Number.of.Children2 children`    -0.302515   0.608433  -0.497 0.619045
## `Number.of.Children>= 3 children` -0.920554   1.460357  -0.630 0.528457
## Motor.CertificateYes              -1.343825   0.544182  -2.469 0.013532
## Car.CertificateYes                 0.038586   0.945690   0.041 0.967454
## Motor.OwningYes                   -0.746040   0.527259  -1.415 0.157086
## Car.OwningYes                      0.036269   1.448918   0.025 0.980030
## Number.of.Motors1                  0.618812   0.808973   0.765 0.444310
## Number.of.Motors2                  0.222040   0.802489   0.277 0.782019
## Number.of.Motors3                 -0.523223   0.881065  -0.594 0.552610
## `Number.of.Motors>3`              -0.510007   1.380742  -0.369 0.711851
## Number.of.Car1                    -1.162398   0.858510  -1.354 0.175745
## `Number.of.Car>=2`                -1.072850   2.226960  -0.482 0.629980
## Distance                           0.688046   0.098829   6.962 3.36e-12
## Travel.Period                      0.009410   0.015030   0.626 0.531270
## Cost                              -0.618352   0.094949  -6.512 7.39e-11
##                                      
## (Intercept)                          
## Central.AreaYes                   ** 
## `PurposePicking Children`            
## PurposeEntertainment              ***
## PurposeOthers                        
## `Frequency2 times`                *  
## `Frequency3 times`                   
## `Frequency> 3 times`                 
## `Temporary.Stop.Number1 stop`     ***
## `Temporary.Stop.Number>=2 stops`     
## WeatherRainny                     .  
## WeatherCool                          
## Bus.Stop.PresentYes                  
## `Bus.Stop.PresentDon't know`         
## `Age25-60`                           
## `Age>60`                          *  
## `OccupationOfficers/Workers`         
## `OccupationHousewife/Unemployed`  *  
## `OccupationFree labor`               
## OccupationOthers                     
## `Income(8-15) millions`           *  
## `Income(15-25) millions`             
## `Income>25 millions`              *  
## `Number.of.Children1 child`          
## `Number.of.Children2 children`       
## `Number.of.Children>= 3 children`    
## Motor.CertificateYes              *  
## Car.CertificateYes                   
## Motor.OwningYes                      
## Car.OwningYes                        
## Number.of.Motors1                    
## Number.of.Motors2                    
## Number.of.Motors3                    
## `Number.of.Motors>3`                 
## Number.of.Car1                       
## `Number.of.Car>=2`                   
## Distance                          ***
## Travel.Period                        
## Cost                              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 254.29  on 770  degrees of freedom
## AIC: 332.29
## 
## Number of Fisher Scoring iterations: 9
pred1mg <- predict(mgtrain1, newdata = datBus, type = "prob")
rmg <- roc(datBus$Bus, pred1mg$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg$auc
## Area under the curve: 0.942
plot(rmg, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

plot(smooth(rmg), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)

   ## Có thể dùng các lệnh đơn giản
auc(rmg)
## Area under the curve: 0.942
plot.roc(rmg)

ci(rmg)
## 95% CI: 0.9137-0.9702 (DeLong)
   ## Mô hình mg2 
mg2train1 <- train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, data = datBus, method = "glm", family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mg2train1)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4647  -0.4365  -0.2316  -0.1143   3.1417  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       0.77606    0.54706   1.419 0.156015    
## Central.AreaYes                   0.80313    0.27165   2.957 0.003111 ** 
## `PurposePicking Children`        -1.02909    0.56419  -1.824 0.068150 .  
## PurposeEntertainment             -2.36946    0.44919  -5.275 1.33e-07 ***
## PurposeOthers                    -0.97031    0.73516  -1.320 0.186878    
## `Frequency2 times`                0.46866    0.55317   0.847 0.396874    
## `Frequency3 times`                0.20536    0.54499   0.377 0.706315    
## `Frequency> 3 times`             -1.80115    0.46243  -3.895 9.82e-05 ***
## `Temporary.Stop.Number1 stop`    -1.74861    0.39604  -4.415 1.01e-05 ***
## `Temporary.Stop.Number>=2 stops`  0.13088    0.34701   0.377 0.706052    
## `Age25-60`                       -0.45058    0.30490  -1.478 0.139470    
## `Age>60`                          2.66331    0.55312   4.815 1.47e-06 ***
## Motor.CertificateYes             -1.59017    0.32545  -4.886 1.03e-06 ***
## Travel.Period                     0.04184    0.00925   4.524 6.08e-06 ***
## Cost                             -0.09852    0.02766  -3.562 0.000368 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 411.48  on 794  degrees of freedom
## AIC: 441.48
## 
## Number of Fisher Scoring iterations: 7
pred1mg2 <- predict(mg2train1, newdata = datBus, type = "prob")
rmg2 <- roc(datBus$Bus, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.882
auc(rmg2)
## Area under the curve: 0.882
plot.roc(rmg2)

ci(rmg2)
## 95% CI: 0.8444-0.9196 (DeLong)
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)

   ## Tính hệ số Gini = 2AUC-1
Ginimg <- 2*(rmg$auc)-1
Ginimg
## [1] 0.8839207
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.7640302

5. Importance of variables in models

library(caret)
varImp(mg2)
##                                  Overall
## Central.AreaYes                2.9565217
## PurposePicking Children        1.8240135
## PurposeEntertainment           5.2749505
## PurposeOthers                  1.3198716
## Frequency2 times               0.8472177
## Frequency3 times               0.3768097
## Frequency> 3 times             3.8949517
## Temporary.Stop.Number1 stop    4.4152481
## Temporary.Stop.Number>=2 stops 0.3771633
## Age25-60                       1.4777667
## Age>60                         4.8150353
## Motor.CertificateYes           4.8860187
## Travel.Period                  4.5237408
## Cost                           3.5621070
plot(varImp(mg2))

varImp(mg)
##                                    Overall
## Central.AreaYes                 3.12139274
## PurposePicking Children         1.09171417
## PurposeEntertainment            3.46782157
## PurposeOthers                   0.25747503
## Frequency2 times                2.05257212
## Frequency3 times                0.67423751
## Frequency> 3 times              1.61500743
## Temporary.Stop.Number1 stop     3.79696941
## Temporary.Stop.Number>=2 stops  0.01620000
## WeatherRainny                   1.83477545
## WeatherCool                     1.15945771
## Bus.Stop.PresentYes             1.14340863
## Bus.Stop.PresentDon't know      1.25291614
## Age25-60                        0.98786437
## Age>60                          2.30729503
## OccupationOfficers/Workers      0.27636808
## OccupationHousewife/Unemployed  2.12691327
## OccupationFree labor            0.52606942
## OccupationOthers                1.54313029
## Income(8-15) millions           2.15594049
## Income(15-25) millions          0.55899226
## Income>25 millions              2.00178931
## Number.of.Children1 child       1.55742325
## Number.of.Children2 children    0.49720424
## Number.of.Children>= 3 children 0.63036243
## Motor.CertificateYes            2.46944311
## Car.CertificateYes              0.04080213
## Motor.OwningYes                 1.41494082
## Car.OwningYes                   0.02503163
## Number.of.Motors1               0.76493566
## Number.of.Motors2               0.27668869
## Number.of.Motors3               0.59385335
## Number.of.Motors>3              0.36937161
## Number.of.Car1                  1.35397226
## Number.of.Car>=2                0.48175552
## Distance                        6.96197233
## Travel.Period                   0.62606908
## Cost                            6.51245585

6. Probility of bus use - Prohibit model

# Prohibit Models
library(aod)
## Warning: package 'aod' was built under R version 3.6.3
## 
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
## 
##     rats
   ## Model mg
prohibitmg <- glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, family = binomial(link = "probit"), data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(prohibitmg)
## 
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Weather + Bus.Stop.Present + Age + Occupation + Income + 
##     Number.of.Children + Motor.Certificate + Car.Certificate + 
##     Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + 
##     Distance + Travel.Period + Cost, family = binomial(link = "probit"), 
##     data = datBus)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6426  -0.3016  -0.1042  -0.0043   3.5865  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      0.402449   0.584282   0.689 0.490954    
## Central.AreaYes                  0.584441   0.183328   3.188 0.001433 ** 
## PurposePicking Children         -0.516126   0.389421  -1.325 0.185050    
## PurposeEntertainment            -1.098028   0.309321  -3.550 0.000386 ***
## PurposeOthers                   -0.130938   0.478141  -0.274 0.784202    
## Frequency2 times                 0.847701   0.425218   1.994 0.046200 *  
## Frequency3 times                 0.308207   0.424978   0.725 0.468311    
## Frequency> 3 times              -0.549188   0.350443  -1.567 0.117085    
## Temporary.Stop.Number1 stop     -1.010253   0.257255  -3.927 8.60e-05 ***
## Temporary.Stop.Number>=2 stops  -0.003833   0.249232  -0.015 0.987730    
## WeatherRainny                    0.988161   0.553723   1.785 0.074330 .  
## WeatherCool                     -0.268920   0.235831  -1.140 0.254158    
## Bus.Stop.PresentYes             -0.371152   0.284494  -1.305 0.192027    
## Bus.Stop.PresentDon't know      -0.630963   0.421369  -1.497 0.134286    
## Age25-60                        -0.236704   0.343305  -0.689 0.490518    
## Age>60                           1.292995   0.493032   2.623 0.008728 ** 
## OccupationOfficers/Workers       0.014321   0.364257   0.039 0.968638    
## OccupationHousewife/Unemployed   0.956887   0.501140   1.909 0.056208 .  
## OccupationFree labor            -0.269399   0.348788  -0.772 0.439886    
## OccupationOthers                 0.629580   0.451720   1.394 0.163396    
## Income(8-15) millions           -0.523111   0.217347  -2.407 0.016093 *  
## Income(15-25) millions          -0.223731   0.256079  -0.874 0.382292    
## Income>25 millions              -1.286046   0.579842  -2.218 0.026560 *  
## Number.of.Children1 child       -0.341940   0.214248  -1.596 0.110489    
## Number.of.Children2 children    -0.187685   0.312806  -0.600 0.548505    
## Number.of.Children>= 3 children -0.287871   0.691383  -0.416 0.677140    
## Motor.CertificateYes            -0.597675   0.277590  -2.153 0.031312 *  
## Car.CertificateYes              -0.053636   0.470492  -0.114 0.909238    
## Motor.OwningYes                 -0.504480   0.260076  -1.940 0.052411 .  
## Car.OwningYes                    0.167182   0.702423   0.238 0.811876    
## Number.of.Motors1                0.466191   0.426873   1.092 0.274787    
## Number.of.Motors2                0.164327   0.415433   0.396 0.692433    
## Number.of.Motors3               -0.203764   0.451891  -0.451 0.652052    
## Number.of.Motors>3              -0.049101   0.630898  -0.078 0.937966    
## Number.of.Car1                  -0.651396   0.422560  -1.542 0.123184    
## Number.of.Car>=2                -0.548670   1.071047  -0.512 0.608459    
## Distance                         0.349429   0.049228   7.098 1.26e-12 ***
## Travel.Period                    0.001293   0.008218   0.157 0.874980    
## Cost                            -0.314582   0.047784  -6.583 4.60e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 262.71  on 770  degrees of freedom
## AIC: 340.71
## 
## Number of Fisher Scoring iterations: 10
   ## Model mg2
prohibitmg2 <- glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, family = binomial(link = "probit"), data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(prohibitmg2)
## 
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + 
##     Age + Motor.Certificate + Travel.Period + Cost, family = binomial(link = "probit"), 
##     data = datBus)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3347  -0.4543  -0.2357  -0.0880   3.2910  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     0.321332   0.307844   1.044 0.296571    
## Central.AreaYes                 0.449759   0.142742   3.151 0.001628 ** 
## PurposePicking Children        -0.441866   0.283879  -1.557 0.119582    
## PurposeEntertainment           -1.176172   0.231109  -5.089 3.59e-07 ***
## PurposeOthers                  -0.428349   0.399398  -1.072 0.283501    
## Frequency2 times                0.250444   0.310939   0.805 0.420562    
## Frequency3 times                0.133451   0.305593   0.437 0.662334    
## Frequency> 3 times             -0.924517   0.259446  -3.563 0.000366 ***
## Temporary.Stop.Number1 stop    -0.858111   0.192319  -4.462 8.12e-06 ***
## Temporary.Stop.Number>=2 stops  0.072113   0.187312   0.385 0.700247    
## Age25-60                       -0.262606   0.158451  -1.657 0.097452 .  
## Age>60                          1.410811   0.291861   4.834 1.34e-06 ***
## Motor.CertificateYes           -0.855405   0.180384  -4.742 2.11e-06 ***
## Travel.Period                   0.021625   0.004887   4.425 9.66e-06 ***
## Cost                           -0.053923   0.014555  -3.705 0.000212 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 628.32  on 808  degrees of freedom
## Residual deviance: 414.71  on 794  degrees of freedom
## AIC: 444.71
## 
## Number of Fisher Scoring iterations: 8
# Đánh giá tác động tổng thể Purpose (Total effect of Travel Purpose) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chọn xe buýt (thứ tự hệ số ứng với Purpose là từ 3:6)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 3:6) # Giá trị 19.8 ứng với p-value < 0.05 --> KL: tác động tổng thể về Purpose (mục đich chuyến đi) của người sử dụng lên khả năng chọn xe buýt là tồn tại và có ý nghĩa thống kê
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 26.2, df = 4, P(> X2) = 2.9e-05
# Đánh giá tác động tổng thể Temporary.Stop.Number (Total effect of Temporary.Stop.Number) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chọn xe buýt (thứ tự hệ số ứng với Temporary.Stop.Number là từ 11:13)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 11:13)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 63.4, df = 3, P(> X2) = 1.1e-13

6. Reason why choose or doesn’t choose bus

t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 1-Mode choice\\SO LIEU R\\Bus choice in DN.csv"
MC = read.csv(t, header = T)
head(MC)
##   Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency
## 1           6                  1            0       3         3
## 2           3                  1            0       1         4
## 3           3                  1            0       1         4
## 4           3                  1            0       1         4
## 5           3                  1            0       1         4
## 6           4                  1            0       1         4
##   Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1              1        2            10                  1             1
## 2              1        8            15                  1             1
## 3              3        5            15                  1             1
## 4              1        5            10                  1             1
## 5              3        8            15                  1             1
## 6              1       20            30                  1             1
##   Temporary.Stop.Number Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## 1                     0                  4       1       0              1
## 2                     1                  2       3       0              1
## 3                     1                  4       1       0              2
## 4                     1                  2       1       0              4
## 5                     0                  2       3       0              2
## 6                     0                  5       3       0              2
##   Cost Bus.Stop.Present Gender Age Occupation Income Number.of.Children
## 1   12                1      0   5          4      2                  2
## 2    8                2      0   3          6      3                  1
## 3    5                1      1   3          2      4                  0
## 4    5                1      1   3          2      3                  0
## 5    8                2      0   4          6      3                  1
## 6   40                2      1   4          3      4                  2
##   Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1                 0               0              0            0          0
## 2                 1               0              1            1          0
## 3                 1               0              0            1          0
## 4                 1               0              1            1          0
## 5                 1               0              0            1          0
## 6                 1               1              1            1          1
##   Number.of.Bicycles Number.of.Motors Number.of.Car
## 1                  1                2             1
## 2                  1                3             0
## 3                  0                3             0
## 4                  1                3             0
## 5                  0                2             0
## 6                  1                2             1
# View data (number of rows and colume)
dim(MC) 
## [1] 809  30
# Sumerize Data
summary(MC) 
##   Travel.Mode    Bus.Stop.Condition  Central.Area      Purpose     
##  Min.   :1.000   Min.   :0.0000     Min.   :0.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:0.0000     1st Qu.:0.000   1st Qu.:1.000  
##  Median :3.000   Median :1.0000     Median :0.000   Median :1.000  
##  Mean   :3.663   Mean   :0.5117     Mean   :0.471   Mean   :1.576  
##  3rd Qu.:4.000   3rd Qu.:1.0000     3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :7.000   Max.   :1.0000     Max.   :1.000   Max.   :4.000  
##                                                                    
##    Frequency     Departure.Time     Distance      Travel.Period   
##  Min.   :1.000   Min.   :1.000   Min.   : 0.015   Min.   :  0.00  
##  1st Qu.:4.000   1st Qu.:1.000   1st Qu.: 3.000   1st Qu.: 10.00  
##  Median :4.000   Median :1.000   Median : 5.000   Median : 15.00  
##  Mean   :3.555   Mean   :1.832   Mean   : 6.654   Mean   : 17.55  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:10.000   3rd Qu.: 20.00  
##  Max.   :4.000   Max.   :4.000   Max.   :35.000   Max.   :180.00  
##                                                                   
##  Sidewalk.Clearance Lane.Separate    Temporary.Stop.Number
##  Min.   :0.000      Min.   :0.0000   Min.   :0.0000       
##  1st Qu.:1.000      1st Qu.:1.0000   1st Qu.:0.0000       
##  Median :1.000      Median :1.0000   Median :0.0000       
##  Mean   :0.864      Mean   :0.7886   Mean   :0.6922       
##  3rd Qu.:1.000      3rd Qu.:1.0000   3rd Qu.:1.0000       
##  Max.   :1.000      Max.   :1.0000   Max.   :3.0000       
##                                                           
##  Mode.Choice.Reason    Weather        Weekend       Non.Bus.Reason 
##  Min.   :1.000      Min.   :1.00   Min.   :0.0000   Min.   :1.000  
##  1st Qu.:2.000      1st Qu.:1.00   1st Qu.:0.0000   1st Qu.:2.000  
##  Median :2.000      Median :1.00   Median :0.0000   Median :2.000  
##  Mean   :2.714      Mean   :1.64   Mean   :0.2435   Mean   :2.882  
##  3rd Qu.:4.000      3rd Qu.:3.00   3rd Qu.:0.0000   3rd Qu.:4.000  
##  Max.   :6.000      Max.   :3.00   Max.   :1.0000   Max.   :6.000  
##                                                     NA's   :106    
##       Cost         Bus.Stop.Present     Gender           Age       
##  Min.   :  0.000   Min.   :0.000    Min.   :0.000   Min.   :1.000  
##  1st Qu.:  3.000   1st Qu.:1.000    1st Qu.:0.000   1st Qu.:3.000  
##  Median :  5.000   Median :1.000    Median :1.000   Median :4.000  
##  Mean   :  8.754   Mean   :1.005    Mean   :0.581   Mean   :3.677  
##  3rd Qu.: 10.000   3rd Qu.:1.000    3rd Qu.:1.000   3rd Qu.:4.000  
##  Max.   :285.000   Max.   :2.000    Max.   :1.000   Max.   :6.000  
##                                                                    
##    Occupation        Income      Number.of.Children Motor.Certificate
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000     Min.   :0.0000   
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:0.0000     1st Qu.:1.0000   
##  Median :3.000   Median :2.000   Median :1.0000     Median :1.0000   
##  Mean   :4.298   Mean   :2.206   Mean   :0.7651     Mean   :0.8838   
##  3rd Qu.:7.000   3rd Qu.:3.000   3rd Qu.:1.0000     3rd Qu.:1.0000   
##  Max.   :8.000   Max.   :4.000   Max.   :3.0000     Max.   :1.0000   
##                                                                      
##  Car.Certificate  Bicycle.Owning    Motor.Owning      Car.Owning    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :1.0000   Median :0.0000  
##  Mean   :0.1916   Mean   :0.3956   Mean   :0.8183   Mean   :0.1471  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##  Number.of.Bicycles Number.of.Motors Number.of.Car   
##  Min.   :0.0000     Min.   :0.00     Min.   :0.0000  
##  1st Qu.:0.0000     1st Qu.:2.00     1st Qu.:0.0000  
##  Median :1.0000     Median :2.00     Median :0.0000  
##  Mean   :0.6873     Mean   :2.13     Mean   :0.2447  
##  3rd Qu.:1.0000     3rd Qu.:3.00     3rd Qu.:0.0000  
##  Max.   :3.0000     Max.   :4.00     Max.   :2.0000  
## 
# Create new variable - Bus 
MC$Bus[MC$Travel.Mode < 7] <- 0
MC$Bus[MC$Travel.Mode == 7] <- 1
# Vẽ biểu đồ reason why choose bus or doesn't choose bus
library(magrittr)
library(tidyverse)
library(ggplot2)
library(car)
# Lấy 1 phần dữ liệu gồm Bus, Mode.Choice.Reason, Non.Bus.Reason
str(MC)
## 'data.frame':    809 obs. of  31 variables:
##  $ Travel.Mode          : int  6 3 3 3 3 4 4 3 3 3 ...
##  $ Bus.Stop.Condition   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Central.Area         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Purpose              : int  3 1 1 1 1 1 1 1 1 1 ...
##  $ Frequency            : int  3 4 4 4 4 4 4 4 4 4 ...
##  $ Departure.Time       : int  1 1 3 1 3 1 1 1 2 1 ...
##  $ Distance             : num  2 8 5 5 8 20 15 10 12 10 ...
##  $ Travel.Period        : num  10 15 15 10 15 30 20 25 30 25 ...
##  $ Sidewalk.Clearance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Lane.Separate        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Temporary.Stop.Number: int  0 1 1 1 0 0 0 0 1 1 ...
##  $ Mode.Choice.Reason   : int  4 2 4 2 2 5 5 2 2 2 ...
##  $ Weather              : int  1 3 1 1 3 3 3 3 1 3 ...
##  $ Weekend              : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ Non.Bus.Reason       : int  1 1 2 4 2 2 4 2 1 4 ...
##  $ Cost                 : int  12 8 5 5 8 40 30 10 12 10 ...
##  $ Bus.Stop.Present     : int  1 2 1 1 2 2 2 1 1 1 ...
##  $ Gender               : int  0 0 1 1 0 1 1 0 0 1 ...
##  $ Age                  : int  5 3 3 3 4 4 5 4 3 4 ...
##  $ Occupation           : int  4 6 2 2 6 3 3 7 7 3 ...
##  $ Income               : int  2 3 4 3 3 4 4 2 3 3 ...
##  $ Number.of.Children   : int  2 1 0 0 1 2 2 0 1 0 ...
##  $ Motor.Certificate    : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Certificate      : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Bicycle.Owning       : int  0 1 0 1 0 1 0 0 1 0 ...
##  $ Motor.Owning         : int  0 1 1 1 1 1 1 1 1 1 ...
##  $ Car.Owning           : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Number.of.Bicycles   : int  1 1 0 1 0 1 0 0 1 0 ...
##  $ Number.of.Motors     : int  2 3 3 3 2 2 2 2 3 3 ...
##  $ Number.of.Car        : int  1 0 0 0 0 1 1 0 0 0 ...
##  $ Bus                  : num  0 0 0 0 0 0 0 0 0 0 ...
dat1 <- MC[,c(12,15,31)]
head(dat1)
##   Mode.Choice.Reason Non.Bus.Reason Bus
## 1                  4              1   0
## 2                  2              1   0
## 3                  4              2   0
## 4                  2              4   0
## 5                  2              2   0
## 6                  5              2   0
# Dữ liệu sử dụng xe buýt
datBusonly <- subset(dat1, Bus == 1)
datBusonly$Bus.Choice.Reason <- datBusonly$Mode.Choice.Reason
head(datBusonly)
##     Mode.Choice.Reason Non.Bus.Reason Bus Bus.Choice.Reason
## 57                   1             NA   1                 1
## 123                  1             NA   1                 1
## 234                  2             NA   1                 2
## 267                  2             NA   1                 2
## 327                  1             NA   1                 1
## 328                  2             NA   1                 2
str(datBusonly)
## 'data.frame':    106 obs. of  4 variables:
##  $ Mode.Choice.Reason: int  1 1 2 2 1 2 1 1 3 3 ...
##  $ Non.Bus.Reason    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Bus               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Bus.Choice.Reason : int  1 1 2 2 1 2 1 1 3 3 ...
# reformat variables in factor variables
attach(datBusonly)
## The following objects are masked from MC:
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
datBusonly = within(datBusonly, {
  Bus.Choice.Reason = factor(Bus.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "others"))
  } )
head(datBusonly)
##     Mode.Choice.Reason Non.Bus.Reason Bus Bus.Choice.Reason
## 57                   1             NA   1            Safety
## 123                  1             NA   1            Safety
## 234                  2             NA   1       Comfortable
## 267                  2             NA   1       Comfortable
## 327                  1             NA   1            Safety
## 328                  2             NA   1       Comfortable
str(datBusonly)
## 'data.frame':    106 obs. of  4 variables:
##  $ Mode.Choice.Reason: int  1 1 2 2 1 2 1 1 3 3 ...
##  $ Non.Bus.Reason    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Bus               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Bus.Choice.Reason : Factor w/ 5 levels "Safety","Comfortable",..: 1 1 2 2 1 2 1 1 3 3 ...
p.bus = ggplot(datBusonly, aes(x = Bus.Choice.Reason, fill = Bus.Choice.Reason))
p.bus = p.bus +  geom_bar(width = 0.8, col = "blue")
p.bus = p.bus + theme(legend.position = "none")
p.bus = p.bus + coord_flip()
p.bus 

# Dữ liệu không sử dụng xe buýt
datNonBusonly <- subset(dat1, Bus == 0)
head(datNonBusonly)
##   Mode.Choice.Reason Non.Bus.Reason Bus
## 1                  4              1   0
## 2                  2              1   0
## 3                  4              2   0
## 4                  2              4   0
## 5                  2              2   0
## 6                  5              2   0
str(datNonBusonly)
## 'data.frame':    703 obs. of  3 variables:
##  $ Mode.Choice.Reason: int  4 2 4 2 2 5 5 2 2 2 ...
##  $ Non.Bus.Reason    : int  1 1 2 4 2 2 4 2 1 4 ...
##  $ Bus               : num  0 0 0 0 0 0 0 0 0 0 ...
attach(datBusonly)
## The following objects are masked from datBusonly (pos = 3):
## 
##     Bus, Bus.Choice.Reason, Mode.Choice.Reason, Non.Bus.Reason
## 
## The following objects are masked from MC:
## 
##     Bus, Mode.Choice.Reason, Non.Bus.Reason
datNonBusonly = within(datNonBusonly, {
  Non.Bus.Reason = factor(Non.Bus.Reason, labels = c("No Route", "Inconvenient", "Unsafety", "Long waiting time", "Unreliability", "others"))
  } )
str(datNonBusonly)
## 'data.frame':    703 obs. of  3 variables:
##  $ Mode.Choice.Reason: int  4 2 4 2 2 5 5 2 2 2 ...
##  $ Non.Bus.Reason    : Factor w/ 6 levels "No Route","Inconvenient",..: 1 1 2 4 2 2 4 2 1 4 ...
##  $ Bus               : num  0 0 0 0 0 0 0 0 0 0 ...
p.nonbus = ggplot(datNonBusonly, aes(x = Non.Bus.Reason, fill = Non.Bus.Reason))
p.nonbus = p.nonbus +  geom_bar(width = 0.8, col = "blue")
p.nonbus = p.nonbus + theme(legend.position = "none")
p.nonbus = p.nonbus + coord_flip()
p.nonbus