1. Read and Create Data
t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 1-Mode choice\\SO LIEU R\\Bus choice in DN.csv"
MC = read.csv(t, header = T)
head(MC)
## Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency
## 1 6 1 0 3 3
## 2 3 1 0 1 4
## 3 3 1 0 1 4
## 4 3 1 0 1 4
## 5 3 1 0 1 4
## 6 4 1 0 1 4
## Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1 1 2 10 1 1
## 2 1 8 15 1 1
## 3 3 5 15 1 1
## 4 1 5 10 1 1
## 5 3 8 15 1 1
## 6 1 20 30 1 1
## Temporary.Stop.Number Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## 1 0 4 1 0 1
## 2 1 2 3 0 1
## 3 1 4 1 0 2
## 4 1 2 1 0 4
## 5 0 2 3 0 2
## 6 0 5 3 0 2
## Cost Bus.Stop.Present Gender Age Occupation Income Number.of.Children
## 1 12 1 0 5 4 2 2
## 2 8 2 0 3 6 3 1
## 3 5 1 1 3 2 4 0
## 4 5 1 1 3 2 3 0
## 5 8 2 0 4 6 3 1
## 6 40 2 1 4 3 4 2
## Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1 0 0 0 0 0
## 2 1 0 1 1 0
## 3 1 0 0 1 0
## 4 1 0 1 1 0
## 5 1 0 0 1 0
## 6 1 1 1 1 1
## Number.of.Bicycles Number.of.Motors Number.of.Car
## 1 1 2 1
## 2 1 3 0
## 3 0 3 0
## 4 1 3 0
## 5 0 2 0
## 6 1 2 1
head(MC$Age)
## [1] 5 3 3 3 4 4
str(MC$Travel.Mode)
## int [1:809] 6 3 3 3 3 4 4 3 3 3 ...
# View data (number of rows and colume)
dim(MC)
## [1] 809 30
# Sumerize Data
summary(MC)
## Travel.Mode Bus.Stop.Condition Central.Area Purpose
## Min. :1.000 Min. :0.0000 Min. :0.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000
## Median :3.000 Median :1.0000 Median :0.000 Median :1.000
## Mean :3.663 Mean :0.5117 Mean :0.471 Mean :1.576
## 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :7.000 Max. :1.0000 Max. :1.000 Max. :4.000
##
## Frequency Departure.Time Distance Travel.Period
## Min. :1.000 Min. :1.000 Min. : 0.015 Min. : 0.00
## 1st Qu.:4.000 1st Qu.:1.000 1st Qu.: 3.000 1st Qu.: 10.00
## Median :4.000 Median :1.000 Median : 5.000 Median : 15.00
## Mean :3.555 Mean :1.832 Mean : 6.654 Mean : 17.55
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:10.000 3rd Qu.: 20.00
## Max. :4.000 Max. :4.000 Max. :35.000 Max. :180.00
##
## Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :1.000 Median :1.0000 Median :0.0000
## Mean :0.864 Mean :0.7886 Mean :0.6922
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000 Max. :3.0000
##
## Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## Min. :1.000 Min. :1.00 Min. :0.0000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:0.0000 1st Qu.:2.000
## Median :2.000 Median :1.00 Median :0.0000 Median :2.000
## Mean :2.714 Mean :1.64 Mean :0.2435 Mean :2.882
## 3rd Qu.:4.000 3rd Qu.:3.00 3rd Qu.:0.0000 3rd Qu.:4.000
## Max. :6.000 Max. :3.00 Max. :1.0000 Max. :6.000
## NA's :106
## Cost Bus.Stop.Present Gender Age
## Min. : 0.000 Min. :0.000 Min. :0.000 Min. :1.000
## 1st Qu.: 3.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:3.000
## Median : 5.000 Median :1.000 Median :1.000 Median :4.000
## Mean : 8.754 Mean :1.005 Mean :0.581 Mean :3.677
## 3rd Qu.: 10.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:4.000
## Max. :285.000 Max. :2.000 Max. :1.000 Max. :6.000
##
## Occupation Income Number.of.Children Motor.Certificate
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :3.000 Median :2.000 Median :1.0000 Median :1.0000
## Mean :4.298 Mean :2.206 Mean :0.7651 Mean :0.8838
## 3rd Qu.:7.000 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :8.000 Max. :4.000 Max. :3.0000 Max. :1.0000
##
## Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.1916 Mean :0.3956 Mean :0.8183 Mean :0.1471
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Number.of.Bicycles Number.of.Motors Number.of.Car
## Min. :0.0000 Min. :0.00 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:2.00 1st Qu.:0.0000
## Median :1.0000 Median :2.00 Median :0.0000
## Mean :0.6873 Mean :2.13 Mean :0.2447
## 3rd Qu.:1.0000 3rd Qu.:3.00 3rd Qu.:0.0000
## Max. :3.0000 Max. :4.00 Max. :2.0000
##
# Create new variable - Bus
MC$Bus[MC$Travel.Mode < 7] <- 0
MC$Bus[MC$Travel.Mode == 7] <- 1
# Đổi biến Age
MC$Age[MC$Age == 2] <- 1
MC$Age[MC$Age == 3] <- 1
MC$Age[MC$Age == 4] <- 2
MC$Age[MC$Age == 5] <- 2
MC$Age[MC$Age == 6] <- 3
# Đổi biến Occupation
MC$Occupation[MC$Occupation == 2] <- 1
MC$Occupation[MC$Occupation == 3] <- 2
MC$Occupation[MC$Occupation == 6] <- 2
MC$Occupation[MC$Occupation == 4] <- 3
MC$Occupation[MC$Occupation == 5] <- 3
MC$Occupation[MC$Occupation == 7] <- 4
MC$Occupation[MC$Occupation == 8] <- 5
# Đổi biến Temporary Stop Number
MC$Temporary.Stop.Number[MC$Temporary.Stop.Number == 3] <- 2
head(MC)
## Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency
## 1 6 1 0 3 3
## 2 3 1 0 1 4
## 3 3 1 0 1 4
## 4 3 1 0 1 4
## 5 3 1 0 1 4
## 6 4 1 0 1 4
## Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1 1 2 10 1 1
## 2 1 8 15 1 1
## 3 3 5 15 1 1
## 4 1 5 10 1 1
## 5 3 8 15 1 1
## 6 1 20 30 1 1
## Temporary.Stop.Number Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## 1 0 4 1 0 1
## 2 1 2 3 0 1
## 3 1 4 1 0 2
## 4 1 2 1 0 4
## 5 0 2 3 0 2
## 6 0 5 3 0 2
## Cost Bus.Stop.Present Gender Age Occupation Income Number.of.Children
## 1 12 1 0 2 3 2 2
## 2 8 2 0 1 2 3 1
## 3 5 1 1 1 1 4 0
## 4 5 1 1 1 1 3 0
## 5 8 2 0 2 2 3 1
## 6 40 2 1 2 2 4 2
## Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1 0 0 0 0 0
## 2 1 0 1 1 0
## 3 1 0 0 1 0
## 4 1 0 1 1 0
## 5 1 0 0 1 0
## 6 1 1 1 1 1
## Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 1 1 2 1 0
## 2 1 3 0 0
## 3 0 3 0 0
## 4 1 3 0 0
## 5 0 2 0 0
## 6 1 2 1 0
dim(MC)
## [1] 809 31
str(MC)
## 'data.frame': 809 obs. of 31 variables:
## $ Travel.Mode : int 6 3 3 3 3 4 4 3 3 3 ...
## $ Bus.Stop.Condition : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Central.Area : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose : int 3 1 1 1 1 1 1 1 1 1 ...
## $ Frequency : int 3 4 4 4 4 4 4 4 4 4 ...
## $ Departure.Time : int 1 1 3 1 3 1 1 1 2 1 ...
## $ Distance : num 2 8 5 5 8 20 15 10 12 10 ...
## $ Travel.Period : num 10 15 15 10 15 30 20 25 30 25 ...
## $ Sidewalk.Clearance : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Lane.Separate : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Temporary.Stop.Number: num 0 1 1 1 0 0 0 0 1 1 ...
## $ Mode.Choice.Reason : int 4 2 4 2 2 5 5 2 2 2 ...
## $ Weather : int 1 3 1 1 3 3 3 3 1 3 ...
## $ Weekend : int 0 0 0 0 0 0 0 1 0 0 ...
## $ Non.Bus.Reason : int 1 1 2 4 2 2 4 2 1 4 ...
## $ Cost : int 12 8 5 5 8 40 30 10 12 10 ...
## $ Bus.Stop.Present : int 1 2 1 1 2 2 2 1 1 1 ...
## $ Gender : int 0 0 1 1 0 1 1 0 0 1 ...
## $ Age : num 2 1 1 1 2 2 2 2 1 2 ...
## $ Occupation : num 3 2 1 1 2 2 2 4 4 2 ...
## $ Income : int 2 3 4 3 3 4 4 2 3 3 ...
## $ Number.of.Children : int 2 1 0 0 1 2 2 0 1 0 ...
## $ Motor.Certificate : int 0 1 1 1 1 1 1 1 1 1 ...
## $ Car.Certificate : int 0 0 0 0 0 1 1 0 0 0 ...
## $ Bicycle.Owning : int 0 1 0 1 0 1 0 0 1 0 ...
## $ Motor.Owning : int 0 1 1 1 1 1 1 1 1 1 ...
## $ Car.Owning : int 0 0 0 0 0 1 1 0 0 0 ...
## $ Number.of.Bicycles : int 1 1 0 1 0 1 0 0 1 0 ...
## $ Number.of.Motors : int 2 3 3 3 2 2 2 2 3 3 ...
## $ Number.of.Car : int 1 0 0 0 0 1 1 0 0 0 ...
## $ Bus : num 0 0 0 0 0 0 0 0 0 0 ...
# reformat variables in factor variables
attach(MC)
MC = within(MC, {
Travel.Mode = factor(Travel.Mode, labels = c("Walk", "Bicycle", "Motorbike", "Car", "Hichhiking", "App-based Motor/Car", "Bus"))
Bus.Stop.Condition = factor(Bus.Stop.Condition,labels = c("No", "Yes"))
Central.Area = factor(Central.Area, labels = c("No", "Yes"))
Purpose = factor(Purpose, labels = c("Work/Study", "Picking Children", "Entertainment", "Others"))
Frequency = factor(Frequency, labels = c("Once", "2 times", "3 times", "> 3 times"))
Departure.Time = factor(Departure.Time, labels = c("Morning", "Afternoon", "Evening", "Others"))
Sidewalk.Clearance = factor(Sidewalk.Clearance, labels = c("No", "Yes"))
Lane.Separate = factor(Lane.Separate, labels = c("No", "Yes"))
Temporary.Stop.Number = factor(Temporary.Stop.Number, labels = c("None", "1 stop", ">=2 stops"))
Mode.Choice.Reason = factor(Mode.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "Reliability", "others"))
Weather = factor(Weather, labels = c("Sunny", "Rainny", "Cool"))
Weekend = factor(Weekend, labels = c("No", "Yes"))
Non.Bus.Reason = factor(Non.Bus.Reason, labels = c("No Route", "Uncomfortable", "Unsafety", "Long waiting time", "Unreliability", "others"))
Bus.Stop.Present = factor(Bus.Stop.Present, labels = c("No", "Yes", "Don't know"))
Gender = factor(Gender, labels = c("Female", "Male"))
Age = factor(Age, labels = c("15-24", "25-60", ">60"))
Occupation = factor(Occupation, labels = c("Pupils/Students", "Officers/Workers", "Housewife/Unemployed", "Free labor", "Others"))
Number.of.Children = factor(Number.of.Children, labels = c("None", "1 child", "2 children", ">= 3 children"))
Motor.Certificate = factor(Motor.Certificate, labels = c("No", "Yes"))
Car.Certificate = factor(Car.Certificate, labels = c("No", "Yes"))
Bicycle.Owning = factor(Bicycle.Owning, labels = c("No", "Yes"))
Motor.Owning = factor(Motor.Owning, labels = c("No", "Yes"))
Car.Owning = factor(Car.Owning, labels = c("No", "Yes"))
Number.of.Bicycles = factor(Number.of.Bicycles, labels = c("None", "1", "2", ">=3"))
Number.of.Motors = factor(Number.of.Motors, labels = c("None", "1", "2", "3", ">3"))
Number.of.Car = factor(Number.of.Car, labels = c("None", "1", ">=2"))
Income = factor(Income, labels = c("<8 millions", "(8-15) millions", "(15-25) millions", ">25 millions"))
Bus = factor(Bus, labels = c("No", "Yes"))
Distance = as.numeric(Distance)
Travel.Period = as.numeric(Travel.Period)
Cost = as.numeric(Cost)
} )
head(MC)
## Travel.Mode Bus.Stop.Condition Central.Area Purpose
## 1 App-based Motor/Car Yes No Entertainment
## 2 Motorbike Yes No Work/Study
## 3 Motorbike Yes No Work/Study
## 4 Motorbike Yes No Work/Study
## 5 Motorbike Yes No Work/Study
## 6 Car Yes No Work/Study
## Frequency Departure.Time Distance Travel.Period Sidewalk.Clearance
## 1 3 times Morning 2 10 Yes
## 2 > 3 times Morning 8 15 Yes
## 3 > 3 times Evening 5 15 Yes
## 4 > 3 times Morning 5 10 Yes
## 5 > 3 times Evening 8 15 Yes
## 6 > 3 times Morning 20 30 Yes
## Lane.Separate Temporary.Stop.Number Mode.Choice.Reason Weather Weekend
## 1 Yes None Accessibility Sunny No
## 2 Yes 1 stop Comfortable Cool No
## 3 Yes 1 stop Accessibility Sunny No
## 4 Yes 1 stop Comfortable Sunny No
## 5 Yes None Comfortable Cool No
## 6 Yes None Reliability Cool No
## Non.Bus.Reason Cost Bus.Stop.Present Gender Age
## 1 No Route 12 Yes Female 25-60
## 2 No Route 8 Don't know Female 15-24
## 3 Uncomfortable 5 Yes Male 15-24
## 4 Long waiting time 5 Yes Male 15-24
## 5 Uncomfortable 8 Don't know Female 25-60
## 6 Uncomfortable 40 Don't know Male 25-60
## Occupation Income Number.of.Children
## 1 Housewife/Unemployed (8-15) millions 2 children
## 2 Officers/Workers (15-25) millions 1 child
## 3 Pupils/Students >25 millions None
## 4 Pupils/Students (15-25) millions None
## 5 Officers/Workers (15-25) millions 1 child
## 6 Officers/Workers >25 millions 2 children
## Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1 No No No No No
## 2 Yes No Yes Yes No
## 3 Yes No No Yes No
## 4 Yes No Yes Yes No
## 5 Yes No No Yes No
## 6 Yes Yes Yes Yes Yes
## Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 1 1 2 1 No
## 2 1 3 None No
## 3 None 3 None No
## 4 1 3 None No
## 5 None 2 None No
## 6 1 2 1 No
str(MC)
## 'data.frame': 809 obs. of 31 variables:
## $ Travel.Mode : Factor w/ 7 levels "Walk","Bicycle",..: 6 3 3 3 3 4 4 3 3 3 ...
## $ Bus.Stop.Condition : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Central.Area : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Purpose : Factor w/ 4 levels "Work/Study","Picking Children",..: 3 1 1 1 1 1 1 1 1 1 ...
## $ Frequency : Factor w/ 4 levels "Once","2 times",..: 3 4 4 4 4 4 4 4 4 4 ...
## $ Departure.Time : Factor w/ 4 levels "Morning","Afternoon",..: 1 1 3 1 3 1 1 1 2 1 ...
## $ Distance : num 2 8 5 5 8 20 15 10 12 10 ...
## $ Travel.Period : num 10 15 15 10 15 30 20 25 30 25 ...
## $ Sidewalk.Clearance : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Lane.Separate : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Temporary.Stop.Number: Factor w/ 3 levels "None","1 stop",..: 1 2 2 2 1 1 1 1 2 2 ...
## $ Mode.Choice.Reason : Factor w/ 6 levels "Safety","Comfortable",..: 4 2 4 2 2 5 5 2 2 2 ...
## $ Weather : Factor w/ 3 levels "Sunny","Rainny",..: 1 3 1 1 3 3 3 3 1 3 ...
## $ Weekend : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
## $ Non.Bus.Reason : Factor w/ 6 levels "No Route","Uncomfortable",..: 1 1 2 4 2 2 4 2 1 4 ...
## $ Cost : num 12 8 5 5 8 40 30 10 12 10 ...
## $ Bus.Stop.Present : Factor w/ 3 levels "No","Yes","Don't know": 2 3 2 2 3 3 3 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 2 2 1 2 2 1 1 2 ...
## $ Age : Factor w/ 3 levels "15-24","25-60",..: 2 1 1 1 2 2 2 2 1 2 ...
## $ Occupation : Factor w/ 5 levels "Pupils/Students",..: 3 2 1 1 2 2 2 4 4 2 ...
## $ Income : Factor w/ 4 levels "<8 millions",..: 2 3 4 3 3 4 4 2 3 3 ...
## $ Number.of.Children : Factor w/ 4 levels "None","1 child",..: 3 2 1 1 2 3 3 1 2 1 ...
## $ Motor.Certificate : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ Car.Certificate : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
## $ Bicycle.Owning : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 1 2 1 ...
## $ Motor.Owning : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ Car.Owning : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
## $ Number.of.Bicycles : Factor w/ 4 levels "None","1","2",..: 2 2 1 2 1 2 1 1 2 1 ...
## $ Number.of.Motors : Factor w/ 5 levels "None","1","2",..: 3 4 4 4 3 3 3 3 4 4 ...
## $ Number.of.Car : Factor w/ 3 levels "None","1",">=2": 2 1 1 1 1 2 2 1 1 1 ...
## $ Bus : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
# Create new data - datBus (Bus và tất cả các loại phương tiện còn lại)
datBus <- MC[, c(2:31)]
head(datBus)
## Bus.Stop.Condition Central.Area Purpose Frequency Departure.Time
## 1 Yes No Entertainment 3 times Morning
## 2 Yes No Work/Study > 3 times Morning
## 3 Yes No Work/Study > 3 times Evening
## 4 Yes No Work/Study > 3 times Morning
## 5 Yes No Work/Study > 3 times Evening
## 6 Yes No Work/Study > 3 times Morning
## Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1 2 10 Yes Yes
## 2 8 15 Yes Yes
## 3 5 15 Yes Yes
## 4 5 10 Yes Yes
## 5 8 15 Yes Yes
## 6 20 30 Yes Yes
## Temporary.Stop.Number Mode.Choice.Reason Weather Weekend
## 1 None Accessibility Sunny No
## 2 1 stop Comfortable Cool No
## 3 1 stop Accessibility Sunny No
## 4 1 stop Comfortable Sunny No
## 5 None Comfortable Cool No
## 6 None Reliability Cool No
## Non.Bus.Reason Cost Bus.Stop.Present Gender Age
## 1 No Route 12 Yes Female 25-60
## 2 No Route 8 Don't know Female 15-24
## 3 Uncomfortable 5 Yes Male 15-24
## 4 Long waiting time 5 Yes Male 15-24
## 5 Uncomfortable 8 Don't know Female 25-60
## 6 Uncomfortable 40 Don't know Male 25-60
## Occupation Income Number.of.Children
## 1 Housewife/Unemployed (8-15) millions 2 children
## 2 Officers/Workers (15-25) millions 1 child
## 3 Pupils/Students >25 millions None
## 4 Pupils/Students (15-25) millions None
## 5 Officers/Workers (15-25) millions 1 child
## 6 Officers/Workers >25 millions 2 children
## Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1 No No No No No
## 2 Yes No Yes Yes No
## 3 Yes No No Yes No
## 4 Yes No Yes Yes No
## 5 Yes No No Yes No
## 6 Yes Yes Yes Yes Yes
## Number.of.Bicycles Number.of.Motors Number.of.Car Bus
## 1 1 2 1 No
## 2 1 3 None No
## 3 None 3 None No
## 4 1 3 None No
## 5 None 2 None No
## 6 1 2 1 No
str(datBus)
## 'data.frame': 809 obs. of 30 variables:
## $ Bus.Stop.Condition : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Central.Area : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Purpose : Factor w/ 4 levels "Work/Study","Picking Children",..: 3 1 1 1 1 1 1 1 1 1 ...
## $ Frequency : Factor w/ 4 levels "Once","2 times",..: 3 4 4 4 4 4 4 4 4 4 ...
## $ Departure.Time : Factor w/ 4 levels "Morning","Afternoon",..: 1 1 3 1 3 1 1 1 2 1 ...
## $ Distance : num 2 8 5 5 8 20 15 10 12 10 ...
## $ Travel.Period : num 10 15 15 10 15 30 20 25 30 25 ...
## $ Sidewalk.Clearance : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Lane.Separate : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Temporary.Stop.Number: Factor w/ 3 levels "None","1 stop",..: 1 2 2 2 1 1 1 1 2 2 ...
## $ Mode.Choice.Reason : Factor w/ 6 levels "Safety","Comfortable",..: 4 2 4 2 2 5 5 2 2 2 ...
## $ Weather : Factor w/ 3 levels "Sunny","Rainny",..: 1 3 1 1 3 3 3 3 1 3 ...
## $ Weekend : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
## $ Non.Bus.Reason : Factor w/ 6 levels "No Route","Uncomfortable",..: 1 1 2 4 2 2 4 2 1 4 ...
## $ Cost : num 12 8 5 5 8 40 30 10 12 10 ...
## $ Bus.Stop.Present : Factor w/ 3 levels "No","Yes","Don't know": 2 3 2 2 3 3 3 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 2 2 1 2 2 1 1 2 ...
## $ Age : Factor w/ 3 levels "15-24","25-60",..: 2 1 1 1 2 2 2 2 1 2 ...
## $ Occupation : Factor w/ 5 levels "Pupils/Students",..: 3 2 1 1 2 2 2 4 4 2 ...
## $ Income : Factor w/ 4 levels "<8 millions",..: 2 3 4 3 3 4 4 2 3 3 ...
## $ Number.of.Children : Factor w/ 4 levels "None","1 child",..: 3 2 1 1 2 3 3 1 2 1 ...
## $ Motor.Certificate : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ Car.Certificate : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
## $ Bicycle.Owning : Factor w/ 2 levels "No","Yes": 1 2 1 2 1 2 1 1 2 1 ...
## $ Motor.Owning : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ Car.Owning : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 2 1 1 1 ...
## $ Number.of.Bicycles : Factor w/ 4 levels "None","1","2",..: 2 2 1 2 1 2 1 1 2 1 ...
## $ Number.of.Motors : Factor w/ 5 levels "None","1","2",..: 3 4 4 4 3 3 3 3 4 4 ...
## $ Number.of.Car : Factor w/ 3 levels "None","1",">=2": 2 1 1 1 1 2 2 1 1 1 ...
## $ Bus : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
dim(datBus)
## [1] 809 30
2. Descriptive analyse
# Descriptive analyse with package "table 1" - sort by Bus variable
## install.packages("table1", dependencies = T)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1 (~ Bus.Stop.Condition + Central.Area + Purpose + Frequency + Departure.Time + Sidewalk.Clearance + Lane.Separate + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Weekend + Non.Bus.Reason + Bus.Stop.Present + Gender + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Bicycle.Owning + Motor.Owning + Car.Owning + Number.of.Bicycles + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost| Bus , data = datBus)
| No (n=703) |
Yes (n=106) |
Overall (n=809) |
|
|---|---|---|---|
| Bus.Stop.Condition | |||
| No | 334 (47.5%) | 61 (57.5%) | 395 (48.8%) |
| Yes | 369 (52.5%) | 45 (42.5%) | 414 (51.2%) |
| Central.Area | |||
| No | 387 (55.0%) | 41 (38.7%) | 428 (52.9%) |
| Yes | 316 (45.0%) | 65 (61.3%) | 381 (47.1%) |
| Purpose | |||
| Work/Study | 463 (65.9%) | 77 (72.6%) | 540 (66.7%) |
| Picking Children | 85 (12.1%) | 9 (8.5%) | 94 (11.6%) |
| Entertainment | 140 (19.9%) | 13 (12.3%) | 153 (18.9%) |
| Others | 15 (2.1%) | 7 (6.6%) | 22 (2.7%) |
| Frequency | |||
| Once | 42 (6.0%) | 13 (12.3%) | 55 (6.8%) |
| 2 times | 44 (6.3%) | 18 (17.0%) | 62 (7.7%) |
| 3 times | 52 (7.4%) | 19 (17.9%) | 71 (8.8%) |
| > 3 times | 565 (80.4%) | 56 (52.8%) | 621 (76.8%) |
| Departure.Time | |||
| Morning | 425 (60.5%) | 71 (67.0%) | 496 (61.3%) |
| Afternoon | 55 (7.8%) | 11 (10.4%) | 66 (8.2%) |
| Evening | 117 (16.6%) | 17 (16.0%) | 134 (16.6%) |
| Others | 106 (15.1%) | 7 (6.6%) | 113 (14.0%) |
| Sidewalk.Clearance | |||
| No | 95 (13.5%) | 15 (14.2%) | 110 (13.6%) |
| Yes | 608 (86.5%) | 91 (85.8%) | 699 (86.4%) |
| Lane.Separate | |||
| No | 151 (21.5%) | 20 (18.9%) | 171 (21.1%) |
| Yes | 552 (78.5%) | 86 (81.1%) | 638 (78.9%) |
| Temporary.Stop.Number | |||
| None | 349 (49.6%) | 77 (72.6%) | 426 (52.7%) |
| 1 stop | 255 (36.3%) | 10 (9.4%) | 265 (32.8%) |
| >=2 stops | 99 (14.1%) | 19 (17.9%) | 118 (14.6%) |
| Mode.Choice.Reason | |||
| Safety | 84 (11.9%) | 31 (29.2%) | 115 (14.2%) |
| Comfortable | 324 (46.1%) | 39 (36.8%) | 363 (44.9%) |
| Low price | 40 (5.7%) | 33 (31.1%) | 73 (9.0%) |
| Accessibility | 177 (25.2%) | 1 (0.9%) | 178 (22.0%) |
| Reliability | 56 (8.0%) | 0 (0%) | 56 (6.9%) |
| others | 22 (3.1%) | 2 (1.9%) | 24 (3.0%) |
| Weather | |||
| Sunny | 469 (66.7%) | 72 (67.9%) | 541 (66.9%) |
| Rainny | 10 (1.4%) | 8 (7.5%) | 18 (2.2%) |
| Cool | 224 (31.9%) | 26 (24.5%) | 250 (30.9%) |
| Weekend | |||
| No | 528 (75.1%) | 84 (79.2%) | 612 (75.6%) |
| Yes | 175 (24.9%) | 22 (20.8%) | 197 (24.4%) |
| Non.Bus.Reason | |||
| No Route | 159 (22.6%) | 0 (0%) | 159 (19.7%) |
| Uncomfortable | 218 (31.0%) | 0 (0%) | 218 (26.9%) |
| Unsafety | 18 (2.6%) | 0 (0%) | 18 (2.2%) |
| Long waiting time | 204 (29.0%) | 0 (0%) | 204 (25.2%) |
| Unreliability | 63 (9.0%) | 0 (0%) | 63 (7.8%) |
| others | 41 (5.8%) | 0 (0%) | 41 (5.1%) |
| Missing | 0 (0%) | 106 (100%) | 106 (13.1%) |
| Bus.Stop.Present | |||
| No | 64 (9.1%) | 17 (16.0%) | 81 (10.0%) |
| Yes | 559 (79.5%) | 84 (79.2%) | 643 (79.5%) |
| Don't know | 80 (11.4%) | 5 (4.7%) | 85 (10.5%) |
| Gender | |||
| Female | 297 (42.2%) | 42 (39.6%) | 339 (41.9%) |
| Male | 406 (57.8%) | 64 (60.4%) | 470 (58.1%) |
| Age | |||
| 15-24 | 296 (42.1%) | 60 (56.6%) | 356 (44.0%) |
| 25-60 | 389 (55.3%) | 26 (24.5%) | 415 (51.3%) |
| >60 | 18 (2.6%) | 20 (18.9%) | 38 (4.7%) |
| Occupation | |||
| Pupils/Students | 234 (33.3%) | 50 (47.2%) | 284 (35.1%) |
| Officers/Workers | 204 (29.0%) | 14 (13.2%) | 218 (26.9%) |
| Housewife/Unemployed | 42 (6.0%) | 12 (11.3%) | 54 (6.7%) |
| Free labor | 172 (24.5%) | 12 (11.3%) | 184 (22.7%) |
| Others | 51 (7.3%) | 18 (17.0%) | 69 (8.5%) |
| Income | |||
| <8 millions | 151 (21.5%) | 48 (45.3%) | 199 (24.6%) |
| (8-15) millions | 297 (42.2%) | 29 (27.4%) | 326 (40.3%) |
| (15-25) millions | 176 (25.0%) | 26 (24.5%) | 202 (25.0%) |
| >25 millions | 79 (11.2%) | 3 (2.8%) | 82 (10.1%) |
| Number.of.Children | |||
| None | 295 (42.0%) | 63 (59.4%) | 358 (44.3%) |
| 1 child | 285 (40.5%) | 23 (21.7%) | 308 (38.1%) |
| 2 children | 100 (14.2%) | 18 (17.0%) | 118 (14.6%) |
| >= 3 children | 23 (3.3%) | 2 (1.9%) | 25 (3.1%) |
| Motor.Certificate | |||
| No | 59 (8.4%) | 35 (33.0%) | 94 (11.6%) |
| Yes | 644 (91.6%) | 71 (67.0%) | 715 (88.4%) |
| Car.Certificate | |||
| No | 554 (78.8%) | 100 (94.3%) | 654 (80.8%) |
| Yes | 149 (21.2%) | 6 (5.7%) | 155 (19.2%) |
| Bicycle.Owning | |||
| No | 429 (61.0%) | 60 (56.6%) | 489 (60.4%) |
| Yes | 274 (39.0%) | 46 (43.4%) | 320 (39.6%) |
| Motor.Owning | |||
| No | 99 (14.1%) | 48 (45.3%) | 147 (18.2%) |
| Yes | 604 (85.9%) | 58 (54.7%) | 662 (81.8%) |
| Car.Owning | |||
| No | 588 (83.6%) | 102 (96.2%) | 690 (85.3%) |
| Yes | 115 (16.4%) | 4 (3.8%) | 119 (14.7%) |
| Number.of.Bicycles | |||
| None | 326 (46.4%) | 38 (35.8%) | 364 (45.0%) |
| 1 | 287 (40.8%) | 58 (54.7%) | 345 (42.6%) |
| 2 | 80 (11.4%) | 9 (8.5%) | 89 (11.0%) |
| >=3 | 10 (1.4%) | 1 (0.9%) | 11 (1.4%) |
| Number.of.Motors | |||
| None | 19 (2.7%) | 13 (12.3%) | 32 (4.0%) |
| 1 | 102 (14.5%) | 26 (24.5%) | 128 (15.8%) |
| 2 | 356 (50.6%) | 48 (45.3%) | 404 (49.9%) |
| 3 | 177 (25.2%) | 16 (15.1%) | 193 (23.9%) |
| >3 | 49 (7.0%) | 3 (2.8%) | 52 (6.4%) |
| Number.of.Car | |||
| None | 533 (75.8%) | 97 (91.5%) | 630 (77.9%) |
| 1 | 152 (21.6%) | 8 (7.5%) | 160 (19.8%) |
| >=2 | 18 (2.6%) | 1 (0.9%) | 19 (2.3%) |
| Distance | |||
| Mean (SD) | 6.36 (5.52) | 8.59 (4.77) | 6.65 (5.48) |
| Median [Min, Max] | 5.00 [0.0150, 35.0] | 8.00 [0.500, 25.0] | 5.00 [0.0150, 35.0] |
| Travel.Period | |||
| Mean (SD) | 16.9 (15.6) | 21.6 (12.5) | 17.5 (15.3) |
| Median [Min, Max] | 15.0 [0.00, 180] | 20.0 [3.00, 60.0] | 15.0 [0.00, 180] |
| Cost | |||
| Mean (SD) | 9.28 (17.2) | 5.24 (1.07) | 8.75 (16.1) |
| Median [Min, Max] | 5.00 [0.00, 285] | 5.00 [5.00, 10.0] | 5.00 [0.00, 285] |
# Descriptive analyse with package "compareGroups" - sort by Bus variable (hon table1 la cung cap them tri so p. Doi voi bien phan loai, tri so p cung duoc tinh theo Chi-Square)
library(compareGroups)
## Loading required package: SNPassoc
## Loading required package: haplo.stats
## Loading required package: survival
## Loading required package: mvtnorm
## Loading required package: parallel
## Registered S3 method overwritten by 'SNPassoc':
## method from
## summary.haplo.glm haplo.stats
t = compareGroups(Bus ~ Bus.Stop.Condition + Central.Area + Purpose + Frequency + Departure.Time + Sidewalk.Clearance + Lane.Separate + Temporary.Stop.Number + Mode.Choice.Reason + Weather + Weekend + Non.Bus.Reason + Bus.Stop.Present + Gender + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Bicycle.Owning + Motor.Owning + Car.Owning + Number.of.Bicycles + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, data = datBus)
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## [1] "Error in binom.test(nn[i, j], n.ij, conf.level = conf.level) : \n 'n' must be a positive integer >= 'x'\n"
## attr(,"class")
## [1] "try-error"
## attr(,"condition")
## <simpleError in binom.test(nn[i, j], n.ij, conf.level = conf.level): 'n' must be a positive integer >= 'x'>
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in compareGroups.fit(X = X, y = y, include.label = include.label, :
## Variables 'Non.Bus.Reason' have been removed since some errors occurred
createTable(t)
##
## --------Summary descriptives table by 'Bus'---------
##
## __________________________________________________________
## No Yes p.overall
## N=703 N=106
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Bus.Stop.Condition: 0.068
## No 334 (47.5%) 61 (57.5%)
## Yes 369 (52.5%) 45 (42.5%)
## Central.Area: 0.002
## No 387 (55.0%) 41 (38.7%)
## Yes 316 (45.0%) 65 (61.3%)
## Purpose: 0.014
## Work/Study 463 (65.9%) 77 (72.6%)
## Picking Children 85 (12.1%) 9 (8.49%)
## Entertainment 140 (19.9%) 13 (12.3%)
## Others 15 (2.13%) 7 (6.60%)
## Frequency: <0.001
## Once 42 (5.97%) 13 (12.3%)
## 2 times 44 (6.26%) 18 (17.0%)
## 3 times 52 (7.40%) 19 (17.9%)
## > 3 times 565 (80.4%) 56 (52.8%)
## Departure.Time: 0.105
## Morning 425 (60.5%) 71 (67.0%)
## Afternoon 55 (7.82%) 11 (10.4%)
## Evening 117 (16.6%) 17 (16.0%)
## Others 106 (15.1%) 7 (6.60%)
## Sidewalk.Clearance: 0.979
## No 95 (13.5%) 15 (14.2%)
## Yes 608 (86.5%) 91 (85.8%)
## Lane.Separate: 0.627
## No 151 (21.5%) 20 (18.9%)
## Yes 552 (78.5%) 86 (81.1%)
## Temporary.Stop.Number: <0.001
## None 349 (49.6%) 77 (72.6%)
## 1 stop 255 (36.3%) 10 (9.43%)
## >=2 stops 99 (14.1%) 19 (17.9%)
## Mode.Choice.Reason: .
## Safety 84 (11.9%) 31 (29.2%)
## Comfortable 324 (46.1%) 39 (36.8%)
## Low price 40 (5.69%) 33 (31.1%)
## Accessibility 177 (25.2%) 1 (0.94%)
## Reliability 56 (7.97%) 0 (0.00%)
## others 22 (3.13%) 2 (1.89%)
## Weather: 0.001
## Sunny 469 (66.7%) 72 (67.9%)
## Rainny 10 (1.42%) 8 (7.55%)
## Cool 224 (31.9%) 26 (24.5%)
## Weekend: 0.421
## No 528 (75.1%) 84 (79.2%)
## Yes 175 (24.9%) 22 (20.8%)
## Bus.Stop.Present: 0.016
## No 64 (9.10%) 17 (16.0%)
## Yes 559 (79.5%) 84 (79.2%)
## Don't know 80 (11.4%) 5 (4.72%)
## Gender: 0.685
## Female 297 (42.2%) 42 (39.6%)
## Male 406 (57.8%) 64 (60.4%)
## Age: <0.001
## 15-24 296 (42.1%) 60 (56.6%)
## 25-60 389 (55.3%) 26 (24.5%)
## >60 18 (2.56%) 20 (18.9%)
## Occupation: <0.001
## Pupils/Students 234 (33.3%) 50 (47.2%)
## Officers/Workers 204 (29.0%) 14 (13.2%)
## Housewife/Unemployed 42 (5.97%) 12 (11.3%)
## Free labor 172 (24.5%) 12 (11.3%)
## Others 51 (7.25%) 18 (17.0%)
## Income: <0.001
## <8 millions 151 (21.5%) 48 (45.3%)
## (8-15) millions 297 (42.2%) 29 (27.4%)
## (15-25) millions 176 (25.0%) 26 (24.5%)
## >25 millions 79 (11.2%) 3 (2.83%)
## Number.of.Children: 0.001
## None 295 (42.0%) 63 (59.4%)
## 1 child 285 (40.5%) 23 (21.7%)
## 2 children 100 (14.2%) 18 (17.0%)
## >= 3 children 23 (3.27%) 2 (1.89%)
## Motor.Certificate: <0.001
## No 59 (8.39%) 35 (33.0%)
## Yes 644 (91.6%) 71 (67.0%)
## Car.Certificate: <0.001
## No 554 (78.8%) 100 (94.3%)
## Yes 149 (21.2%) 6 (5.66%)
## Bicycle.Owning: 0.447
## No 429 (61.0%) 60 (56.6%)
## Yes 274 (39.0%) 46 (43.4%)
## Motor.Owning: <0.001
## No 99 (14.1%) 48 (45.3%)
## Yes 604 (85.9%) 58 (54.7%)
## Car.Owning: 0.001
## No 588 (83.6%) 102 (96.2%)
## Yes 115 (16.4%) 4 (3.77%)
## Number.of.Bicycles: 0.065
## None 326 (46.4%) 38 (35.8%)
## 1 287 (40.8%) 58 (54.7%)
## 2 80 (11.4%) 9 (8.49%)
## >=3 10 (1.42%) 1 (0.94%)
## Number.of.Motors: <0.001
## None 19 (2.70%) 13 (12.3%)
## 1 102 (14.5%) 26 (24.5%)
## 2 356 (50.6%) 48 (45.3%)
## 3 177 (25.2%) 16 (15.1%)
## >3 49 (6.97%) 3 (2.83%)
## Number.of.Car: 0.001
## None 533 (75.8%) 97 (91.5%)
## 1 152 (21.6%) 8 (7.55%)
## >=2 18 (2.56%) 1 (0.94%)
## Distance 6.36 (5.52) 8.59 (4.77) <0.001
## Travel.Period 16.9 (15.6) 21.6 (12.5) 0.001
## Cost 9.28 (17.2) 5.24 (1.07) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Remove variables with p-value > 0.05: Bus.Stop.Condition, Departure.Time, Sidewalk.Clearance, Lane.Separate, Mode.Choice.Reason, Non.Bus.Reason, Weekend, Gender, Bicycle.Owning, Number.of.Bicycles
t1 = compareGroups(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, data = datBus)
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation
## may be incorrect
createTable(t1)
##
## --------Summary descriptives table by 'Bus'---------
##
## __________________________________________________________
## No Yes p.overall
## N=703 N=106
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Central.Area: 0.002
## No 387 (55.0%) 41 (38.7%)
## Yes 316 (45.0%) 65 (61.3%)
## Purpose: 0.014
## Work/Study 463 (65.9%) 77 (72.6%)
## Picking Children 85 (12.1%) 9 (8.49%)
## Entertainment 140 (19.9%) 13 (12.3%)
## Others 15 (2.13%) 7 (6.60%)
## Frequency: <0.001
## Once 42 (5.97%) 13 (12.3%)
## 2 times 44 (6.26%) 18 (17.0%)
## 3 times 52 (7.40%) 19 (17.9%)
## > 3 times 565 (80.4%) 56 (52.8%)
## Temporary.Stop.Number: <0.001
## None 349 (49.6%) 77 (72.6%)
## 1 stop 255 (36.3%) 10 (9.43%)
## >=2 stops 99 (14.1%) 19 (17.9%)
## Weather: 0.001
## Sunny 469 (66.7%) 72 (67.9%)
## Rainny 10 (1.42%) 8 (7.55%)
## Cool 224 (31.9%) 26 (24.5%)
## Bus.Stop.Present: 0.016
## No 64 (9.10%) 17 (16.0%)
## Yes 559 (79.5%) 84 (79.2%)
## Don't know 80 (11.4%) 5 (4.72%)
## Age: <0.001
## 15-24 296 (42.1%) 60 (56.6%)
## 25-60 389 (55.3%) 26 (24.5%)
## >60 18 (2.56%) 20 (18.9%)
## Occupation: <0.001
## Pupils/Students 234 (33.3%) 50 (47.2%)
## Officers/Workers 204 (29.0%) 14 (13.2%)
## Housewife/Unemployed 42 (5.97%) 12 (11.3%)
## Free labor 172 (24.5%) 12 (11.3%)
## Others 51 (7.25%) 18 (17.0%)
## Income: <0.001
## <8 millions 151 (21.5%) 48 (45.3%)
## (8-15) millions 297 (42.2%) 29 (27.4%)
## (15-25) millions 176 (25.0%) 26 (24.5%)
## >25 millions 79 (11.2%) 3 (2.83%)
## Number.of.Children: 0.001
## None 295 (42.0%) 63 (59.4%)
## 1 child 285 (40.5%) 23 (21.7%)
## 2 children 100 (14.2%) 18 (17.0%)
## >= 3 children 23 (3.27%) 2 (1.89%)
## Motor.Certificate: <0.001
## No 59 (8.39%) 35 (33.0%)
## Yes 644 (91.6%) 71 (67.0%)
## Car.Certificate: <0.001
## No 554 (78.8%) 100 (94.3%)
## Yes 149 (21.2%) 6 (5.66%)
## Motor.Owning: <0.001
## No 99 (14.1%) 48 (45.3%)
## Yes 604 (85.9%) 58 (54.7%)
## Car.Owning: 0.001
## No 588 (83.6%) 102 (96.2%)
## Yes 115 (16.4%) 4 (3.77%)
## Number.of.Motors: <0.001
## None 19 (2.70%) 13 (12.3%)
## 1 102 (14.5%) 26 (24.5%)
## 2 356 (50.6%) 48 (45.3%)
## 3 177 (25.2%) 16 (15.1%)
## >3 49 (6.97%) 3 (2.83%)
## Number.of.Car: 0.001
## None 533 (75.8%) 97 (91.5%)
## 1 152 (21.6%) 8 (7.55%)
## >=2 18 (2.56%) 1 (0.94%)
## Distance 6.36 (5.52) 8.59 (4.77) <0.001
## Travel.Period 16.9 (15.6) 21.6 (12.5) 0.001
## Cost 9.28 (17.2) 5.24 (1.07) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
3. Descriptive analyse by graphs
library(magrittr)
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::extract() masks magrittr::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
datBus %>%
group_by(Bus.Stop.Condition, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Bus.Stop.Condition)) +
geom_col(position = "fill") +
xlab("Bus") +
ylab("Bus Stop Condition") +
ggtitle("Proportion of Bus Choice ~ Bus Stop Condition")
datBus %>%
group_by(Central.Area, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Central.Area)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Central Area") +
ggtitle("Proportion of Bus Choice ~ Central Area")
datBus %>%
group_by(Purpose, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Purpose)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Purpose of travelling") +
ggtitle("Proportion of Bus Choice ~ Purpose of Travelling")
datBus %>%
group_by(Frequency, Purpose) %>%
count() %>%
ggplot(aes(Purpose, n, fill = Frequency)) +
geom_col(position = "fill") +
xlab("Purpose") +
ylab("Frequency") +
ggtitle("Proportion of Purpose ~ Frequency")
datBus %>%
group_by(Frequency, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Frequency)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Frequency of Travelling") +
ggtitle("Proportion of Bus Choice ~ Frequency of Travelling")
datBus %>%
group_by(Departure.Time, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Departure.Time)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Departure time of travel") +
ggtitle("Proportion of Bus Choice ~ Departure Time of Travel")
datBus %>%
group_by(Sidewalk.Clearance, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Sidewalk.Clearance)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Sidewalk Clearance of Roads") +
ggtitle("Proportion of Bus Choice ~ Sidewalk Clearance of Roads")
datBus %>%
group_by(Lane.Separate, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Lane.Separate)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Lane Separate") +
ggtitle("Proportion of Bus Choice ~ Lane Separate of Roads")
datBus %>%
group_by(Temporary.Stop.Number, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Temporary.Stop.Number)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("The number of temporary stops") +
ggtitle("Proportion of Bus Choice ~ The number of temporary stops")
datBus %>%
group_by(Mode.Choice.Reason, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Mode.Choice.Reason)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("The reason of motor mode choice") +
ggtitle("Proportion of Bus Choice ~ Motor Choice Reason")
datBus %>%
group_by(Weather, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Weather)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Weather condition") +
ggtitle("Proportion of Bus Choice ~ Weather condition")
datBus %>%
group_by(Weekend, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Weekend)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Weekend") +
ggtitle("Proportion of Bus Choice ~ Weekend")
datBus %>%
group_by(Bus.Stop.Present, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Bus.Stop.Present)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("The presence of bus stop") +
ggtitle("Proportion of Bus Choice ~ The presence of bus stop")
datBus %>%
group_by(Gender, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Gender)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Gender") +
ggtitle("Proportion of Bus Choice ~ Gender")
datBus %>%
group_by(Age, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Age)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Age") +
ggtitle("Proportion of Bus Choice ~ Age")
datBus %>%
group_by(Occupation, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Occupation)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Occupation") +
ggtitle("Proportion of Bus Choice ~ Occupation")
datBus %>%
group_by(Income, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Income)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Income") +
ggtitle("Proportion of Bus Choice ~ Income")
datBus %>%
group_by(Number.of.Children, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Number.of.Children)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Number of Children") +
ggtitle("Proportion of Bus Choice ~ Number of Children in family")
datBus %>%
group_by(Motor.Certificate, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Motor.Certificate)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Motor Certificate") +
ggtitle("Proportion of Bus Choice ~ Motor certificate")
datBus %>%
group_by(Car.Certificate, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Car.Certificate)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Car Certificate") +
ggtitle("Proportion of Bus Choice ~ Car certificate")
datBus %>%
group_by(Bicycle.Owning, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Bicycle.Owning)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Bicycle Owning") +
ggtitle("Proportion of Bus Choice ~ Bicycle Owning")
datBus %>%
group_by(Motor.Owning, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Motor.Owning)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Motor Owning") +
ggtitle("Proportion of Bus Choice ~ Motor Owning")
datBus %>%
group_by(Car.Owning, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Car.Owning)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Car Owning") +
ggtitle("Proportion of Bus Choice ~ Car Owning")
datBus %>%
group_by(Number.of.Bicycles, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Number.of.Bicycles)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Number of Bicycle") +
ggtitle("Proportion of Bus Choice ~ Number of Bicycle")
datBus %>%
group_by(Number.of.Motors, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Number.of.Motors)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Number of Motors") +
ggtitle("Proportion of Bus Choice ~ Number of Motors")
datBus %>%
group_by(Number.of.Car, Bus) %>%
count() %>%
ggplot(aes(Bus, n, fill = Number.of.Car)) +
geom_col(position = "fill") +
xlab("Bus Choice") +
ylab("Number of Cars") +
ggtitle("Proportion of Bus Choice ~ Number of Cars")
## Travel Mode Choice ~ Countinuous Variables (Distance, Travel Period and Cost)
ggplot(datBus, aes(x=Distance, fill = Bus, color = Bus)) +
geom_histogram (position = "dodge") +
xlab("Distance") +
ylab("Count") +
ggtitle("Histogram of Distance")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(datBus, aes(x=Travel.Period, fill = Bus, color = Bus)) +
geom_histogram (position = "dodge") +
xlab("Travel Period") +
ylab("Count") +
ggtitle("Histogram of Travel Period")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(datBus, aes(x=Cost, fill = Bus, color = Bus)) +
geom_histogram (position = "dodge") +
xlab("Cost") +
ylab("Count") +
ggtitle("Histogram of Cost")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Correlation in continuous variables
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
CVBus = data.frame(datBus$Bus, datBus$Distance, datBus$Travel.Period, datBus$Cost)
pairs.panels(CVBus)
## Statistic Analysis by boxplot (continuous variales)
datBus %>%
group_by(Distance, Bus) %>%
count() %>%
ggplot(aes(x = Bus, y = Distance, fill = Bus)) +
geom_boxplot() +
xlab("Bus Choice") +
ylab("Distance") +
ggtitle("Boxplot of Bus Choice ~ Distance")
datBus %>%
group_by(Cost, Bus) %>%
count() %>%
ggplot(aes(x = Bus, y = Cost, fill = Bus)) +
geom_boxplot() +
xlab("Bus Choice") +
ylab("Cost") +
ggtitle("Boxplot of Bus Choice ~ Cost")
datBus %>%
group_by(Travel.Period, Bus) %>%
count() %>%
ggplot(aes(x = Bus, y = Travel.Period, fill = Bus)) +
geom_boxplot() +
xlab("Bus Choice") +
ylab("Travel Period") +
ggtitle("Boxplot of Bus Choice ~ Travel Period")
### Boxplot of Distance, Travel Period and Cost ~ Gender
datBus %>%
group_by(Distance, Bus) %>%
count() %>%
ggplot(aes(x = Bus, y = Distance, fill = Bus)) +
geom_boxplot() +
xlab("Bus Choice") +
ylab("Distance") +
ggtitle("Boxplot of Distance ~ Bus Choice")
datBus %>%
group_by(Cost, Bus) %>%
count() %>%
ggplot(aes(x = Bus, y = Cost, fill = Bus)) +
geom_boxplot() +
xlab("Bus") +
ylab("Cost") +
ggtitle("Boxplot of Cost ~ Bus")
datBus %>%
group_by(Travel.Period, Bus) %>%
count() %>%
ggplot(aes(x = Bus, y = Travel.Period, fill = Bus)) +
geom_boxplot() +
xlab("Bus") +
ylab("Travel Period") +
ggtitle("Boxplot of Travel Period ~ Bus")
### Boxplot of Distance, Travel Period and Cost ~ Occupation
datBus %>%
group_by(Distance, Occupation) %>%
count() %>%
ggplot(aes(x = Occupation, y = Distance, fill = Occupation)) +
geom_boxplot() +
xlab("Occupation") +
ylab("Distance") +
ggtitle("Boxplot of Distance ~ Occupation")
datBus %>%
group_by(Cost, Occupation) %>%
count() %>%
ggplot(aes(x = Occupation, y = Cost, fill = Occupation)) +
geom_boxplot() +
xlab("Occupation") +
ylab("Cost") +
ggtitle("Boxplot of Cost ~ Occupation")
datBus %>%
group_by(Travel.Period, Occupation) %>%
count() %>%
ggplot(aes(x = Occupation, y = Travel.Period, fill = Occupation)) +
geom_boxplot() +
xlab("Occupation") +
ylab("Travel Period") +
ggtitle("Boxplot of Travel Period ~ Occupation")
summary(datBus)
## Bus.Stop.Condition Central.Area Purpose Frequency
## No :395 No :428 Work/Study :540 Once : 55
## Yes:414 Yes:381 Picking Children: 94 2 times : 62
## Entertainment :153 3 times : 71
## Others : 22 > 3 times:621
##
##
##
## Departure.Time Distance Travel.Period Sidewalk.Clearance
## Morning :496 Min. : 0.015 Min. : 0.00 No :110
## Afternoon: 66 1st Qu.: 3.000 1st Qu.: 10.00 Yes:699
## Evening :134 Median : 5.000 Median : 15.00
## Others :113 Mean : 6.654 Mean : 17.55
## 3rd Qu.:10.000 3rd Qu.: 20.00
## Max. :35.000 Max. :180.00
##
## Lane.Separate Temporary.Stop.Number Mode.Choice.Reason Weather
## No :171 None :426 Safety :115 Sunny :541
## Yes:638 1 stop :265 Comfortable :363 Rainny: 18
## >=2 stops:118 Low price : 73 Cool :250
## Accessibility:178
## Reliability : 56
## others : 24
##
## Weekend Non.Bus.Reason Cost Bus.Stop.Present
## No :612 No Route :159 Min. : 0.000 No : 81
## Yes:197 Uncomfortable :218 1st Qu.: 3.000 Yes :643
## Unsafety : 18 Median : 5.000 Don't know: 85
## Long waiting time:204 Mean : 8.754
## Unreliability : 63 3rd Qu.: 10.000
## others : 41 Max. :285.000
## NA's :106
## Gender Age Occupation
## Female:339 15-24:356 Pupils/Students :284
## Male :470 25-60:415 Officers/Workers :218
## >60 : 38 Housewife/Unemployed: 54
## Free labor :184
## Others : 69
##
##
## Income Number.of.Children Motor.Certificate
## <8 millions :199 None :358 No : 94
## (8-15) millions :326 1 child :308 Yes:715
## (15-25) millions:202 2 children :118
## >25 millions : 82 >= 3 children: 25
##
##
##
## Car.Certificate Bicycle.Owning Motor.Owning Car.Owning Number.of.Bicycles
## No :654 No :489 No :147 No :690 None:364
## Yes:155 Yes:320 Yes:662 Yes:119 1 :345
## 2 : 89
## >=3 : 11
##
##
##
## Number.of.Motors Number.of.Car Bus
## None: 32 None:630 No :703
## 1 :128 1 :160 Yes:106
## 2 :404 >=2 : 19
## 3 :193
## >3 : 52
##
##
3. Binary Model
# Model with multi-variables (remove variables with p-value>0,05 in t1) --> 19 variables
mg = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, family = binomial, data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
mg
##
## Call: glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Weather + Bus.Stop.Present + Age + Occupation + Income +
## Number.of.Children + Motor.Certificate + Car.Certificate +
## Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car +
## Distance + Travel.Period + Cost, family = binomial, data = datBus)
##
## Coefficients:
## (Intercept) Central.AreaYes
## 0.693170 1.153570
## PurposePicking Children PurposeEntertainment
## -0.846264 -2.128209
## PurposeOthers Frequency2 times
## -0.241582 1.663772
## Frequency3 times Frequency> 3 times
## 0.551286 -1.081913
## Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops
## -2.088256 0.007966
## WeatherRainny WeatherCool
## 1.875750 -0.560382
## Bus.Stop.PresentYes Bus.Stop.PresentDon't know
## -0.648223 -1.082582
## Age25-60 Age>60
## -0.681496 2.254365
## OccupationOfficers/Workers OccupationHousewife/Unemployed
## 0.203182 2.166113
## OccupationFree labor OccupationOthers
## -0.369467 1.387882
## Income(8-15) millions Income(15-25) millions
## -0.937300 -0.285344
## Income>25 millions Number.of.Children1 child
## -2.316472 -0.666788
## Number.of.Children2 children Number.of.Children>= 3 children
## -0.302515 -0.920554
## Motor.CertificateYes Car.CertificateYes
## -1.343825 0.038586
## Motor.OwningYes Car.OwningYes
## -0.746040 0.036269
## Number.of.Motors1 Number.of.Motors2
## 0.618812 0.222040
## Number.of.Motors3 Number.of.Motors>3
## -0.523223 -0.510007
## Number.of.Car1 Number.of.Car>=2
## -1.162398 -1.072850
## Distance Travel.Period
## 0.688046 0.009410
## Cost
## -0.618352
##
## Degrees of Freedom: 808 Total (i.e. Null); 770 Residual
## Null Deviance: 628.3
## Residual Deviance: 254.3 AIC: 332.3
summary(mg)
##
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Weather + Bus.Stop.Present + Age + Occupation + Income +
## Number.of.Children + Motor.Certificate + Car.Certificate +
## Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car +
## Distance + Travel.Period + Cost, family = binomial, data = datBus)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5799 -0.2733 -0.1226 -0.0225 3.5052
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.693170 1.122728 0.617 0.536972
## Central.AreaYes 1.153570 0.369569 3.121 0.001800 **
## PurposePicking Children -0.846264 0.775170 -1.092 0.274959
## PurposeEntertainment -2.128209 0.613702 -3.468 0.000525 ***
## PurposeOthers -0.241582 0.938275 -0.257 0.796812
## Frequency2 times 1.663772 0.810579 2.053 0.040114 *
## Frequency3 times 0.551286 0.817643 0.674 0.500160
## Frequency> 3 times -1.081913 0.669912 -1.615 0.106309
## Temporary.Stop.Number1 stop -2.088256 0.549980 -3.797 0.000146 ***
## Temporary.Stop.Number>=2 stops 0.007966 0.491717 0.016 0.987075
## WeatherRainny 1.875750 1.022332 1.835 0.066539 .
## WeatherCool -0.560382 0.483314 -1.159 0.246270
## Bus.Stop.PresentYes -0.648223 0.566922 -1.143 0.252869
## Bus.Stop.PresentDon't know -1.082582 0.864050 -1.253 0.210236
## Age25-60 -0.681496 0.689868 -0.988 0.323219
## Age>60 2.254365 0.977059 2.307 0.021038 *
## OccupationOfficers/Workers 0.203182 0.735185 0.276 0.782265
## OccupationHousewife/Unemployed 2.166113 1.018430 2.127 0.033427 *
## OccupationFree labor -0.369467 0.702315 -0.526 0.598840
## OccupationOthers 1.387882 0.899394 1.543 0.122799
## Income(8-15) millions -0.937300 0.434752 -2.156 0.031088 *
## Income(15-25) millions -0.285344 0.510462 -0.559 0.576167
## Income>25 millions -2.316472 1.157201 -2.002 0.045307 *
## Number.of.Children1 child -0.666788 0.428135 -1.557 0.119370
## Number.of.Children2 children -0.302515 0.608433 -0.497 0.619045
## Number.of.Children>= 3 children -0.920554 1.460357 -0.630 0.528457
## Motor.CertificateYes -1.343825 0.544182 -2.469 0.013532 *
## Car.CertificateYes 0.038586 0.945690 0.041 0.967454
## Motor.OwningYes -0.746040 0.527259 -1.415 0.157086
## Car.OwningYes 0.036269 1.448918 0.025 0.980030
## Number.of.Motors1 0.618812 0.808973 0.765 0.444310
## Number.of.Motors2 0.222040 0.802489 0.277 0.782019
## Number.of.Motors3 -0.523223 0.881065 -0.594 0.552610
## Number.of.Motors>3 -0.510007 1.380742 -0.369 0.711851
## Number.of.Car1 -1.162398 0.858510 -1.354 0.175745
## Number.of.Car>=2 -1.072850 2.226960 -0.482 0.629980
## Distance 0.688046 0.098829 6.962 3.36e-12 ***
## Travel.Period 0.009410 0.015030 0.626 0.531270
## Cost -0.618352 0.094949 -6.512 7.39e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 254.29 on 770 degrees of freedom
## AIC: 332.29
##
## Number of Fisher Scoring iterations: 9
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6) --> 12 variables
mg1 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
mg1
##
## Call: glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Weather + Bus.Stop.Present + Age + Income + Motor.Certificate +
## Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
##
## Coefficients:
## (Intercept) Central.AreaYes
## 1.802519 0.905130
## PurposePicking Children PurposeEntertainment
## -1.087577 -2.484922
## PurposeOthers Frequency2 times
## -1.010247 0.386235
## Frequency3 times Frequency> 3 times
## 0.128561 -1.882813
## Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops
## -1.754352 -0.050375
## WeatherRainny WeatherCool
## 2.362667 0.009817
## Bus.Stop.PresentYes Bus.Stop.PresentDon't know
## -0.576247 -0.808657
## Age25-60 Age>60
## -0.180929 2.886523
## Income(8-15) millions Income(15-25) millions
## -0.609947 -0.447394
## Income>25 millions Motor.CertificateYes
## -2.362587 -1.332178
## Motor.OwningYes Travel.Period
## -0.455052 0.036497
## Cost
## -0.092782
##
## Degrees of Freedom: 808 Total (i.e. Null); 786 Residual
## Null Deviance: 628.3
## Residual Deviance: 382.3 AIC: 428.3
summary(mg1)
##
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Weather + Bus.Stop.Present + Age + Income + Motor.Certificate +
## Motor.Owning + Travel.Period + Cost, family = binomial, data = datBus)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6136 -0.4119 -0.2060 -0.0914 3.2337
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.802519 0.677629 2.660 0.007813 **
## Central.AreaYes 0.905130 0.292539 3.094 0.001974 **
## PurposePicking Children -1.087577 0.597356 -1.821 0.068660 .
## PurposeEntertainment -2.484922 0.483074 -5.144 2.69e-07 ***
## PurposeOthers -1.010247 0.761896 -1.326 0.184851
## Frequency2 times 0.386235 0.572849 0.674 0.500162
## Frequency3 times 0.128561 0.575826 0.223 0.823330
## Frequency> 3 times -1.882813 0.479622 -3.926 8.65e-05 ***
## Temporary.Stop.Number1 stop -1.754352 0.412500 -4.253 2.11e-05 ***
## Temporary.Stop.Number>=2 stops -0.050375 0.377640 -0.133 0.893881
## WeatherRainny 2.362667 0.718862 3.287 0.001014 **
## WeatherCool 0.009817 0.345467 0.028 0.977330
## Bus.Stop.PresentYes -0.576247 0.416923 -1.382 0.166928
## Bus.Stop.PresentDon't know -0.808657 0.674309 -1.199 0.230435
## Age25-60 -0.180929 0.327045 -0.553 0.580111
## Age>60 2.886523 0.573149 5.036 4.75e-07 ***
## Income(8-15) millions -0.609947 0.322454 -1.892 0.058547 .
## Income(15-25) millions -0.447394 0.373637 -1.197 0.231150
## Income>25 millions -2.362587 0.789739 -2.992 0.002775 **
## Motor.CertificateYes -1.332178 0.391963 -3.399 0.000677 ***
## Motor.OwningYes -0.455052 0.355347 -1.281 0.200340
## Travel.Period 0.036497 0.009735 3.749 0.000177 ***
## Cost -0.092782 0.031029 -2.990 0.002788 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 382.27 on 786 degrees of freedom
## AIC: 428.27
##
## Number of Fisher Scoring iterations: 7
## Way 2: Use function lmr in package "rms" - library(rms) - ml = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost) - ml
# Find Parsimonious model - Tìm mô hình tối ưu
library(BMA)
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.4-9)
xvars <- datBus[, c(2,3,4,10,12,16,18,19,20,21,22,23,25,26,28,29,6,7,15)]
y <- Bus
bma.search <- bic.glm(xvars, y, strict = F, OR = 20, glm.family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(bma.search)
##
## Call:
## bic.glm.data.frame(x = xvars, y = y, glm.family = "binomial", strict = F, OR = 20)
##
##
## 7 models were selected
## Best 5 models (cumulative posterior probability = 0.9455 ):
##
## p!=0 EV SD model 1
## Intercept 100 -1.41592 0.66406 -1.7933
## Central.Area 100.0
## .Yes 1.13384 0.31484 1.1049
## Purpose 18.3
## .Picking Children -0.12694 0.38890 .
## .Entertainment -0.38154 0.83803 .
## .Others -0.05927 0.34303 .
## Frequency 51.5
## .2 times 0.47757 0.66588 .
## .3 times 0.17333 0.50950 .
## .> 3 times -0.55581 0.70787 .
## Temporary.Stop.Number 100.0
## .1 stop -1.80988 0.45388 -1.8490
## .>=2 stops -0.45764 0.44603 -0.5760
## Weather 17.9
## .Rainny 0.26507 0.68785 .
## .Cool -0.16431 0.39171 .
## Bus.Stop.Present 0.0
## .Yes 0.00000 0.00000 .
## .Don't know 0.00000 0.00000 .
## Age 100.0
## .25-60 -0.59843 0.35579 -0.6181
## .>60 2.38357 0.61606 2.1969
## Occupation 0.0
## .Officers/Workers 0.00000 0.00000 .
## .Housewife/Unemployed 0.00000 0.00000 .
## .Free labor 0.00000 0.00000 .
## .Others 0.00000 0.00000 .
## Income 0.0
## .(8-15) millions 0.00000 0.00000 .
## .(15-25) millions 0.00000 0.00000 .
## .>25 millions 0.00000 0.00000 .
## Number.of.Children 0.0
## .1 child 0.00000 0.00000 .
## .2 children 0.00000 0.00000 .
## .>= 3 children 0.00000 0.00000 .
## Motor.Certificate 5.5
## .Yes -0.03976 0.19314 .
## Car.Certificate 0.0
## .Yes 0.00000 0.00000 .
## Motor.Owning 100.0
## .Yes -1.58958 0.35390 -1.6317
## Car.Owning 0.0
## .Yes 0.00000 0.00000 .
## Number.of.Motors 0.0
## .1 0.00000 0.00000 .
## .2 0.00000 0.00000 .
## .3 0.00000 0.00000 .
## .>3 0.00000 0.00000 .
## Number.of.Car 0.0
## .1 0.00000 0.00000 .
## .>=2 0.00000 0.00000 .
## Distance 100.0 0.68250 0.08231 0.6873
## Travel.Period 0.0 0.00000 0.00000 .
## Cost 100.0 -0.60391 0.08271 -0.6075
##
## nVar 6
## BIC -5020.4280
## post prob 0.409
## model 2 model 3 model 4
## Intercept -1.3985 -0.6710 -1.2074
## Central.Area
## .Yes 1.1290 1.1376 1.2050
## Purpose
## .Picking Children . -0.6985 .
## .Entertainment . -2.0828 .
## .Others . -0.3124 .
## Frequency
## .2 times 0.8604 1.0177 0.9266
## .3 times 0.4297 0.1439 0.4171
## .> 3 times -0.8089 -1.5182 -0.9287
## Temporary.Stop.Number
## .1 stop -1.8603 -1.7650 -1.6729
## .>=2 stops -0.5737 -0.2161 -0.2698
## Weather
## .Rainny . . 1.1680
## .Cool . . -1.0502
## Bus.Stop.Present
## .Yes . . .
## .Don't know . . .
## Age
## .25-60 -0.6070 -0.5824 -0.5700
## .>60 2.2247 2.8801 2.5260
## Occupation
## .Officers/Workers . . .
## .Housewife/Unemployed . . .
## .Free labor . . .
## .Others . . .
## Income
## .(8-15) millions . . .
## .(15-25) millions . . .
## .>25 millions . . .
## Number.of.Children
## .1 child . . .
## .2 children . . .
## .>= 3 children . . .
## Motor.Certificate
## .Yes . . .
## Car.Certificate
## .Yes . . .
## Motor.Owning
## .Yes -1.5595 -1.6229 -1.5423
## Car.Owning
## .Yes . . .
## Number.of.Motors
## .1 . . .
## .2 . . .
## .3 . . .
## .>3 . . .
## Number.of.Car
## .1 . . .
## .>=2 . . .
## Distance 0.6700 0.6682 0.7037
## Travel.Period . . .
## Cost -0.5982 -0.5742 -0.6380
##
## nVar 7 8 8
## BIC -5019.0262 -5018.4877 -5017.6613
## post prob 0.203 0.155 0.102
## model 5
## Intercept -1.7367
## Central.Area
## .Yes 1.2087
## Purpose
## .Picking Children .
## .Entertainment .
## .Others .
## Frequency
## .2 times .
## .3 times .
## .> 3 times .
## Temporary.Stop.Number
## .1 stop -1.7093
## .>=2 stops -0.3905
## Weather
## .Rainny 1.8979
## .Cool -0.7401
## Bus.Stop.Present
## .Yes .
## .Don't know .
## Age
## .25-60 -0.5820
## .>60 2.4384
## Occupation
## .Officers/Workers .
## .Housewife/Unemployed .
## .Free labor .
## .Others .
## Income
## .(8-15) millions .
## .(15-25) millions .
## .>25 millions .
## Number.of.Children
## .1 child .
## .2 children .
## .>= 3 children .
## Motor.Certificate
## .Yes .
## Car.Certificate
## .Yes .
## Motor.Owning
## .Yes -1.6291
## Car.Owning
## .Yes .
## Number.of.Motors
## .1 .
## .2 .
## .3 .
## .>3 .
## Number.of.Car
## .1 .
## .>=2 .
## Distance 0.7075
## Travel.Period .
## Cost -0.6365
##
## nVar 7
## BIC -5017.0791
## post prob 0.077
imageplot.bma(bma.search)
## Remove insignificant variables: Occupation (19), Number.of.Children (21), Car.Certificate (23), Car.Owning (26), Number.of.Motors (28), Number.of.Car (29), Distance (6)
library(BMA)
xvars1 <- datBus[, c(2,3,4,10,12,16,18,20,22,25,7,15)]
y1 <- Bus
bma.search1 <- bic.glm(xvars1, y1, strict = F, OR = 20, glm.family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(bma.search1)
##
## Call:
## bic.glm.data.frame(x = xvars1, y = y1, glm.family = "binomial", strict = F, OR = 20)
##
##
## 5 models were selected
## Best 5 models (cumulative posterior probability = 1 ):
##
## p!=0 EV SD model 1
## Intercept 100 0.90900 0.591728 7.761e-01
## Central.Area 83.4
## .Yes 0.69811 0.402090 8.031e-01
## Purpose 100.0
## .Picking Children -1.04356 0.568641 -1.029e+00
## .Entertainment -2.39775 0.456826 -2.369e+00
## .Others -0.98505 0.746565 -9.703e-01
## Frequency 100.0
## .2 times 0.42454 0.559143 4.687e-01
## .3 times 0.16370 0.548297 2.054e-01
## .> 3 times -1.84363 0.465893 -1.801e+00
## Temporary.Stop.Number 100.0
## .1 stop -1.74026 0.399078 -1.749e+00
## .>=2 stops 0.11400 0.351863 1.309e-01
## Weather 24.1
## .Rainny 0.51796 0.974498 .
## .Cool -0.04228 0.179591 .
## Bus.Stop.Present 0.0
## .Yes 0.00000 0.000000 .
## .Don't know 0.00000 0.000000 .
## Age 100.0
## .25-60 -0.42415 0.308633 -4.506e-01
## .>60 2.71003 0.561479 2.663e+00
## Income 4.5
## .(8-15) millions -0.03090 0.157895 .
## .(15-25) millions -0.02235 0.128148 .
## .>25 millions -0.10611 0.518043 .
## Motor.Certificate 100.0
## .Yes -1.57939 0.332623 -1.590e+00
## Motor.Owning 3.0
## .Yes -0.01707 0.113970 .
## Travel.Period 100.0 0.04064 0.009331 4.184e-02
## Cost 100.0 -0.09959 0.028185 -9.852e-02
##
## nVar 8
## BIC -4.905e+03
## post prob 0.548
## model 2 model 3 model 4
## Intercept 8.100e-01 1.370e+00 1.278e+00
## Central.Area
## .Yes 9.122e-01 . 8.527e-01
## Purpose
## .Picking Children -1.000e+00 -1.137e+00 -1.115e+00
## .Entertainment -2.469e+00 -2.378e+00 -2.453e+00
## .Others -1.102e+00 -9.134e-01 -7.229e-01
## Frequency
## .2 times 2.975e-01 4.681e-01 4.199e-01
## .3 times 1.350e-01 8.485e-02 1.495e-01
## .> 3 times -1.911e+00 -1.873e+00 -1.893e+00
## Temporary.Stop.Number
## .1 stop -1.676e+00 -1.784e+00 -1.824e+00
## .>=2 stops 7.599e-02 1.285e-01 7.867e-02
## Weather
## .Rainny 2.136e+00 . .
## .Cool -1.811e-01 . .
## Bus.Stop.Present
## .Yes . . .
## .Don't know . . .
## Age
## .25-60 -4.025e-01 -4.205e-01 -2.879e-01
## .>60 2.850e+00 2.661e+00 2.712e+00
## Income
## .(8-15) millions . . -6.929e-01
## .(15-25) millions . . -5.012e-01
## .>25 millions . . -2.380e+00
## Motor.Certificate
## .Yes -1.583e+00 -1.585e+00 -1.608e+00
## Motor.Owning
## .Yes . . .
## Travel.Period 4.071e-02 3.670e-02 4.105e-02
## Cost -1.051e-01 -9.813e-02 -9.229e-02
##
## nVar 9 7 9
## BIC -4.903e+03 -4.903e+03 -4.900e+03
## post prob 0.212 0.166 0.045
## model 5
## Intercept 9.432e-01
## Central.Area
## .Yes 8.984e-01
## Purpose
## .Picking Children -9.919e-01
## .Entertainment -2.443e+00
## .Others -1.217e+00
## Frequency
## .2 times 2.794e-01
## .3 times 6.074e-02
## .> 3 times -1.911e+00
## Temporary.Stop.Number
## .1 stop -1.670e+00
## .>=2 stops 4.538e-02
## Weather
## .Rainny 2.214e+00
## .Cool -1.329e-01
## Bus.Stop.Present
## .Yes .
## .Don't know .
## Age
## .25-60 -3.158e-01
## .>60 2.842e+00
## Income
## .(8-15) millions .
## .(15-25) millions .
## .>25 millions .
## Motor.Certificate
## .Yes -1.277e+00
## Motor.Owning
## .Yes -5.745e-01
## Travel.Period 3.922e-02
## Cost -9.891e-02
##
## nVar 10
## BIC -4.899e+03
## post prob 0.030
imageplot.bma(bma.search1)
## Choose Model 1 from bma.search1 : remove 4 variables (Weather (12), Bus.Stop.Present (16), Motor.Owning (25), Income (20)) and modelling again --> 8 variables
mg2 = glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, family = binomial, data = datBus)
mg2
##
## Call: glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Age + Motor.Certificate + Travel.Period + Cost, family = binomial,
## data = datBus)
##
## Coefficients:
## (Intercept) Central.AreaYes
## 0.77606 0.80313
## PurposePicking Children PurposeEntertainment
## -1.02909 -2.36946
## PurposeOthers Frequency2 times
## -0.97031 0.46866
## Frequency3 times Frequency> 3 times
## 0.20536 -1.80115
## Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops
## -1.74861 0.13088
## Age25-60 Age>60
## -0.45058 2.66331
## Motor.CertificateYes Travel.Period
## -1.59018 0.04184
## Cost
## -0.09852
##
## Degrees of Freedom: 808 Total (i.e. Null); 794 Residual
## Null Deviance: 628.3
## Residual Deviance: 411.5 AIC: 441.5
summary(mg2)
##
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Age + Motor.Certificate + Travel.Period + Cost, family = binomial,
## data = datBus)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4647 -0.4365 -0.2316 -0.1143 3.1417
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.77606 0.54706 1.419 0.156015
## Central.AreaYes 0.80313 0.27165 2.957 0.003111 **
## PurposePicking Children -1.02909 0.56419 -1.824 0.068150 .
## PurposeEntertainment -2.36946 0.44919 -5.275 1.33e-07 ***
## PurposeOthers -0.97031 0.73516 -1.320 0.186878
## Frequency2 times 0.46866 0.55317 0.847 0.396874
## Frequency3 times 0.20536 0.54499 0.377 0.706315
## Frequency> 3 times -1.80115 0.46243 -3.895 9.82e-05 ***
## Temporary.Stop.Number1 stop -1.74861 0.39604 -4.415 1.01e-05 ***
## Temporary.Stop.Number>=2 stops 0.13088 0.34701 0.377 0.706052
## Age25-60 -0.45058 0.30490 -1.478 0.139470
## Age>60 2.66331 0.55312 4.815 1.47e-06 ***
## Motor.CertificateYes -1.59017 0.32545 -4.886 1.03e-06 ***
## Travel.Period 0.04184 0.00925 4.524 6.08e-06 ***
## Cost -0.09852 0.02766 -3.562 0.000368 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 411.48 on 794 degrees of freedom
## AIC: 441.48
##
## Number of Fisher Scoring iterations: 7
## Calculate OR (odd ratio)
exp(mg2$coefficients)
## (Intercept) Central.AreaYes
## 2.17288728 2.23251131
## PurposePicking Children PurposeEntertainment
## 0.35733306 0.09353129
## PurposeOthers Frequency2 times
## 0.37896411 1.59784912
## Frequency3 times Frequency> 3 times
## 1.22796259 0.16510910
## Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops
## 0.17401624 1.13982907
## Age25-60 Age>60
## 0.63725954 14.34371919
## Motor.CertificateYes Travel.Period
## 0.20388992 1.04273033
## Cost
## 0.90617421
exp(coef(mg2))
## (Intercept) Central.AreaYes
## 2.17288728 2.23251131
## PurposePicking Children PurposeEntertainment
## 0.35733306 0.09353129
## PurposeOthers Frequency2 times
## 0.37896411 1.59784912
## Frequency3 times Frequency> 3 times
## 1.22796259 0.16510910
## Temporary.Stop.Number1 stop Temporary.Stop.Number>=2 stops
## 0.17401624 1.13982907
## Age25-60 Age>60
## 0.63725954 14.34371919
## Motor.CertificateYes Travel.Period
## 0.20388992 1.04273033
## Cost
## 0.90617421
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 3.6.3
## Loading required package: foreign
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
## Registered S3 method overwritten by 'epiDisplay':
## method from
## print.lrtest rms
##
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
##
## alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
##
## alpha
logistic.display(mg2)
##
## Logistic regression predicting Bus : Yes vs No
##
## crude OR(95%CI) adj. OR(95%CI)
## Central.Area: Yes vs No 1.94 (1.28,2.95) 2.23 (1.31,3.8)
##
## Purpose: ref.=Work/Study
## Picking Children 0.64 (0.31,1.32) 0.36 (0.12,1.08)
## Entertainment 0.56 (0.3,1.04) 0.09 (0.04,0.23)
## Others 2.81 (1.11,7.1) 0.38 (0.09,1.6)
##
## Frequency: ref.=Once
## 2 times 1.32 (0.58,3.03) 1.6 (0.54,4.72)
## 3 times 1.18 (0.52,2.66) 1.23 (0.42,3.57)
## > 3 times 0.32 (0.16,0.63) 0.17 (0.07,0.41)
##
## Temporary.Stop.Number: ref.=None
## 1 stop 0.18 (0.09,0.35) 0.17 (0.08,0.38)
## >=2 stops 0.87 (0.5,1.51) 1.14 (0.58,2.25)
##
## Age: ref.=15-24
## 25-60 0.33 (0.2,0.54) 0.64 (0.35,1.16)
## >60 5.48 (2.74,10.98) 14.34 (4.85,42.41)
##
## Motor.Certificate: Yes vs No 0.19 (0.11,0.3) 0.2 (0.11,0.39)
##
## Travel.Period (cont. var.) 1.01 (1,1.03) 1.04 (1.02,1.06)
##
## Cost (cont. var.) 0.94 (0.9,0.98) 0.91 (0.86,0.96)
##
## P(Wald's test) P(LR-test)
## Central.Area: Yes vs No 0.003 0.003
##
## Purpose: ref.=Work/Study < 0.001
## Picking Children 0.068
## Entertainment < 0.001
## Others 0.187
##
## Frequency: ref.=Once < 0.001
## 2 times 0.397
## 3 times 0.706
## > 3 times < 0.001
##
## Temporary.Stop.Number: ref.=None < 0.001
## 1 stop < 0.001
## >=2 stops 0.706
##
## Age: ref.=15-24 < 0.001
## 25-60 0.139
## >60 < 0.001
##
## Motor.Certificate: Yes vs No < 0.001 < 0.001
##
## Travel.Period (cont. var.) < 0.001 < 0.001
##
## Cost (cont. var.) < 0.001 < 0.001
##
## Log-likelihood = -205.7404
## No. of observations = 809
## AIC value = 441.4808
## Thay vì sử dụng hàm glm dùng cách 2 với hàm lrm trong gói rms
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:epiDisplay':
##
## dotplot
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'rms'
## The following object is masked from 'package:epiDisplay':
##
## lrtest
## The following objects are masked from 'package:car':
##
## Predict, vif
mg2.c2 = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, data = datBus)
mg2.c2
## Logistic Regression Model
##
## lrm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Age + Motor.Certificate + Travel.Period + Cost, data = datBus)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 809 LR chi2 216.84 R2 0.435 C 0.882
## No 703 d.f. 14 g 2.277 Dxy 0.764
## Yes 106 Pr(> chi2) <0.0001 gr 9.752 gamma 0.765
## max |deriv| 2e-05 gp 0.173 tau-a 0.174
## Brier 0.074
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 0.7761 0.5471 1.42 0.1560
## Central.Area=Yes 0.8031 0.2716 2.96 0.0031
## Purpose=Picking Children -1.0291 0.5642 -1.82 0.0682
## Purpose=Entertainment -2.3695 0.4492 -5.27 <0.0001
## Purpose=Others -0.9703 0.7352 -1.32 0.1869
## Frequency=2 times 0.4687 0.5532 0.85 0.3969
## Frequency=3 times 0.2054 0.5450 0.38 0.7063
## Frequency=> 3 times -1.8011 0.4624 -3.89 <0.0001
## Temporary.Stop.Number=1 stop -1.7486 0.3960 -4.42 <0.0001
## Temporary.Stop.Number=>=2 stops 0.1309 0.3470 0.38 0.7061
## Age=25-60 -0.4506 0.3049 -1.48 0.1395
## Age=>60 2.6633 0.5531 4.82 <0.0001
## Motor.Certificate=Yes -1.5902 0.3255 -4.89 <0.0001
## Travel.Period 0.0418 0.0092 4.52 <0.0001
## Cost -0.0985 0.0277 -3.56 0.0004
##
mg1.c2 = lrm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Income + Motor.Certificate + Motor.Owning + Travel.Period + Cost, data = datBus)
mg1.c2
## Logistic Regression Model
##
## lrm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Weather + Bus.Stop.Present + Age + Income + Motor.Certificate +
## Motor.Owning + Travel.Period + Cost, data = datBus)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 809 LR chi2 246.05 R2 0.486 C 0.892
## No 703 d.f. 22 g 2.520 Dxy 0.783
## Yes 106 Pr(> chi2) <0.0001 gr 12.430 gamma 0.784
## max |deriv| 1e-09 gp 0.182 tau-a 0.179
## Brier 0.067
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 1.8025 0.6776 2.66 0.0078
## Central.Area=Yes 0.9051 0.2925 3.09 0.0020
## Purpose=Picking Children -1.0876 0.5974 -1.82 0.0687
## Purpose=Entertainment -2.4849 0.4831 -5.14 <0.0001
## Purpose=Others -1.0102 0.7619 -1.33 0.1849
## Frequency=2 times 0.3862 0.5729 0.67 0.5002
## Frequency=3 times 0.1286 0.5758 0.22 0.8233
## Frequency=> 3 times -1.8828 0.4796 -3.93 <0.0001
## Temporary.Stop.Number=1 stop -1.7544 0.4125 -4.25 <0.0001
## Temporary.Stop.Number=>=2 stops -0.0504 0.3776 -0.13 0.8939
## Weather=Rainny 2.3627 0.7189 3.29 0.0010
## Weather=Cool 0.0098 0.3455 0.03 0.9773
## Bus.Stop.Present=Yes -0.5762 0.4169 -1.38 0.1669
## Bus.Stop.Present=Don't know -0.8087 0.6743 -1.20 0.2304
## Age=25-60 -0.1809 0.3270 -0.55 0.5801
## Age=>60 2.8865 0.5731 5.04 <0.0001
## Income=(8-15) millions -0.6099 0.3225 -1.89 0.0585
## Income=(15-25) millions -0.4474 0.3736 -1.20 0.2312
## Income=>25 millions -2.3626 0.7897 -2.99 0.0028
## Motor.Certificate=Yes -1.3322 0.3920 -3.40 0.0007
## Motor.Owning=Yes -0.4551 0.3553 -1.28 0.2003
## Travel.Period 0.0365 0.0097 3.75 0.0002
## Cost -0.0928 0.0310 -2.99 0.0028
##
4. Test models
# 4.1. Deviance - càng nhỏ càng tốt (sai khác giữa giá trị thực tế và giá trị ước lượng theo mô hình)
deviance(mg)
## [1] 254.2948
deviance(mg1)
## [1] 382.2724
deviance(mg2)
## [1] 411.4808
# 4.2. LRT (Likelihood Ratio Test) - Chênh lệch deviance giữa mô hình đơn giản và mô hình phức tạp --> có ý nghĩa thống kê
library(rms)
lrtest(mg2, mg)
lrtest(mg,mg2)
lrtest(mg2, mg1)
# 4.3. AIC (Aikake Information Criterion) - càng nhỏ càng tốt
AIC(mg)
## [1] 332.2948
AIC(mg1)
## [1] 428.2724
AIC(mg2)
## [1] 441.4808
## Calculate MacFadden's R2 voi packages "pscl"
library(pscl)
## Warning: package 'pscl' was built under R version 3.6.3
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
pR2(mg2)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -205.7403930 -314.1608850 216.8409840 0.3451114 0.2351196
## r2CU
## 0.4353563
pR2(mg)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -127.1473808 -314.1608850 374.0270083 0.5952794 0.3701871
## r2CU
## 0.6854523
# 4.4. Calcule the number of observation that model predict correctly = Accuracy (Độ chính xác của mô hình)
## Model mg2
contrasts(datBus$Bus)
## Yes
## No 0
## Yes 1
glm.probs <- predict(mg2, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg2 (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng). Có thể dùng hàm probmg2 <- fitted(mg2), head(probmg2), tail(probmg2)
glm.probs[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên
## 1 2 3 4 5 6
## 0.068978792 0.010724685 0.014359875 0.011680718 0.038184381 0.003167892
## 7 8 9 10
## 0.005570387 0.047199701 0.013507900 0.008546719
glm.pred <- rep("No", 809)
glm.pred[glm.probs >= 0.5] = "Yes"
table(glm.pred, datBus$Bus)
##
## glm.pred No Yes
## No 688 61
## Yes 15 45
mean(glm.pred == datBus$Bus) # Tỷ lệ dự báo chính xác của mô hình 90,61%
## [1] 0.9060569
round(prop.table(table(datBus$Bus))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không sd xe buýt 86.9% < 90.61% --> KL: mức độ chính xác dự báo của mô hình 2 > lớp có tỷ lệ cao nhất ở biến phụ thuộc
##
## No Yes
## 86.9 13.1
## Model mg
contrasts(datBus$Bus)
## Yes
## No 0
## Yes 1
glm.probs.mg <- predict(mg, type = "response") # XS dự báo cho tất cả các quan sát theo mô hình mg (nếu >0,5 thì xem như bẳng 1 nghĩa là mô hình dự báo đúng)
glm.probs.mg[1:10] # Xem XS dự báo cho 10 quan sát đầu tiên
## 1 2 3 4 5
## 2.808039e-04 1.131239e-03 5.179213e-04 3.754305e-03 9.648223e-03
## 6 7 8 9 10
## 7.014613e-09 9.920555e-08 1.075833e-02 2.621728e-03 2.171477e-03
glm.pred.mg <- rep("No", 809)
glm.pred.mg[glm.probs.mg >= 0.5] = "Yes"
table(glm.pred.mg, datBus$Bus)
##
## glm.pred.mg No Yes
## No 691 31
## Yes 12 75
mean(glm.pred.mg == datBus$Bus) # Tỷ lệ dự báo chính xác của mô hình 94,68% --> Tỷ lệ sai sót kiểm định (training error rate) = 100-94,68 = 5,32%
## [1] 0.946848
round(prop.table(table(datBus$Bus))*100,digits = 2) # Tính tỷ lệ các nhóm Y khác nhau - nhóm chiếm ưu thế là không sd xe buýt 86.9% < 94.68% --> KL: mức độ chính xác dự báo của mô hình > lớp có tỷ lệ cao nhất ở biến phụ thuộc
##
## No Yes
## 86.9 13.1
# 4.5. Calculate Sensitivity - Độ nhạy
712/(712+26) # Đạt 96,48% --> Rất lý tưởng: Mô hình xác định tới 96,48% người không sử dụng xe buýt
## [1] 0.9647696
# 4.6. Calculate Specificity - Độ đặc hiệu
49/(61+49) # Đạt 44,55% --> Mô hình có khả năng dự báo 44,55% người sử dụng xe buýt
## [1] 0.4454545
# 4.7. Hosmer-Lemeshow = Goodness of fit test - Kiểm định sự phù hợp của mô hình với Hàm HLgof.test(fit = fitted(mg2), obs = datBus$Bus) trong package "MKmisc" (Version 3.6.3) : Nếu p-value < 0.05 --> MH không phù hợp --> Ít sử dụng
# 4.8. Kiểm tra tính phổ quát của mô hình
## Chạy lại hai mô hình với hàm train thuộc package "caret"
library(caret)
## Warning: package 'caret' was built under R version 3.6.3
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## The following object is masked from 'package:survival':
##
## cluster
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 10, classProbs = TRUE, summaryFunction = defaultSummary) # sử dụng kiểm tra chéo 10 lớp (number=10), lặp lại 10 lần (repeats=10) với lựa chọn method="repeatedcv, lựa chọn classProbs = TRUE áp dụng cho mô hình phân loại và lựa chọn summaryFunction = defaultSummary để chỉ thị R tính toán chỉ 2 thông tin về phẩm chất của mô hình là Accuracy và Kappa.
mgtrain = train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, data = datBus, method = "glm", family = "binomial", trControl = ctrl)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mgtrain)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5799 -0.2733 -0.1226 -0.0225 3.5052
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.693170 1.122728 0.617 0.536972
## Central.AreaYes 1.153570 0.369569 3.121 0.001800
## `PurposePicking Children` -0.846264 0.775170 -1.092 0.274959
## PurposeEntertainment -2.128209 0.613702 -3.468 0.000525
## PurposeOthers -0.241582 0.938275 -0.257 0.796812
## `Frequency2 times` 1.663772 0.810579 2.053 0.040114
## `Frequency3 times` 0.551286 0.817643 0.674 0.500160
## `Frequency> 3 times` -1.081913 0.669912 -1.615 0.106309
## `Temporary.Stop.Number1 stop` -2.088256 0.549980 -3.797 0.000146
## `Temporary.Stop.Number>=2 stops` 0.007966 0.491717 0.016 0.987075
## WeatherRainny 1.875750 1.022332 1.835 0.066539
## WeatherCool -0.560382 0.483314 -1.159 0.246270
## Bus.Stop.PresentYes -0.648223 0.566922 -1.143 0.252869
## `Bus.Stop.PresentDon't know` -1.082582 0.864050 -1.253 0.210236
## `Age25-60` -0.681496 0.689868 -0.988 0.323219
## `Age>60` 2.254365 0.977059 2.307 0.021038
## `OccupationOfficers/Workers` 0.203182 0.735185 0.276 0.782265
## `OccupationHousewife/Unemployed` 2.166113 1.018430 2.127 0.033427
## `OccupationFree labor` -0.369467 0.702315 -0.526 0.598840
## OccupationOthers 1.387882 0.899394 1.543 0.122799
## `Income(8-15) millions` -0.937300 0.434752 -2.156 0.031088
## `Income(15-25) millions` -0.285344 0.510462 -0.559 0.576167
## `Income>25 millions` -2.316472 1.157201 -2.002 0.045307
## `Number.of.Children1 child` -0.666788 0.428135 -1.557 0.119370
## `Number.of.Children2 children` -0.302515 0.608433 -0.497 0.619045
## `Number.of.Children>= 3 children` -0.920554 1.460357 -0.630 0.528457
## Motor.CertificateYes -1.343825 0.544182 -2.469 0.013532
## Car.CertificateYes 0.038586 0.945690 0.041 0.967454
## Motor.OwningYes -0.746040 0.527259 -1.415 0.157086
## Car.OwningYes 0.036269 1.448918 0.025 0.980030
## Number.of.Motors1 0.618812 0.808973 0.765 0.444310
## Number.of.Motors2 0.222040 0.802489 0.277 0.782019
## Number.of.Motors3 -0.523223 0.881065 -0.594 0.552610
## `Number.of.Motors>3` -0.510007 1.380742 -0.369 0.711851
## Number.of.Car1 -1.162398 0.858510 -1.354 0.175745
## `Number.of.Car>=2` -1.072850 2.226960 -0.482 0.629980
## Distance 0.688046 0.098829 6.962 3.36e-12
## Travel.Period 0.009410 0.015030 0.626 0.531270
## Cost -0.618352 0.094949 -6.512 7.39e-11
##
## (Intercept)
## Central.AreaYes **
## `PurposePicking Children`
## PurposeEntertainment ***
## PurposeOthers
## `Frequency2 times` *
## `Frequency3 times`
## `Frequency> 3 times`
## `Temporary.Stop.Number1 stop` ***
## `Temporary.Stop.Number>=2 stops`
## WeatherRainny .
## WeatherCool
## Bus.Stop.PresentYes
## `Bus.Stop.PresentDon't know`
## `Age25-60`
## `Age>60` *
## `OccupationOfficers/Workers`
## `OccupationHousewife/Unemployed` *
## `OccupationFree labor`
## OccupationOthers
## `Income(8-15) millions` *
## `Income(15-25) millions`
## `Income>25 millions` *
## `Number.of.Children1 child`
## `Number.of.Children2 children`
## `Number.of.Children>= 3 children`
## Motor.CertificateYes *
## Car.CertificateYes
## Motor.OwningYes
## Car.OwningYes
## Number.of.Motors1
## Number.of.Motors2
## Number.of.Motors3
## `Number.of.Motors>3`
## Number.of.Car1
## `Number.of.Car>=2`
## Distance ***
## Travel.Period
## Cost ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 254.29 on 770 degrees of freedom
## AIC: 332.29
##
## Number of Fisher Scoring iterations: 9
mg2train = train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, data = datBus, method = "glm", family = "binomial", trControl = ctrl)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mg2train)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4647 -0.4365 -0.2316 -0.1143 3.1417
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.77606 0.54706 1.419 0.156015
## Central.AreaYes 0.80313 0.27165 2.957 0.003111 **
## `PurposePicking Children` -1.02909 0.56419 -1.824 0.068150 .
## PurposeEntertainment -2.36946 0.44919 -5.275 1.33e-07 ***
## PurposeOthers -0.97031 0.73516 -1.320 0.186878
## `Frequency2 times` 0.46866 0.55317 0.847 0.396874
## `Frequency3 times` 0.20536 0.54499 0.377 0.706315
## `Frequency> 3 times` -1.80115 0.46243 -3.895 9.82e-05 ***
## `Temporary.Stop.Number1 stop` -1.74861 0.39604 -4.415 1.01e-05 ***
## `Temporary.Stop.Number>=2 stops` 0.13088 0.34701 0.377 0.706052
## `Age25-60` -0.45058 0.30490 -1.478 0.139470
## `Age>60` 2.66331 0.55312 4.815 1.47e-06 ***
## Motor.CertificateYes -1.59017 0.32545 -4.886 1.03e-06 ***
## Travel.Period 0.04184 0.00925 4.524 6.08e-06 ***
## Cost -0.09852 0.02766 -3.562 0.000368 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 411.48 on 794 degrees of freedom
## AIC: 441.48
##
## Number of Fisher Scoring iterations: 7
## Các thông tin về khả năng dự báo của mô hình bằng confusionMatrix()
predmg <- predict(mgtrain, newdata = datBus)
confusionMatrix(data = predmg, datBus$Bus)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 691 31
## Yes 12 75
##
## Accuracy : 0.9468
## 95% CI : (0.9291, 0.9613)
## No Information Rate : 0.869
## P-Value [Acc > NIR] : 1.955e-13
##
## Kappa : 0.7474
##
## Mcnemar's Test P-Value : 0.006052
##
## Sensitivity : 0.9829
## Specificity : 0.7075
## Pos Pred Value : 0.9571
## Neg Pred Value : 0.8621
## Prevalence : 0.8690
## Detection Rate : 0.8541
## Detection Prevalence : 0.8925
## Balanced Accuracy : 0.8452
##
## 'Positive' Class : No
##
predmg2 <- predict(mg2train, newdata = datBus)
confusionMatrix(data = predmg2, datBus$Bus)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 688 61
## Yes 15 45
##
## Accuracy : 0.9061
## 95% CI : (0.8838, 0.9253)
## No Information Rate : 0.869
## P-Value [Acc > NIR] : 0.0006901
##
## Kappa : 0.4943
##
## Mcnemar's Test P-Value : 2.445e-07
##
## Sensitivity : 0.9787
## Specificity : 0.4245
## Pos Pred Value : 0.9186
## Neg Pred Value : 0.7500
## Prevalence : 0.8690
## Detection Rate : 0.8504
## Detection Prevalence : 0.9258
## Balanced Accuracy : 0.7016
##
## 'Positive' Class : No
##
## Lấy dataframe với thông tin Accuracy của mô hình
a = mgtrain$resample
a = a[, -3] # Trừ cột resample
a$Mohinh = "Logitmg" # Thêm biến mô hình
a
## Accuracy Kappa Mohinh
## 1 0.9250000 0.6250000 Logitmg
## 2 0.9382716 0.7028613 Logitmg
## 3 0.9625000 0.8032787 Logitmg
## 4 0.9146341 0.6178429 Logitmg
## 5 0.9125000 0.6164384 Logitmg
## 6 0.9146341 0.5451664 Logitmg
## 7 0.9259259 0.6255778 Logitmg
## 8 0.9259259 0.6273006 Logitmg
## 9 0.9135802 0.5445783 Logitmg
## 10 0.9506173 0.7721519 Logitmg
## 11 0.9750000 0.8750000 Logitmg
## 12 0.9259259 0.6582278 Logitmg
## 13 0.9125000 0.6164384 Logitmg
## 14 0.9259259 0.6582278 Logitmg
## 15 0.9500000 0.7241379 Logitmg
## 16 0.8888889 0.3461883 Logitmg
## 17 0.9634146 0.8219971 Logitmg
## 18 0.9259259 0.6582278 Logitmg
## 19 0.9146341 0.5846599 Logitmg
## 20 0.9135802 0.5445783 Logitmg
## 21 0.9753086 0.8860759 Logitmg
## 22 0.9259259 0.6577465 Logitmg
## 23 0.9506173 0.7721519 Logitmg
## 24 0.9012346 0.3851992 Logitmg
## 25 0.9012346 0.5030675 Logitmg
## 26 0.9629630 0.8217168 Logitmg
## 27 0.9135802 0.6454034 Logitmg
## 28 0.9135802 0.5445783 Logitmg
## 29 0.9125000 0.5409836 Logitmg
## 30 0.9012346 0.5007704 Logitmg
## 31 0.9382716 0.6725950 Logitmg
## 32 0.9259259 0.6255778 Logitmg
## 33 0.8765432 0.5114596 Logitmg
## 34 0.9506173 0.7515337 Logitmg
## 35 0.9629630 0.8480300 Logitmg
## 36 0.9135802 0.6454034 Logitmg
## 37 0.9750000 0.8750000 Logitmg
## 38 0.9135802 0.4968944 Logitmg
## 39 0.9500000 0.7241379 Logitmg
## 40 0.9390244 0.7033285 Logitmg
## 41 0.9506173 0.7515337 Logitmg
## 42 0.9375000 0.7014925 Logitmg
## 43 0.9375000 0.7014925 Logitmg
## 44 0.9012346 0.5030675 Logitmg
## 45 0.9382716 0.7028613 Logitmg
## 46 0.9625000 0.8032787 Logitmg
## 47 0.9012346 0.5443038 Logitmg
## 48 0.9512195 0.7725381 Logitmg
## 49 0.9629630 0.8359217 Logitmg
## 50 0.9024390 0.5450763 Logitmg
## 51 0.9512195 0.7518911 Logitmg
## 52 0.9382716 0.7028613 Logitmg
## 53 0.9625000 0.8032787 Logitmg
## 54 0.9250000 0.6571429 Logitmg
## 55 0.9146341 0.6178429 Logitmg
## 56 0.9012346 0.5030675 Logitmg
## 57 0.9500000 0.7894737 Logitmg
## 58 0.9375000 0.6363636 Logitmg
## 59 0.8765432 0.3788344 Logitmg
## 60 0.9390244 0.7270306 Logitmg
## 61 0.9125000 0.5409836 Logitmg
## 62 0.9250000 0.6250000 Logitmg
## 63 0.9024390 0.4542429 Logitmg
## 64 0.9250000 0.6571429 Logitmg
## 65 0.9506173 0.7721519 Logitmg
## 66 0.9390244 0.7033285 Logitmg
## 67 0.9012346 0.5443038 Logitmg
## 68 0.9382716 0.6725950 Logitmg
## 69 0.9382716 0.7028613 Logitmg
## 70 0.9382716 0.7265361 Logitmg
## 71 0.9250000 0.6842105 Logitmg
## 72 0.9506173 0.7721519 Logitmg
## 73 0.9390244 0.7033285 Logitmg
## 74 0.8765432 0.5114596 Logitmg
## 75 0.9259259 0.6582278 Logitmg
## 76 0.9382716 0.6746988 Logitmg
## 77 0.9382716 0.6725950 Logitmg
## 78 0.9125000 0.4909091 Logitmg
## 79 0.9506173 0.7721519 Logitmg
## 80 0.9382716 0.7019868 Logitmg
## 81 0.9506173 0.7896104 Logitmg
## 82 0.9135802 0.6171506 Logitmg
## 83 0.8658537 0.3473227 Logitmg
## 84 0.9506173 0.7503852 Logitmg
## 85 0.9506173 0.7515337 Logitmg
## 86 0.9500000 0.7500000 Logitmg
## 87 0.9382716 0.7265361 Logitmg
## 88 0.9375000 0.7014925 Logitmg
## 89 0.9024390 0.4542429 Logitmg
## 90 0.9500000 0.7500000 Logitmg
## 91 0.9012346 0.5443038 Logitmg
## 92 0.9024390 0.5037821 Logitmg
## 93 0.8888889 0.4144578 Logitmg
## 94 0.9250000 0.6571429 Logitmg
## 95 0.9625000 0.8356164 Logitmg
## 96 0.9506173 0.7503852 Logitmg
## 97 0.9125000 0.6455696 Logitmg
## 98 0.9506173 0.7515337 Logitmg
## 99 0.9259259 0.6582278 Logitmg
## 100 0.9146341 0.4973730 Logitmg
b = mg2train$resample
b = b[, -3]
b$Mohinh = "Logitmg2"
b
## Accuracy Kappa Mohinh
## 1 0.8875000 0.40983607 Logitmg2
## 2 0.8888889 0.26586103 Logitmg2
## 3 0.8641975 0.20940550 Logitmg2
## 4 0.8641975 0.20940550 Logitmg2
## 5 0.9512195 0.77253814 Logitmg2
## 6 0.8888889 0.46515040 Logitmg2
## 7 0.9012346 0.50306748 Logitmg2
## 8 0.9000000 0.44827586 Logitmg2
## 9 0.8888889 0.41067098 Logitmg2
## 10 0.9382716 0.67469880 Logitmg2
## 11 0.8625000 0.27868852 Logitmg2
## 12 0.9250000 0.58620690 Logitmg2
## 13 0.9000000 0.38461538 Logitmg2
## 14 0.8902439 0.35376532 Logitmg2
## 15 0.9382716 0.63677130 Logitmg2
## 16 0.8414634 0.34278668 Logitmg2
## 17 0.8518519 0.31645570 Logitmg2
## 18 0.9259259 0.59021922 Logitmg2
## 19 0.9135802 0.54457831 Logitmg2
## 20 0.8888889 0.35314996 Logitmg2
## 21 0.9125000 0.42857143 Logitmg2
## 22 0.9012346 0.45362563 Logitmg2
## 23 0.8518519 0.31645570 Logitmg2
## 24 0.8902439 0.27788650 Logitmg2
## 25 0.9000000 0.44827586 Logitmg2
## 26 0.8750000 0.42857143 Logitmg2
## 27 0.9629630 0.82171680 Logitmg2
## 28 0.9012346 0.44897959 Logitmg2
## 29 0.8765432 0.43037975 Logitmg2
## 30 0.9024390 0.54507628 Logitmg2
## 31 0.9135802 0.54457831 Logitmg2
## 32 0.8658537 0.28526149 Logitmg2
## 33 0.8888889 0.41445783 Logitmg2
## 34 0.9375000 0.67213115 Logitmg2
## 35 0.9012346 0.44897959 Logitmg2
## 36 0.9259259 0.62730061 Logitmg2
## 37 0.9125000 0.42857143 Logitmg2
## 38 0.8375000 0.28767123 Logitmg2
## 39 0.8765432 0.31703204 Logitmg2
## 40 0.9024390 0.50378215 Logitmg2
## 41 0.8658537 0.39946738 Logitmg2
## 42 0.9000000 0.38461538 Logitmg2
## 43 0.8641975 0.20940550 Logitmg2
## 44 0.9259259 0.59021922 Logitmg2
## 45 0.9382716 0.70286134 Logitmg2
## 46 0.8750000 0.31034483 Logitmg2
## 47 0.9259259 0.58673469 Logitmg2
## 48 0.8780488 0.37972769 Logitmg2
## 49 0.8888889 0.41445783 Logitmg2
## 50 0.9000000 0.44827586 Logitmg2
## 51 0.9135802 0.49689441 Logitmg2
## 52 0.9250000 0.58620690 Logitmg2
## 53 0.8888889 0.41445783 Logitmg2
## 54 0.9259259 0.59021922 Logitmg2
## 55 0.8518519 0.25460123 Logitmg2
## 56 0.8902439 0.35376532 Logitmg2
## 57 0.9135802 0.58278146 Logitmg2
## 58 0.8641975 0.27970897 Logitmg2
## 59 0.9012346 0.57922078 Logitmg2
## 60 0.9125000 0.42857143 Logitmg2
## 61 0.9146341 0.49737303 Logitmg2
## 62 0.8875000 0.34545455 Logitmg2
## 63 0.8875000 0.34545455 Logitmg2
## 64 0.9012346 0.50306748 Logitmg2
## 65 0.8750000 0.37500000 Logitmg2
## 66 0.9012346 0.50306748 Logitmg2
## 67 0.9146341 0.61784288 Logitmg2
## 68 0.8658537 0.11741683 Logitmg2
## 69 0.9000000 0.50000000 Logitmg2
## 70 0.8888889 0.54409006 Logitmg2
## 71 0.8750000 0.31034483 Logitmg2
## 72 0.8765432 0.31703204 Logitmg2
## 73 0.9135802 0.58400587 Logitmg2
## 74 0.9000000 0.38461538 Logitmg2
## 75 0.9375000 0.63636364 Logitmg2
## 76 0.8750000 0.37500000 Logitmg2
## 77 0.9024390 0.50378215 Logitmg2
## 78 0.8780488 0.43134535 Logitmg2
## 79 0.8888889 0.27750248 Logitmg2
## 80 0.9146341 0.61784288 Logitmg2
## 81 0.9000000 0.54285714 Logitmg2
## 82 0.9250000 0.58620690 Logitmg2
## 83 0.9268293 0.62783661 Logitmg2
## 84 0.8536585 0.09057301 Logitmg2
## 85 0.8641975 0.34629494 Logitmg2
## 86 0.9259259 0.59021922 Logitmg2
## 87 0.8875000 0.40983607 Logitmg2
## 88 0.8888889 0.46515040 Logitmg2
## 89 0.9012346 0.45362563 Logitmg2
## 90 0.9012346 0.38519924 Logitmg2
## 91 0.8888889 0.46357616 Logitmg2
## 92 0.9012346 0.57922078 Logitmg2
## 93 0.9250000 0.53846154 Logitmg2
## 94 0.8518519 0.18043845 Logitmg2
## 95 0.8888889 0.35314996 Logitmg2
## 96 0.9135802 0.49689441 Logitmg2
## 97 0.9012346 0.54430380 Logitmg2
## 98 0.9250000 0.53846154 Logitmg2
## 99 0.9259259 0.53889943 Logitmg2
## 100 0.9146341 0.61784288 Logitmg2
resamplemod = rbind(a, b) # Hợp nhất 2 data frame a và b
resamplemod
## Accuracy Kappa Mohinh
## 1 0.9250000 0.62500000 Logitmg
## 2 0.9382716 0.70286134 Logitmg
## 3 0.9625000 0.80327869 Logitmg
## 4 0.9146341 0.61784288 Logitmg
## 5 0.9125000 0.61643836 Logitmg
## 6 0.9146341 0.54516640 Logitmg
## 7 0.9259259 0.62557781 Logitmg
## 8 0.9259259 0.62730061 Logitmg
## 9 0.9135802 0.54457831 Logitmg
## 10 0.9506173 0.77215190 Logitmg
## 11 0.9750000 0.87500000 Logitmg
## 12 0.9259259 0.65822785 Logitmg
## 13 0.9125000 0.61643836 Logitmg
## 14 0.9259259 0.65822785 Logitmg
## 15 0.9500000 0.72413793 Logitmg
## 16 0.8888889 0.34618834 Logitmg
## 17 0.9634146 0.82199711 Logitmg
## 18 0.9259259 0.65822785 Logitmg
## 19 0.9146341 0.58465991 Logitmg
## 20 0.9135802 0.54457831 Logitmg
## 21 0.9753086 0.88607595 Logitmg
## 22 0.9259259 0.65774648 Logitmg
## 23 0.9506173 0.77215190 Logitmg
## 24 0.9012346 0.38519924 Logitmg
## 25 0.9012346 0.50306748 Logitmg
## 26 0.9629630 0.82171680 Logitmg
## 27 0.9135802 0.64540338 Logitmg
## 28 0.9135802 0.54457831 Logitmg
## 29 0.9125000 0.54098361 Logitmg
## 30 0.9012346 0.50077042 Logitmg
## 31 0.9382716 0.67259499 Logitmg
## 32 0.9259259 0.62557781 Logitmg
## 33 0.8765432 0.51145959 Logitmg
## 34 0.9506173 0.75153374 Logitmg
## 35 0.9629630 0.84803002 Logitmg
## 36 0.9135802 0.64540338 Logitmg
## 37 0.9750000 0.87500000 Logitmg
## 38 0.9135802 0.49689441 Logitmg
## 39 0.9500000 0.72413793 Logitmg
## 40 0.9390244 0.70332851 Logitmg
## 41 0.9506173 0.75153374 Logitmg
## 42 0.9375000 0.70149254 Logitmg
## 43 0.9375000 0.70149254 Logitmg
## 44 0.9012346 0.50306748 Logitmg
## 45 0.9382716 0.70286134 Logitmg
## 46 0.9625000 0.80327869 Logitmg
## 47 0.9012346 0.54430380 Logitmg
## 48 0.9512195 0.77253814 Logitmg
## 49 0.9629630 0.83592167 Logitmg
## 50 0.9024390 0.54507628 Logitmg
## 51 0.9512195 0.75189107 Logitmg
## 52 0.9382716 0.70286134 Logitmg
## 53 0.9625000 0.80327869 Logitmg
## 54 0.9250000 0.65714286 Logitmg
## 55 0.9146341 0.61784288 Logitmg
## 56 0.9012346 0.50306748 Logitmg
## 57 0.9500000 0.78947368 Logitmg
## 58 0.9375000 0.63636364 Logitmg
## 59 0.8765432 0.37883436 Logitmg
## 60 0.9390244 0.72703063 Logitmg
## 61 0.9125000 0.54098361 Logitmg
## 62 0.9250000 0.62500000 Logitmg
## 63 0.9024390 0.45424293 Logitmg
## 64 0.9250000 0.65714286 Logitmg
## 65 0.9506173 0.77215190 Logitmg
## 66 0.9390244 0.70332851 Logitmg
## 67 0.9012346 0.54430380 Logitmg
## 68 0.9382716 0.67259499 Logitmg
## 69 0.9382716 0.70286134 Logitmg
## 70 0.9382716 0.72653612 Logitmg
## 71 0.9250000 0.68421053 Logitmg
## 72 0.9506173 0.77215190 Logitmg
## 73 0.9390244 0.70332851 Logitmg
## 74 0.8765432 0.51145959 Logitmg
## 75 0.9259259 0.65822785 Logitmg
## 76 0.9382716 0.67469880 Logitmg
## 77 0.9382716 0.67259499 Logitmg
## 78 0.9125000 0.49090909 Logitmg
## 79 0.9506173 0.77215190 Logitmg
## 80 0.9382716 0.70198675 Logitmg
## 81 0.9506173 0.78961039 Logitmg
## 82 0.9135802 0.61715057 Logitmg
## 83 0.8658537 0.34732272 Logitmg
## 84 0.9506173 0.75038521 Logitmg
## 85 0.9506173 0.75153374 Logitmg
## 86 0.9500000 0.75000000 Logitmg
## 87 0.9382716 0.72653612 Logitmg
## 88 0.9375000 0.70149254 Logitmg
## 89 0.9024390 0.45424293 Logitmg
## 90 0.9500000 0.75000000 Logitmg
## 91 0.9012346 0.54430380 Logitmg
## 92 0.9024390 0.50378215 Logitmg
## 93 0.8888889 0.41445783 Logitmg
## 94 0.9250000 0.65714286 Logitmg
## 95 0.9625000 0.83561644 Logitmg
## 96 0.9506173 0.75038521 Logitmg
## 97 0.9125000 0.64556962 Logitmg
## 98 0.9506173 0.75153374 Logitmg
## 99 0.9259259 0.65822785 Logitmg
## 100 0.9146341 0.49737303 Logitmg
## 101 0.8875000 0.40983607 Logitmg2
## 102 0.8888889 0.26586103 Logitmg2
## 103 0.8641975 0.20940550 Logitmg2
## 104 0.8641975 0.20940550 Logitmg2
## 105 0.9512195 0.77253814 Logitmg2
## 106 0.8888889 0.46515040 Logitmg2
## 107 0.9012346 0.50306748 Logitmg2
## 108 0.9000000 0.44827586 Logitmg2
## 109 0.8888889 0.41067098 Logitmg2
## 110 0.9382716 0.67469880 Logitmg2
## 111 0.8625000 0.27868852 Logitmg2
## 112 0.9250000 0.58620690 Logitmg2
## 113 0.9000000 0.38461538 Logitmg2
## 114 0.8902439 0.35376532 Logitmg2
## 115 0.9382716 0.63677130 Logitmg2
## 116 0.8414634 0.34278668 Logitmg2
## 117 0.8518519 0.31645570 Logitmg2
## 118 0.9259259 0.59021922 Logitmg2
## 119 0.9135802 0.54457831 Logitmg2
## 120 0.8888889 0.35314996 Logitmg2
## 121 0.9125000 0.42857143 Logitmg2
## 122 0.9012346 0.45362563 Logitmg2
## 123 0.8518519 0.31645570 Logitmg2
## 124 0.8902439 0.27788650 Logitmg2
## 125 0.9000000 0.44827586 Logitmg2
## 126 0.8750000 0.42857143 Logitmg2
## 127 0.9629630 0.82171680 Logitmg2
## 128 0.9012346 0.44897959 Logitmg2
## 129 0.8765432 0.43037975 Logitmg2
## 130 0.9024390 0.54507628 Logitmg2
## 131 0.9135802 0.54457831 Logitmg2
## 132 0.8658537 0.28526149 Logitmg2
## 133 0.8888889 0.41445783 Logitmg2
## 134 0.9375000 0.67213115 Logitmg2
## 135 0.9012346 0.44897959 Logitmg2
## 136 0.9259259 0.62730061 Logitmg2
## 137 0.9125000 0.42857143 Logitmg2
## 138 0.8375000 0.28767123 Logitmg2
## 139 0.8765432 0.31703204 Logitmg2
## 140 0.9024390 0.50378215 Logitmg2
## 141 0.8658537 0.39946738 Logitmg2
## 142 0.9000000 0.38461538 Logitmg2
## 143 0.8641975 0.20940550 Logitmg2
## 144 0.9259259 0.59021922 Logitmg2
## 145 0.9382716 0.70286134 Logitmg2
## 146 0.8750000 0.31034483 Logitmg2
## 147 0.9259259 0.58673469 Logitmg2
## 148 0.8780488 0.37972769 Logitmg2
## 149 0.8888889 0.41445783 Logitmg2
## 150 0.9000000 0.44827586 Logitmg2
## 151 0.9135802 0.49689441 Logitmg2
## 152 0.9250000 0.58620690 Logitmg2
## 153 0.8888889 0.41445783 Logitmg2
## 154 0.9259259 0.59021922 Logitmg2
## 155 0.8518519 0.25460123 Logitmg2
## 156 0.8902439 0.35376532 Logitmg2
## 157 0.9135802 0.58278146 Logitmg2
## 158 0.8641975 0.27970897 Logitmg2
## 159 0.9012346 0.57922078 Logitmg2
## 160 0.9125000 0.42857143 Logitmg2
## 161 0.9146341 0.49737303 Logitmg2
## 162 0.8875000 0.34545455 Logitmg2
## 163 0.8875000 0.34545455 Logitmg2
## 164 0.9012346 0.50306748 Logitmg2
## 165 0.8750000 0.37500000 Logitmg2
## 166 0.9012346 0.50306748 Logitmg2
## 167 0.9146341 0.61784288 Logitmg2
## 168 0.8658537 0.11741683 Logitmg2
## 169 0.9000000 0.50000000 Logitmg2
## 170 0.8888889 0.54409006 Logitmg2
## 171 0.8750000 0.31034483 Logitmg2
## 172 0.8765432 0.31703204 Logitmg2
## 173 0.9135802 0.58400587 Logitmg2
## 174 0.9000000 0.38461538 Logitmg2
## 175 0.9375000 0.63636364 Logitmg2
## 176 0.8750000 0.37500000 Logitmg2
## 177 0.9024390 0.50378215 Logitmg2
## 178 0.8780488 0.43134535 Logitmg2
## 179 0.8888889 0.27750248 Logitmg2
## 180 0.9146341 0.61784288 Logitmg2
## 181 0.9000000 0.54285714 Logitmg2
## 182 0.9250000 0.58620690 Logitmg2
## 183 0.9268293 0.62783661 Logitmg2
## 184 0.8536585 0.09057301 Logitmg2
## 185 0.8641975 0.34629494 Logitmg2
## 186 0.9259259 0.59021922 Logitmg2
## 187 0.8875000 0.40983607 Logitmg2
## 188 0.8888889 0.46515040 Logitmg2
## 189 0.9012346 0.45362563 Logitmg2
## 190 0.9012346 0.38519924 Logitmg2
## 191 0.8888889 0.46357616 Logitmg2
## 192 0.9012346 0.57922078 Logitmg2
## 193 0.9250000 0.53846154 Logitmg2
## 194 0.8518519 0.18043845 Logitmg2
## 195 0.8888889 0.35314996 Logitmg2
## 196 0.9135802 0.49689441 Logitmg2
## 197 0.9012346 0.54430380 Logitmg2
## 198 0.9250000 0.53846154 Logitmg2
## 199 0.9259259 0.53889943 Logitmg2
## 200 0.9146341 0.61784288 Logitmg2
## Phân tích hình ảnh
library(ggplot2)
library(ggthemes)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
h1 <- ggplot(resamplemod) + geom_boxplot(aes(x = Mohinh, y = Accuracy, fill = Mohinh)) + coord_flip() + geom_hline(yintercept = 0.8701, color = "blue") + theme_wsj() # Giá trị 0.8701 = 87.01% là tỷ lệ nhóm chiếm ưu thế (không sử dụng xe buýt)
h1 # Nhận xét: Cả 2 mô hình đều có khả năng phân loại tốt - độ chính xác accuracy của cả 2 mô hình đều lớn hơn 87.01% (đường màu xanh)
h2 <- ggplot(resamplemod, aes(Accuracy, fill = Mohinh)) + geom_density(alpha = 0.3) + theme_wsj() + facet_wrap(~ Mohinh)
h2
## Kiểm định t-test để đánh giá sự khác biệt về Accuracy và Kappa của 2 mô hình: p<0.05 có ý nghĩa thống kê về sự khác biệt Accuracy và Kappa của 2 mô hình
t.test(resamplemod$Accuracy ~ resamplemod$Mohinh)
##
## Welch Two Sample t-test
##
## data: resamplemod$Accuracy by resamplemod$Mohinh
## t = 9.3916, df = 197.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02551030 0.03907123
## sample estimates:
## mean in group Logitmg mean in group Logitmg2
## 0.9293400 0.8970493
summary(a)
## Accuracy Kappa Mohinh
## Min. :0.8659 Min. :0.3462 Length:100
## 1st Qu.:0.9136 1st Qu.:0.5450 Class :character
## Median :0.9259 Median :0.6582 Mode :character
## Mean :0.9293 Mean :0.6545
## 3rd Qu.:0.9502 3rd Qu.:0.7504
## Max. :0.9753 Max. :0.8861
summary(b)
## Accuracy Kappa Mohinh
## Min. :0.8375 Min. :0.09057 Length:100
## 1st Qu.:0.8780 1st Qu.:0.35315 Class :character
## Median :0.9000 Median :0.44828 Mode :character
## Mean :0.8970 Mean :0.45014
## 3rd Qu.:0.9136 3rd Qu.:0.54470
## Max. :0.9630 Max. :0.82172
t.test(resamplemod$Kappa ~ resamplemod$Mohinh)
##
## Welch Two Sample t-test
##
## data: resamplemod$Kappa by resamplemod$Mohinh
## t = 11.036, df = 194.82, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1678119 0.2408437
## sample estimates:
## mean in group Logitmg mean in group Logitmg2
## 0.6544642 0.4501364
# 4.9. Calculate AUC (Area under Curve) - Diện tích dưới đường cong ROC (Receiver Operating Characteristics), đánh giá mức độ lồi về phía trên của đường ROC (Tiêu chí thường được sử dụng khi so sánh các mô hình phân loại với nhau): sử dụng hàm roc trong gói "pROC": AUC càng gần 1 càng tốt, >0.7 xem như chấp nhận được
library(pROC)
## Warning: package 'pROC' was built under R version 3.6.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:epiDisplay':
##
## ci
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
## Mô hình mg
mgtrain1 <- train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, method = "glm", family = "binomial", data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mgtrain1)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5799 -0.2733 -0.1226 -0.0225 3.5052
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.693170 1.122728 0.617 0.536972
## Central.AreaYes 1.153570 0.369569 3.121 0.001800
## `PurposePicking Children` -0.846264 0.775170 -1.092 0.274959
## PurposeEntertainment -2.128209 0.613702 -3.468 0.000525
## PurposeOthers -0.241582 0.938275 -0.257 0.796812
## `Frequency2 times` 1.663772 0.810579 2.053 0.040114
## `Frequency3 times` 0.551286 0.817643 0.674 0.500160
## `Frequency> 3 times` -1.081913 0.669912 -1.615 0.106309
## `Temporary.Stop.Number1 stop` -2.088256 0.549980 -3.797 0.000146
## `Temporary.Stop.Number>=2 stops` 0.007966 0.491717 0.016 0.987075
## WeatherRainny 1.875750 1.022332 1.835 0.066539
## WeatherCool -0.560382 0.483314 -1.159 0.246270
## Bus.Stop.PresentYes -0.648223 0.566922 -1.143 0.252869
## `Bus.Stop.PresentDon't know` -1.082582 0.864050 -1.253 0.210236
## `Age25-60` -0.681496 0.689868 -0.988 0.323219
## `Age>60` 2.254365 0.977059 2.307 0.021038
## `OccupationOfficers/Workers` 0.203182 0.735185 0.276 0.782265
## `OccupationHousewife/Unemployed` 2.166113 1.018430 2.127 0.033427
## `OccupationFree labor` -0.369467 0.702315 -0.526 0.598840
## OccupationOthers 1.387882 0.899394 1.543 0.122799
## `Income(8-15) millions` -0.937300 0.434752 -2.156 0.031088
## `Income(15-25) millions` -0.285344 0.510462 -0.559 0.576167
## `Income>25 millions` -2.316472 1.157201 -2.002 0.045307
## `Number.of.Children1 child` -0.666788 0.428135 -1.557 0.119370
## `Number.of.Children2 children` -0.302515 0.608433 -0.497 0.619045
## `Number.of.Children>= 3 children` -0.920554 1.460357 -0.630 0.528457
## Motor.CertificateYes -1.343825 0.544182 -2.469 0.013532
## Car.CertificateYes 0.038586 0.945690 0.041 0.967454
## Motor.OwningYes -0.746040 0.527259 -1.415 0.157086
## Car.OwningYes 0.036269 1.448918 0.025 0.980030
## Number.of.Motors1 0.618812 0.808973 0.765 0.444310
## Number.of.Motors2 0.222040 0.802489 0.277 0.782019
## Number.of.Motors3 -0.523223 0.881065 -0.594 0.552610
## `Number.of.Motors>3` -0.510007 1.380742 -0.369 0.711851
## Number.of.Car1 -1.162398 0.858510 -1.354 0.175745
## `Number.of.Car>=2` -1.072850 2.226960 -0.482 0.629980
## Distance 0.688046 0.098829 6.962 3.36e-12
## Travel.Period 0.009410 0.015030 0.626 0.531270
## Cost -0.618352 0.094949 -6.512 7.39e-11
##
## (Intercept)
## Central.AreaYes **
## `PurposePicking Children`
## PurposeEntertainment ***
## PurposeOthers
## `Frequency2 times` *
## `Frequency3 times`
## `Frequency> 3 times`
## `Temporary.Stop.Number1 stop` ***
## `Temporary.Stop.Number>=2 stops`
## WeatherRainny .
## WeatherCool
## Bus.Stop.PresentYes
## `Bus.Stop.PresentDon't know`
## `Age25-60`
## `Age>60` *
## `OccupationOfficers/Workers`
## `OccupationHousewife/Unemployed` *
## `OccupationFree labor`
## OccupationOthers
## `Income(8-15) millions` *
## `Income(15-25) millions`
## `Income>25 millions` *
## `Number.of.Children1 child`
## `Number.of.Children2 children`
## `Number.of.Children>= 3 children`
## Motor.CertificateYes *
## Car.CertificateYes
## Motor.OwningYes
## Car.OwningYes
## Number.of.Motors1
## Number.of.Motors2
## Number.of.Motors3
## `Number.of.Motors>3`
## Number.of.Car1
## `Number.of.Car>=2`
## Distance ***
## Travel.Period
## Cost ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 254.29 on 770 degrees of freedom
## AIC: 332.29
##
## Number of Fisher Scoring iterations: 9
pred1mg <- predict(mgtrain1, newdata = datBus, type = "prob")
rmg <- roc(datBus$Bus, pred1mg$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg$auc
## Area under the curve: 0.942
plot(rmg, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)
plot(smooth(rmg), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "greenyellow", identity = F)
## Có thể dùng các lệnh đơn giản
auc(rmg)
## Area under the curve: 0.942
plot.roc(rmg)
ci(rmg)
## 95% CI: 0.9137-0.9702 (DeLong)
## Mô hình mg2
mg2train1 <- train(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, data = datBus, method = "glm", family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mg2train1)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4647 -0.4365 -0.2316 -0.1143 3.1417
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.77606 0.54706 1.419 0.156015
## Central.AreaYes 0.80313 0.27165 2.957 0.003111 **
## `PurposePicking Children` -1.02909 0.56419 -1.824 0.068150 .
## PurposeEntertainment -2.36946 0.44919 -5.275 1.33e-07 ***
## PurposeOthers -0.97031 0.73516 -1.320 0.186878
## `Frequency2 times` 0.46866 0.55317 0.847 0.396874
## `Frequency3 times` 0.20536 0.54499 0.377 0.706315
## `Frequency> 3 times` -1.80115 0.46243 -3.895 9.82e-05 ***
## `Temporary.Stop.Number1 stop` -1.74861 0.39604 -4.415 1.01e-05 ***
## `Temporary.Stop.Number>=2 stops` 0.13088 0.34701 0.377 0.706052
## `Age25-60` -0.45058 0.30490 -1.478 0.139470
## `Age>60` 2.66331 0.55312 4.815 1.47e-06 ***
## Motor.CertificateYes -1.59017 0.32545 -4.886 1.03e-06 ***
## Travel.Period 0.04184 0.00925 4.524 6.08e-06 ***
## Cost -0.09852 0.02766 -3.562 0.000368 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 411.48 on 794 degrees of freedom
## AIC: 441.48
##
## Number of Fisher Scoring iterations: 7
pred1mg2 <- predict(mg2train1, newdata = datBus, type = "prob")
rmg2 <- roc(datBus$Bus, pred1mg2$Yes)
## Setting levels: control = No, case = Yes
## Setting direction: controls < cases
rmg2$auc
## Area under the curve: 0.882
auc(rmg2)
## Area under the curve: 0.882
plot.roc(rmg2)
ci(rmg2)
## 95% CI: 0.8444-0.9196 (DeLong)
plot(rmg2, print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)
plot(smooth(rmg2), print.thres = "best", col = "red", print.auc = TRUE, auc.polygon = TRUE, auc.polygon.col = "yellow", identity = F)
## Tính hệ số Gini = 2AUC-1
Ginimg <- 2*(rmg$auc)-1
Ginimg
## [1] 0.8839207
Ginimg2 <- 2*(rmg2$auc)-1
Ginimg2
## [1] 0.7640302
5. Importance of variables in models
library(caret)
varImp(mg2)
## Overall
## Central.AreaYes 2.9565217
## PurposePicking Children 1.8240135
## PurposeEntertainment 5.2749505
## PurposeOthers 1.3198716
## Frequency2 times 0.8472177
## Frequency3 times 0.3768097
## Frequency> 3 times 3.8949517
## Temporary.Stop.Number1 stop 4.4152481
## Temporary.Stop.Number>=2 stops 0.3771633
## Age25-60 1.4777667
## Age>60 4.8150353
## Motor.CertificateYes 4.8860187
## Travel.Period 4.5237408
## Cost 3.5621070
plot(varImp(mg2))
varImp(mg)
## Overall
## Central.AreaYes 3.12139274
## PurposePicking Children 1.09171417
## PurposeEntertainment 3.46782157
## PurposeOthers 0.25747503
## Frequency2 times 2.05257212
## Frequency3 times 0.67423751
## Frequency> 3 times 1.61500743
## Temporary.Stop.Number1 stop 3.79696941
## Temporary.Stop.Number>=2 stops 0.01620000
## WeatherRainny 1.83477545
## WeatherCool 1.15945771
## Bus.Stop.PresentYes 1.14340863
## Bus.Stop.PresentDon't know 1.25291614
## Age25-60 0.98786437
## Age>60 2.30729503
## OccupationOfficers/Workers 0.27636808
## OccupationHousewife/Unemployed 2.12691327
## OccupationFree labor 0.52606942
## OccupationOthers 1.54313029
## Income(8-15) millions 2.15594049
## Income(15-25) millions 0.55899226
## Income>25 millions 2.00178931
## Number.of.Children1 child 1.55742325
## Number.of.Children2 children 0.49720424
## Number.of.Children>= 3 children 0.63036243
## Motor.CertificateYes 2.46944311
## Car.CertificateYes 0.04080213
## Motor.OwningYes 1.41494082
## Car.OwningYes 0.02503163
## Number.of.Motors1 0.76493566
## Number.of.Motors2 0.27668869
## Number.of.Motors3 0.59385335
## Number.of.Motors>3 0.36937161
## Number.of.Car1 1.35397226
## Number.of.Car>=2 0.48175552
## Distance 6.96197233
## Travel.Period 0.62606908
## Cost 6.51245585
6. Probility of bus use - Prohibit model
# Prohibit Models
library(aod)
## Warning: package 'aod' was built under R version 3.6.3
##
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
##
## rats
## Model mg
prohibitmg <- glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Weather + Bus.Stop.Present + Age + Occupation + Income + Number.of.Children + Motor.Certificate + Car.Certificate + Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car + Distance + Travel.Period + Cost, family = binomial(link = "probit"), data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(prohibitmg)
##
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Weather + Bus.Stop.Present + Age + Occupation + Income +
## Number.of.Children + Motor.Certificate + Car.Certificate +
## Motor.Owning + Car.Owning + Number.of.Motors + Number.of.Car +
## Distance + Travel.Period + Cost, family = binomial(link = "probit"),
## data = datBus)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6426 -0.3016 -0.1042 -0.0043 3.5865
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.402449 0.584282 0.689 0.490954
## Central.AreaYes 0.584441 0.183328 3.188 0.001433 **
## PurposePicking Children -0.516126 0.389421 -1.325 0.185050
## PurposeEntertainment -1.098028 0.309321 -3.550 0.000386 ***
## PurposeOthers -0.130938 0.478141 -0.274 0.784202
## Frequency2 times 0.847701 0.425218 1.994 0.046200 *
## Frequency3 times 0.308207 0.424978 0.725 0.468311
## Frequency> 3 times -0.549188 0.350443 -1.567 0.117085
## Temporary.Stop.Number1 stop -1.010253 0.257255 -3.927 8.60e-05 ***
## Temporary.Stop.Number>=2 stops -0.003833 0.249232 -0.015 0.987730
## WeatherRainny 0.988161 0.553723 1.785 0.074330 .
## WeatherCool -0.268920 0.235831 -1.140 0.254158
## Bus.Stop.PresentYes -0.371152 0.284494 -1.305 0.192027
## Bus.Stop.PresentDon't know -0.630963 0.421369 -1.497 0.134286
## Age25-60 -0.236704 0.343305 -0.689 0.490518
## Age>60 1.292995 0.493032 2.623 0.008728 **
## OccupationOfficers/Workers 0.014321 0.364257 0.039 0.968638
## OccupationHousewife/Unemployed 0.956887 0.501140 1.909 0.056208 .
## OccupationFree labor -0.269399 0.348788 -0.772 0.439886
## OccupationOthers 0.629580 0.451720 1.394 0.163396
## Income(8-15) millions -0.523111 0.217347 -2.407 0.016093 *
## Income(15-25) millions -0.223731 0.256079 -0.874 0.382292
## Income>25 millions -1.286046 0.579842 -2.218 0.026560 *
## Number.of.Children1 child -0.341940 0.214248 -1.596 0.110489
## Number.of.Children2 children -0.187685 0.312806 -0.600 0.548505
## Number.of.Children>= 3 children -0.287871 0.691383 -0.416 0.677140
## Motor.CertificateYes -0.597675 0.277590 -2.153 0.031312 *
## Car.CertificateYes -0.053636 0.470492 -0.114 0.909238
## Motor.OwningYes -0.504480 0.260076 -1.940 0.052411 .
## Car.OwningYes 0.167182 0.702423 0.238 0.811876
## Number.of.Motors1 0.466191 0.426873 1.092 0.274787
## Number.of.Motors2 0.164327 0.415433 0.396 0.692433
## Number.of.Motors3 -0.203764 0.451891 -0.451 0.652052
## Number.of.Motors>3 -0.049101 0.630898 -0.078 0.937966
## Number.of.Car1 -0.651396 0.422560 -1.542 0.123184
## Number.of.Car>=2 -0.548670 1.071047 -0.512 0.608459
## Distance 0.349429 0.049228 7.098 1.26e-12 ***
## Travel.Period 0.001293 0.008218 0.157 0.874980
## Cost -0.314582 0.047784 -6.583 4.60e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 262.71 on 770 degrees of freedom
## AIC: 340.71
##
## Number of Fisher Scoring iterations: 10
## Model mg2
prohibitmg2 <- glm(Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number + Age + Motor.Certificate + Travel.Period + Cost, family = binomial(link = "probit"), data = datBus)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(prohibitmg2)
##
## Call:
## glm(formula = Bus ~ Central.Area + Purpose + Frequency + Temporary.Stop.Number +
## Age + Motor.Certificate + Travel.Period + Cost, family = binomial(link = "probit"),
## data = datBus)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3347 -0.4543 -0.2357 -0.0880 3.2910
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.321332 0.307844 1.044 0.296571
## Central.AreaYes 0.449759 0.142742 3.151 0.001628 **
## PurposePicking Children -0.441866 0.283879 -1.557 0.119582
## PurposeEntertainment -1.176172 0.231109 -5.089 3.59e-07 ***
## PurposeOthers -0.428349 0.399398 -1.072 0.283501
## Frequency2 times 0.250444 0.310939 0.805 0.420562
## Frequency3 times 0.133451 0.305593 0.437 0.662334
## Frequency> 3 times -0.924517 0.259446 -3.563 0.000366 ***
## Temporary.Stop.Number1 stop -0.858111 0.192319 -4.462 8.12e-06 ***
## Temporary.Stop.Number>=2 stops 0.072113 0.187312 0.385 0.700247
## Age25-60 -0.262606 0.158451 -1.657 0.097452 .
## Age>60 1.410811 0.291861 4.834 1.34e-06 ***
## Motor.CertificateYes -0.855405 0.180384 -4.742 2.11e-06 ***
## Travel.Period 0.021625 0.004887 4.425 9.66e-06 ***
## Cost -0.053923 0.014555 -3.705 0.000212 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 628.32 on 808 degrees of freedom
## Residual deviance: 414.71 on 794 degrees of freedom
## AIC: 444.71
##
## Number of Fisher Scoring iterations: 8
# Đánh giá tác động tổng thể Purpose (Total effect of Travel Purpose) trong mô hinhg mg2 có ảnh hưởng hay không đến khả năng chọn xe buýt (thứ tự hệ số ứng với Purpose là từ 3:6)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 3:6) # Giá trị 19.8 ứng với p-value < 0.05 --> KL: tác động tổng thể về Purpose (mục đich chuyến đi) của người sử dụng lên khả năng chọn xe buýt là tồn tại và có ý nghĩa thống kê
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 26.2, df = 4, P(> X2) = 2.9e-05
# Đánh giá tác động tổng thể Temporary.Stop.Number (Total effect of Temporary.Stop.Number) trong mô hình mg2 có ảnh hưởng hay không đến khả năng chọn xe buýt (thứ tự hệ số ứng với Temporary.Stop.Number là từ 11:13)
wald.test(b = coef(prohibitmg2), Sigma = vcov(prohibitmg2), Terms = 11:13)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 63.4, df = 3, P(> X2) = 1.1e-13
6. Reason why choose or doesn’t choose bus
t = "F:\\NGHIEN CUU SINH\\NCS - PHUONG ANH\\Part 1-Mode choice\\SO LIEU R\\Bus choice in DN.csv"
MC = read.csv(t, header = T)
head(MC)
## Travel.Mode Bus.Stop.Condition Central.Area Purpose Frequency
## 1 6 1 0 3 3
## 2 3 1 0 1 4
## 3 3 1 0 1 4
## 4 3 1 0 1 4
## 5 3 1 0 1 4
## 6 4 1 0 1 4
## Departure.Time Distance Travel.Period Sidewalk.Clearance Lane.Separate
## 1 1 2 10 1 1
## 2 1 8 15 1 1
## 3 3 5 15 1 1
## 4 1 5 10 1 1
## 5 3 8 15 1 1
## 6 1 20 30 1 1
## Temporary.Stop.Number Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## 1 0 4 1 0 1
## 2 1 2 3 0 1
## 3 1 4 1 0 2
## 4 1 2 1 0 4
## 5 0 2 3 0 2
## 6 0 5 3 0 2
## Cost Bus.Stop.Present Gender Age Occupation Income Number.of.Children
## 1 12 1 0 5 4 2 2
## 2 8 2 0 3 6 3 1
## 3 5 1 1 3 2 4 0
## 4 5 1 1 3 2 3 0
## 5 8 2 0 4 6 3 1
## 6 40 2 1 4 3 4 2
## Motor.Certificate Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## 1 0 0 0 0 0
## 2 1 0 1 1 0
## 3 1 0 0 1 0
## 4 1 0 1 1 0
## 5 1 0 0 1 0
## 6 1 1 1 1 1
## Number.of.Bicycles Number.of.Motors Number.of.Car
## 1 1 2 1
## 2 1 3 0
## 3 0 3 0
## 4 1 3 0
## 5 0 2 0
## 6 1 2 1
# View data (number of rows and colume)
dim(MC)
## [1] 809 30
# Sumerize Data
summary(MC)
## Travel.Mode Bus.Stop.Condition Central.Area Purpose
## Min. :1.000 Min. :0.0000 Min. :0.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000
## Median :3.000 Median :1.0000 Median :0.000 Median :1.000
## Mean :3.663 Mean :0.5117 Mean :0.471 Mean :1.576
## 3rd Qu.:4.000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :7.000 Max. :1.0000 Max. :1.000 Max. :4.000
##
## Frequency Departure.Time Distance Travel.Period
## Min. :1.000 Min. :1.000 Min. : 0.015 Min. : 0.00
## 1st Qu.:4.000 1st Qu.:1.000 1st Qu.: 3.000 1st Qu.: 10.00
## Median :4.000 Median :1.000 Median : 5.000 Median : 15.00
## Mean :3.555 Mean :1.832 Mean : 6.654 Mean : 17.55
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:10.000 3rd Qu.: 20.00
## Max. :4.000 Max. :4.000 Max. :35.000 Max. :180.00
##
## Sidewalk.Clearance Lane.Separate Temporary.Stop.Number
## Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :1.000 Median :1.0000 Median :0.0000
## Mean :0.864 Mean :0.7886 Mean :0.6922
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000 Max. :3.0000
##
## Mode.Choice.Reason Weather Weekend Non.Bus.Reason
## Min. :1.000 Min. :1.00 Min. :0.0000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:0.0000 1st Qu.:2.000
## Median :2.000 Median :1.00 Median :0.0000 Median :2.000
## Mean :2.714 Mean :1.64 Mean :0.2435 Mean :2.882
## 3rd Qu.:4.000 3rd Qu.:3.00 3rd Qu.:0.0000 3rd Qu.:4.000
## Max. :6.000 Max. :3.00 Max. :1.0000 Max. :6.000
## NA's :106
## Cost Bus.Stop.Present Gender Age
## Min. : 0.000 Min. :0.000 Min. :0.000 Min. :1.000
## 1st Qu.: 3.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:3.000
## Median : 5.000 Median :1.000 Median :1.000 Median :4.000
## Mean : 8.754 Mean :1.005 Mean :0.581 Mean :3.677
## 3rd Qu.: 10.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:4.000
## Max. :285.000 Max. :2.000 Max. :1.000 Max. :6.000
##
## Occupation Income Number.of.Children Motor.Certificate
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :3.000 Median :2.000 Median :1.0000 Median :1.0000
## Mean :4.298 Mean :2.206 Mean :0.7651 Mean :0.8838
## 3rd Qu.:7.000 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :8.000 Max. :4.000 Max. :3.0000 Max. :1.0000
##
## Car.Certificate Bicycle.Owning Motor.Owning Car.Owning
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.1916 Mean :0.3956 Mean :0.8183 Mean :0.1471
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Number.of.Bicycles Number.of.Motors Number.of.Car
## Min. :0.0000 Min. :0.00 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:2.00 1st Qu.:0.0000
## Median :1.0000 Median :2.00 Median :0.0000
## Mean :0.6873 Mean :2.13 Mean :0.2447
## 3rd Qu.:1.0000 3rd Qu.:3.00 3rd Qu.:0.0000
## Max. :3.0000 Max. :4.00 Max. :2.0000
##
# Create new variable - Bus
MC$Bus[MC$Travel.Mode < 7] <- 0
MC$Bus[MC$Travel.Mode == 7] <- 1
# Vẽ biểu đồ reason why choose bus or doesn't choose bus
library(magrittr)
library(tidyverse)
library(ggplot2)
library(car)
# Lấy 1 phần dữ liệu gồm Bus, Mode.Choice.Reason, Non.Bus.Reason
str(MC)
## 'data.frame': 809 obs. of 31 variables:
## $ Travel.Mode : int 6 3 3 3 3 4 4 3 3 3 ...
## $ Bus.Stop.Condition : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Central.Area : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Purpose : int 3 1 1 1 1 1 1 1 1 1 ...
## $ Frequency : int 3 4 4 4 4 4 4 4 4 4 ...
## $ Departure.Time : int 1 1 3 1 3 1 1 1 2 1 ...
## $ Distance : num 2 8 5 5 8 20 15 10 12 10 ...
## $ Travel.Period : num 10 15 15 10 15 30 20 25 30 25 ...
## $ Sidewalk.Clearance : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Lane.Separate : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Temporary.Stop.Number: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Mode.Choice.Reason : int 4 2 4 2 2 5 5 2 2 2 ...
## $ Weather : int 1 3 1 1 3 3 3 3 1 3 ...
## $ Weekend : int 0 0 0 0 0 0 0 1 0 0 ...
## $ Non.Bus.Reason : int 1 1 2 4 2 2 4 2 1 4 ...
## $ Cost : int 12 8 5 5 8 40 30 10 12 10 ...
## $ Bus.Stop.Present : int 1 2 1 1 2 2 2 1 1 1 ...
## $ Gender : int 0 0 1 1 0 1 1 0 0 1 ...
## $ Age : int 5 3 3 3 4 4 5 4 3 4 ...
## $ Occupation : int 4 6 2 2 6 3 3 7 7 3 ...
## $ Income : int 2 3 4 3 3 4 4 2 3 3 ...
## $ Number.of.Children : int 2 1 0 0 1 2 2 0 1 0 ...
## $ Motor.Certificate : int 0 1 1 1 1 1 1 1 1 1 ...
## $ Car.Certificate : int 0 0 0 0 0 1 1 0 0 0 ...
## $ Bicycle.Owning : int 0 1 0 1 0 1 0 0 1 0 ...
## $ Motor.Owning : int 0 1 1 1 1 1 1 1 1 1 ...
## $ Car.Owning : int 0 0 0 0 0 1 1 0 0 0 ...
## $ Number.of.Bicycles : int 1 1 0 1 0 1 0 0 1 0 ...
## $ Number.of.Motors : int 2 3 3 3 2 2 2 2 3 3 ...
## $ Number.of.Car : int 1 0 0 0 0 1 1 0 0 0 ...
## $ Bus : num 0 0 0 0 0 0 0 0 0 0 ...
dat1 <- MC[,c(12,15,31)]
head(dat1)
## Mode.Choice.Reason Non.Bus.Reason Bus
## 1 4 1 0
## 2 2 1 0
## 3 4 2 0
## 4 2 4 0
## 5 2 2 0
## 6 5 2 0
# Dữ liệu sử dụng xe buýt
datBusonly <- subset(dat1, Bus == 1)
datBusonly$Bus.Choice.Reason <- datBusonly$Mode.Choice.Reason
head(datBusonly)
## Mode.Choice.Reason Non.Bus.Reason Bus Bus.Choice.Reason
## 57 1 NA 1 1
## 123 1 NA 1 1
## 234 2 NA 1 2
## 267 2 NA 1 2
## 327 1 NA 1 1
## 328 2 NA 1 2
str(datBusonly)
## 'data.frame': 106 obs. of 4 variables:
## $ Mode.Choice.Reason: int 1 1 2 2 1 2 1 1 3 3 ...
## $ Non.Bus.Reason : int NA NA NA NA NA NA NA NA NA NA ...
## $ Bus : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Bus.Choice.Reason : int 1 1 2 2 1 2 1 1 3 3 ...
# reformat variables in factor variables
attach(datBusonly)
## The following objects are masked from MC:
##
## Bus, Mode.Choice.Reason, Non.Bus.Reason
datBusonly = within(datBusonly, {
Bus.Choice.Reason = factor(Bus.Choice.Reason, labels = c("Safety", "Comfortable", "Low price", "Accessibility", "others"))
} )
head(datBusonly)
## Mode.Choice.Reason Non.Bus.Reason Bus Bus.Choice.Reason
## 57 1 NA 1 Safety
## 123 1 NA 1 Safety
## 234 2 NA 1 Comfortable
## 267 2 NA 1 Comfortable
## 327 1 NA 1 Safety
## 328 2 NA 1 Comfortable
str(datBusonly)
## 'data.frame': 106 obs. of 4 variables:
## $ Mode.Choice.Reason: int 1 1 2 2 1 2 1 1 3 3 ...
## $ Non.Bus.Reason : int NA NA NA NA NA NA NA NA NA NA ...
## $ Bus : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Bus.Choice.Reason : Factor w/ 5 levels "Safety","Comfortable",..: 1 1 2 2 1 2 1 1 3 3 ...
p.bus = ggplot(datBusonly, aes(x = Bus.Choice.Reason, fill = Bus.Choice.Reason))
p.bus = p.bus + geom_bar(width = 0.8, col = "blue")
p.bus = p.bus + theme(legend.position = "none")
p.bus = p.bus + coord_flip()
p.bus
# Dữ liệu không sử dụng xe buýt
datNonBusonly <- subset(dat1, Bus == 0)
head(datNonBusonly)
## Mode.Choice.Reason Non.Bus.Reason Bus
## 1 4 1 0
## 2 2 1 0
## 3 4 2 0
## 4 2 4 0
## 5 2 2 0
## 6 5 2 0
str(datNonBusonly)
## 'data.frame': 703 obs. of 3 variables:
## $ Mode.Choice.Reason: int 4 2 4 2 2 5 5 2 2 2 ...
## $ Non.Bus.Reason : int 1 1 2 4 2 2 4 2 1 4 ...
## $ Bus : num 0 0 0 0 0 0 0 0 0 0 ...
attach(datBusonly)
## The following objects are masked from datBusonly (pos = 3):
##
## Bus, Bus.Choice.Reason, Mode.Choice.Reason, Non.Bus.Reason
##
## The following objects are masked from MC:
##
## Bus, Mode.Choice.Reason, Non.Bus.Reason
datNonBusonly = within(datNonBusonly, {
Non.Bus.Reason = factor(Non.Bus.Reason, labels = c("No Route", "Inconvenient", "Unsafety", "Long waiting time", "Unreliability", "others"))
} )
str(datNonBusonly)
## 'data.frame': 703 obs. of 3 variables:
## $ Mode.Choice.Reason: int 4 2 4 2 2 5 5 2 2 2 ...
## $ Non.Bus.Reason : Factor w/ 6 levels "No Route","Inconvenient",..: 1 1 2 4 2 2 4 2 1 4 ...
## $ Bus : num 0 0 0 0 0 0 0 0 0 0 ...
p.nonbus = ggplot(datNonBusonly, aes(x = Non.Bus.Reason, fill = Non.Bus.Reason))
p.nonbus = p.nonbus + geom_bar(width = 0.8, col = "blue")
p.nonbus = p.nonbus + theme(legend.position = "none")
p.nonbus = p.nonbus + coord_flip()
p.nonbus