1. Read Data

# Read the survey data directly from the CSV export and preview the first rows.
# NOTE(review): the path is absolute and machine-specific; the script only runs
# where this exact file location exists.
# The variable name `t` is kept as-is because code outside this section may
# refer to it.
t <- "F:/NGHIEN CUU SINH/NCS - PHUONG ANH/Part 2-Satisfaction and Loyalty/So lieu/So lieu - PA xu ly/873_Loyaltyofbuspassenger_PAcode_NonBus_Mising data_outliers_forMLM_Analyse.csv"
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
DataLOY <- read.csv(t, header = TRUE)
head(DataLOY)
##   ID AGE CITY FRE TripPurpose Departure TimeUseonBus TravelTime PSSW PSSS PSAB
## 1  3   1    2   1           5         0            4       3.00  4.9  5.4  5.5
## 2  4   1    2   2           7         0            4       2.00  3.4  2.4  3.5
## 3  5   1    2   1           5         1            4       0.17  3.9  3.4  4.5
## 4  6   1    2   1           5         1            1       4.00  4.1  4.4  5.5
## 5  7   1    2   1           5         1            4       2.00  3.7  2.6  3.5
## 6  8   1    2   1           5         1            6       2.00  4.6  2.7  3.8
##   PSEB PSQ SAT LOY IMA PHB PEV ATM PPI SIM PPA SBE EXB EC_Stop WC_Stop EC_Bus
## 1  5.8 4.8 6.0 5.7 5.6 6.6 4.0 4.3 6.0 3.5 4.5 4.0 4.8       2       1      2
## 2  6.0 4.6 4.7 4.3 4.8 4.0 5.3 4.1 4.0 4.0 4.0 4.8 4.2       2       2      1
## 3  4.5 2.7 2.0 3.7 3.0 5.0 5.8 3.1 2.7 3.5 4.5 6.0 4.0       2       2      2
## 4  6.0 4.8 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0       2       1      2
## 5  4.5 4.5 4.0 4.6 4.6 5.6 6.0 3.3 3.0 5.5 5.0 5.0 4.2       2       1      2
## 6  4.0 3.9 4.7 4.4 3.0 5.4 5.3 2.7 3.0 2.0 2.5 2.0 5.0       2       1      2
##   WC_Bus Gender MarriedStatus Occupation Education Income
## 1      1      2             1          1         2      1
## 2      2      2             1          1         2      1
## 3      2      1             1          1         2      1
## 4      1      1             1          1         2      1
## 5      1      1             1          1         2      1
## 6      1      2             1          1         3      1
names(DataLOY)
##  [1] "ID"            "AGE"           "CITY"          "FRE"          
##  [5] "TripPurpose"   "Departure"     "TimeUseonBus"  "TravelTime"   
##  [9] "PSSW"          "PSSS"          "PSAB"          "PSEB"         
## [13] "PSQ"           "SAT"           "LOY"           "IMA"          
## [17] "PHB"           "PEV"           "ATM"           "PPI"          
## [21] "SIM"           "PPA"           "SBE"           "EXB"          
## [25] "EC_Stop"       "WC_Stop"       "EC_Bus"        "WC_Bus"       
## [29] "Gender"        "MarriedStatus" "Occupation"    "Education"    
## [33] "Income"
dim(DataLOY)
## [1] 873  33

2. Descriptive statistics

# 2.2. Convert the integer-coded categorical variables to labelled factors (LOY, SAT and the other scores stay numeric)
head(DataLOY)
##   ID AGE CITY FRE TripPurpose Departure TimeUseonBus TravelTime PSSW PSSS PSAB
## 1  3   1    2   1           5         0            4       3.00  4.9  5.4  5.5
## 2  4   1    2   2           7         0            4       2.00  3.4  2.4  3.5
## 3  5   1    2   1           5         1            4       0.17  3.9  3.4  4.5
## 4  6   1    2   1           5         1            1       4.00  4.1  4.4  5.5
## 5  7   1    2   1           5         1            4       2.00  3.7  2.6  3.5
## 6  8   1    2   1           5         1            6       2.00  4.6  2.7  3.8
##   PSEB PSQ SAT LOY IMA PHB PEV ATM PPI SIM PPA SBE EXB EC_Stop WC_Stop EC_Bus
## 1  5.8 4.8 6.0 5.7 5.6 6.6 4.0 4.3 6.0 3.5 4.5 4.0 4.8       2       1      2
## 2  6.0 4.6 4.7 4.3 4.8 4.0 5.3 4.1 4.0 4.0 4.0 4.8 4.2       2       2      1
## 3  4.5 2.7 2.0 3.7 3.0 5.0 5.8 3.1 2.7 3.5 4.5 6.0 4.0       2       2      2
## 4  6.0 4.8 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0       2       1      2
## 5  4.5 4.5 4.0 4.6 4.6 5.6 6.0 3.3 3.0 5.5 5.0 5.0 4.2       2       1      2
## 6  4.0 3.9 4.7 4.4 3.0 5.4 5.3 2.7 3.0 2.0 2.5 2.0 5.0       2       1      2
##   WC_Bus Gender MarriedStatus Occupation Education Income
## 1      1      2             1          1         2      1
## 2      2      2             1          1         2      1
## 3      2      1             1          1         2      1
## 4      1      1             1          1         2      1
## 5      1      1             1          1         2      1
## 6      1      2             1          1         3      1
str(DataLOY)
## 'data.frame':    873 obs. of  33 variables:
##  $ ID           : int  3 4 5 6 7 8 10 11 12 13 ...
##  $ AGE          : int  1 1 1 1 1 1 4 1 1 2 ...
##  $ CITY         : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ FRE          : int  1 2 1 1 1 1 1 1 1 1 ...
##  $ TripPurpose  : int  5 7 5 5 5 5 4 5 5 5 ...
##  $ Departure    : int  0 0 1 1 1 1 1 1 0 1 ...
##  $ TimeUseonBus : int  4 4 4 1 4 6 4 4 3 3 ...
##  $ TravelTime   : num  3 2 0.17 4 2 2 2 2.5 1.5 2 ...
##  $ PSSW         : num  4.9 3.4 3.9 4.1 3.7 4.6 3.9 3.4 5.3 4 ...
##  $ PSSS         : num  5.4 2.4 3.4 4.4 2.6 2.7 4 2.7 3.1 4.7 ...
##  $ PSAB         : num  5.5 3.5 4.5 5.5 3.5 3.8 6 3.5 4.8 5.5 ...
##  $ PSEB         : num  5.8 6 4.5 6 4.5 4 4.5 6.3 7 4.5 ...
##  $ PSQ          : num  4.8 4.6 2.7 4.8 4.5 3.9 4.2 4.1 6 4.5 ...
##  $ SAT          : num  6 4.7 2 5 4 4.7 6.7 5 7 6.3 ...
##  $ LOY          : num  5.7 4.3 3.7 5 4.6 4.4 6.6 5.3 5.9 5.6 ...
##  $ IMA          : num  5.6 4.8 3 5 4.6 3 5.8 5.8 6.8 6 ...
##  $ PHB          : num  6.6 4 5 5 5.6 5.4 5.8 5.6 7 6.4 ...
##  $ PEV          : num  4 5.3 5.8 5 6 5.3 6.3 6.5 7 5.3 ...
##  $ ATM          : num  4.3 4.1 3.1 5 3.3 2.7 4 4.1 4.4 3.9 ...
##  $ PPI          : num  6 4 2.7 5 3 3 6 2 3.3 5.3 ...
##  $ SIM          : num  3.5 4 3.5 5 5.5 2 6 5.3 4.3 6 ...
##  $ PPA          : num  4.5 4 4.5 5 5 2.5 4 4 4 4.3 ...
##  $ SBE          : num  4 4.8 6 5 5 2 5.3 4 5 3.8 ...
##  $ EXB          : num  4.8 4.2 4 5 4.2 5 5.3 5.3 5.3 5 ...
##  $ EC_Stop      : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ WC_Stop      : int  1 2 2 1 1 1 2 1 2 2 ...
##  $ EC_Bus       : int  2 1 2 2 2 2 2 2 2 2 ...
##  $ WC_Bus       : int  1 2 2 1 1 1 2 1 2 2 ...
##  $ Gender       : int  2 2 1 1 1 2 2 2 2 1 ...
##  $ MarriedStatus: int  1 1 1 1 1 1 2 1 1 1 ...
##  $ Occupation   : int  1 1 1 1 1 1 2 1 1 7 ...
##  $ Education    : int  2 2 2 2 2 3 1 2 3 5 ...
##  $ Income       : int  1 1 1 1 1 1 1 1 1 1 ...
# NOTE(review): attach() copies the columns of DataLOY as they are right now
# onto the search path; the factor conversion below is not reflected in that
# copy, so bare column names would still resolve to the integer codes.
attach(DataLOY)

# Turn every integer-coded categorical column into a labelled factor.
# factor(x, labels = ...) assigns the labels positionally to the sorted
# distinct codes found in the column, not to the code values themselves.
# NOTE(review): in the printed data, raw AGE code 4 ends up as the 3rd level
# and raw TripPurpose codes 5/7/4 end up as levels 2/4/1, so the raw codes are
# not simply 1..k -- verify each label vector against the questionnaire
# codebook.
DataLOY <- local({
  factor_labels <- list(
    AGE = c("16-25", "26-35", "36-45", "46-55", ">55"),
    CITY = c("DaNang", "HoChiMinh"),
    FRE = c(
      ">=3 days/week", "2days/month-2days/week",
      "2days/year-1day/month", "<2 days/year"
    ),
    TripPurpose = c(
      "Working", "Studying", "Shopping", "Entertaining", "Others"
    ),
    Departure = c("Normal", "Peak-Hour"),
    TimeUseonBus = c(
      "Using.telephone", "Reading", "Listening", "Nothing", "Talking",
      "Others"
    ),
    EC_Stop = c("Ever", "Never"),
    WC_Stop = c("Ever", "Never"),
    EC_Bus = c("Ever", "Never"),
    WC_Bus = c("Ever", "Never"),
    Gender = c("Male", "Female"),
    MarriedStatus = c("Single", "Married"),
    Occupation = c(
      "Students/Pupils", "Full.time.job", "Part.time.job", "Retirement",
      "No.job", "Housewife", "Others"
    ),
    Education = c(
      "Secondary.school", "Undergraduate", "High.school", "Postgraduate",
      "Others"
    ),
    Income = c("<5millions", "5-10millions", "10-15millions", ">15millions")
  )

  # Replace the listed columns in place so the column order is unchanged.
  recoded <- DataLOY
  recoded[names(factor_labels)] <- Map(
    function(codes, labels) factor(codes, labels = labels),
    DataLOY[names(factor_labels)],
    factor_labels
  )
  recoded
})
str(DataLOY)
## 'data.frame':    873 obs. of  33 variables:
##  $ ID           : int  3 4 5 6 7 8 10 11 12 13 ...
##  $ AGE          : Factor w/ 5 levels "16-25","26-35",..: 1 1 1 1 1 1 3 1 1 2 ...
##  $ CITY         : Factor w/ 2 levels "DaNang","HoChiMinh": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FRE          : Factor w/ 4 levels ">=3 days/week",..: 1 2 1 1 1 1 1 1 1 1 ...
##  $ TripPurpose  : Factor w/ 5 levels "Working","Studying",..: 2 4 2 2 2 2 1 2 2 2 ...
##  $ Departure    : Factor w/ 2 levels "Normal","Peak-Hour": 1 1 2 2 2 2 2 2 1 2 ...
##  $ TimeUseonBus : Factor w/ 6 levels "Using.telephone",..: 4 4 4 1 4 6 4 4 3 3 ...
##  $ TravelTime   : num  3 2 0.17 4 2 2 2 2.5 1.5 2 ...
##  $ PSSW         : num  4.9 3.4 3.9 4.1 3.7 4.6 3.9 3.4 5.3 4 ...
##  $ PSSS         : num  5.4 2.4 3.4 4.4 2.6 2.7 4 2.7 3.1 4.7 ...
##  $ PSAB         : num  5.5 3.5 4.5 5.5 3.5 3.8 6 3.5 4.8 5.5 ...
##  $ PSEB         : num  5.8 6 4.5 6 4.5 4 4.5 6.3 7 4.5 ...
##  $ PSQ          : num  4.8 4.6 2.7 4.8 4.5 3.9 4.2 4.1 6 4.5 ...
##  $ SAT          : num  6 4.7 2 5 4 4.7 6.7 5 7 6.3 ...
##  $ LOY          : num  5.7 4.3 3.7 5 4.6 4.4 6.6 5.3 5.9 5.6 ...
##  $ IMA          : num  5.6 4.8 3 5 4.6 3 5.8 5.8 6.8 6 ...
##  $ PHB          : num  6.6 4 5 5 5.6 5.4 5.8 5.6 7 6.4 ...
##  $ PEV          : num  4 5.3 5.8 5 6 5.3 6.3 6.5 7 5.3 ...
##  $ ATM          : num  4.3 4.1 3.1 5 3.3 2.7 4 4.1 4.4 3.9 ...
##  $ PPI          : num  6 4 2.7 5 3 3 6 2 3.3 5.3 ...
##  $ SIM          : num  3.5 4 3.5 5 5.5 2 6 5.3 4.3 6 ...
##  $ PPA          : num  4.5 4 4.5 5 5 2.5 4 4 4 4.3 ...
##  $ SBE          : num  4 4.8 6 5 5 2 5.3 4 5 3.8 ...
##  $ EXB          : num  4.8 4.2 4 5 4.2 5 5.3 5.3 5.3 5 ...
##  $ EC_Stop      : Factor w/ 2 levels "Ever","Never": 2 2 2 2 2 2 2 2 2 2 ...
##  $ WC_Stop      : Factor w/ 2 levels "Ever","Never": 1 2 2 1 1 1 2 1 2 2 ...
##  $ EC_Bus       : Factor w/ 2 levels "Ever","Never": 2 1 2 2 2 2 2 2 2 2 ...
##  $ WC_Bus       : Factor w/ 2 levels "Ever","Never": 1 2 2 1 1 1 2 1 2 2 ...
##  $ Gender       : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 2 2 2 2 1 ...
##  $ MarriedStatus: Factor w/ 2 levels "Single","Married": 1 1 1 1 1 1 2 1 1 1 ...
##  $ Occupation   : Factor w/ 7 levels "Students/Pupils",..: 1 1 1 1 1 1 2 1 1 7 ...
##  $ Education    : Factor w/ 5 levels "Secondary.school",..: 2 2 2 2 2 3 1 2 3 5 ...
##  $ Income       : Factor w/ 4 levels "<5millions","5-10millions",..: 1 1 1 1 1 1 1 1 1 1 ...
dim(DataLOY)
## [1] 873  33
# 2.3. Descriptive table
library(tableone)
require(tableone)
library(magrittr)
summary(DataLOY)
##        ID           AGE             CITY                         FRE     
##  Min.   :  3.0   16-25:425   DaNang   :422   >=3 days/week         :508  
##  1st Qu.:273.0   26-35:172   HoChiMinh:451   2days/month-2days/week:168  
##  Median :526.0   36-45:105                   2days/year-1day/month : 99  
##  Mean   :521.7   46-55: 77                   <2 days/year          : 98  
##  3rd Qu.:769.0   >55  : 94                                               
##  Max.   :993.0                                                           
##                                                                          
##        TripPurpose      Departure            TimeUseonBus   TravelTime    
##  Working     :305   Normal   :296   Using.telephone:198   Min.   : 0.000  
##  Studying    :303   Peak-Hour:577   Reading        : 53   1st Qu.: 0.500  
##  Shopping    : 60                   Listening      :138   Median : 1.000  
##  Entertaining:100                   Nothing        :428   Mean   : 1.291  
##  Others      :105                   Talking        : 34   3rd Qu.: 2.000  
##                                     Others         : 22   Max.   :20.000  
##                                                                           
##       PSSW            PSSS            PSAB            PSEB      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.300   1st Qu.:3.700   1st Qu.:4.000   1st Qu.:5.500  
##  Median :4.900   Median :4.600   Median :5.500   Median :6.000  
##  Mean   :4.768   Mean   :4.478   Mean   :5.047   Mean   :5.786  
##  3rd Qu.:5.400   3rd Qu.:5.400   3rd Qu.:6.000   3rd Qu.:6.300  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##       PSQ             SAT             LOY             IMA       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.800   1st Qu.:5.000   1st Qu.:4.900   1st Qu.:4.800  
##  Median :5.400   Median :5.700   Median :5.700   Median :5.600  
##  Mean   :5.226   Mean   :5.464   Mean   :5.436   Mean   :5.338  
##  3rd Qu.:5.800   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##       PHB             PEV             ATM             PPI       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:5.200   1st Qu.:5.300   1st Qu.:4.300   1st Qu.:3.700  
##  Median :5.800   Median :6.000   Median :5.100   Median :4.300  
##  Mean   :5.611   Mean   :5.619   Mean   :4.979   Mean   :4.373  
##  3rd Qu.:6.200   3rd Qu.:6.300   3rd Qu.:5.900   3rd Qu.:5.300  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##       SIM             PPA             SBE            EXB        EC_Stop   
##  Min.   :1.000   Min.   :1.000   Min.   :1.80   Min.   :2.00   Ever : 36  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.30   1st Qu.:5.00   Never:837  
##  Median :5.000   Median :5.000   Median :5.30   Median :5.80              
##  Mean   :4.901   Mean   :4.957   Mean   :5.18   Mean   :5.59              
##  3rd Qu.:6.000   3rd Qu.:5.800   3rd Qu.:6.00   3rd Qu.:6.00              
##  Max.   :7.000   Max.   :7.000   Max.   :7.00   Max.   :7.00              
##                                                                           
##   WC_Stop      EC_Bus      WC_Bus       Gender    MarriedStatus
##  Ever :121   Ever : 42   Ever :116   Male  :364   Single :535  
##  Never:752   Never:831   Never:757   Female:509   Married:338  
##                                                                
##                                                                
##                                                                
##                                                                
##                                                                
##            Occupation             Education             Income   
##  Students/Pupils:368   Secondary.school: 57   <5millions   :465  
##  Full.time.job  :305   Undergraduate   :287   5-10millions :249  
##  Part.time.job  : 69   High.school     :368   10-15millions:122  
##  Retirement     : 46   Postgraduate    :106   >15millions  : 37  
##  No.job         :  3   Others          : 55                      
##  Housewife      : 54                                             
##  Others         : 28
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Descriptive table of all study variables, with one column per CITY
# (the term after `|` is the stratification variable).
Tab1_LOY <- table1(
  ~ PSSW + PSSS + PSAB + PSEB + PSQ + SAT + LOY + IMA +
    PHB + PEV + ATM + PPI + SIM + PPA + SBE + EXB +
    EC_Stop + WC_Stop + EC_Bus + WC_Bus +
    Gender + MarriedStatus + Occupation + Education + Income + AGE +
    FRE + TripPurpose + Departure + TimeUseonBus + TravelTime |
    CITY,
  data = DataLOY
)
Tab1_LOY
DaNang
(N=422)
HoChiMinh
(N=451)
Overall
(N=873)
PSSW
Mean (SD) 5.13 (0.814) 4.43 (1.05) 4.77 (1.01)
Median [Min, Max] 5.30 [1.00, 7.00] 4.60 [1.00, 7.00] 4.90 [1.00, 7.00]
PSSS
Mean (SD) 4.83 (1.01) 4.15 (1.22) 4.48 (1.17)
Median [Min, Max] 4.90 [1.00, 7.00] 4.30 [1.00, 7.00] 4.60 [1.00, 7.00]
PSAB
Mean (SD) 5.57 (1.03) 4.56 (1.27) 5.05 (1.26)
Median [Min, Max] 5.80 [1.50, 7.00] 4.80 [1.00, 7.00] 5.50 [1.00, 7.00]
PSEB
Mean (SD) 5.98 (0.885) 5.61 (0.969) 5.79 (0.948)
Median [Min, Max] 6.00 [1.00, 7.00] 6.00 [1.00, 7.00] 6.00 [1.00, 7.00]
PSQ
Mean (SD) 5.44 (0.713) 5.02 (0.953) 5.23 (0.871)
Median [Min, Max] 5.60 [1.50, 7.00] 5.10 [1.00, 7.00] 5.40 [1.00, 7.00]
SAT
Mean (SD) 5.63 (0.909) 5.31 (1.14) 5.46 (1.05)
Median [Min, Max] 6.00 [2.00, 7.00] 5.70 [1.00, 7.00] 5.70 [1.00, 7.00]
LOY
Mean (SD) 5.64 (0.835) 5.25 (1.07) 5.44 (0.984)
Median [Min, Max] 5.70 [2.00, 7.00] 5.60 [1.00, 7.00] 5.70 [1.00, 7.00]
IMA
Mean (SD) 5.55 (0.754) 5.14 (1.09) 5.34 (0.964)
Median [Min, Max] 5.60 [2.80, 7.00] 5.40 [1.00, 7.00] 5.60 [1.00, 7.00]
PHB
Mean (SD) 5.72 (0.789) 5.51 (1.16) 5.61 (1.00)
Median [Min, Max] 5.80 [2.60, 7.00] 6.00 [1.00, 7.00] 5.80 [1.00, 7.00]
PEV
Mean (SD) 5.77 (0.877) 5.48 (1.44) 5.62 (1.21)
Median [Min, Max] 6.00 [1.00, 7.00] 6.00 [1.00, 7.00] 6.00 [1.00, 7.00]
ATM
Mean (SD) 5.35 (0.944) 4.63 (1.24) 4.98 (1.16)
Median [Min, Max] 5.60 [1.90, 7.00] 4.70 [1.00, 7.00] 5.10 [1.00, 7.00]
PPI
Mean (SD) 4.76 (1.23) 4.01 (1.48) 4.37 (1.41)
Median [Min, Max] 5.00 [1.00, 7.00] 4.00 [1.00, 7.00] 4.30 [1.00, 7.00]
SIM
Mean (SD) 5.13 (1.09) 4.69 (1.15) 4.90 (1.14)
Median [Min, Max] 5.50 [1.00, 7.00] 4.50 [1.00, 7.00] 5.00 [1.00, 7.00]
PPA
Mean (SD) 5.23 (0.924) 4.70 (1.04) 4.96 (1.02)
Median [Min, Max] 5.50 [2.00, 7.00] 4.50 [1.00, 7.00] 5.00 [1.00, 7.00]
SBE
Mean (SD) 5.57 (0.846) 4.82 (1.09) 5.18 (1.05)
Median [Min, Max] 5.80 [1.80, 7.00] 5.00 [1.80, 7.00] 5.30 [1.80, 7.00]
EXB
Mean (SD) 5.72 (0.751) 5.47 (0.973) 5.59 (0.881)
Median [Min, Max] 5.80 [2.50, 7.00] 5.70 [2.00, 7.00] 5.80 [2.00, 7.00]
EC_Stop
Ever 4 (0.9%) 32 (7.1%) 36 (4.1%)
Never 418 (99.1%) 419 (92.9%) 837 (95.9%)
WC_Stop
Ever 22 (5.2%) 99 (22.0%) 121 (13.9%)
Never 400 (94.8%) 352 (78.0%) 752 (86.1%)
EC_Bus
Ever 3 (0.7%) 39 (8.6%) 42 (4.8%)
Never 419 (99.3%) 412 (91.4%) 831 (95.2%)
WC_Bus
Ever 17 (4.0%) 99 (22.0%) 116 (13.3%)
Never 405 (96.0%) 352 (78.0%) 757 (86.7%)
Gender
Male 188 (44.5%) 176 (39.0%) 364 (41.7%)
Female 234 (55.5%) 275 (61.0%) 509 (58.3%)
MarriedStatus
Single 270 (64.0%) 265 (58.8%) 535 (61.3%)
Married 152 (36.0%) 186 (41.2%) 338 (38.7%)
Occupation
Students/Pupils 207 (49.1%) 161 (35.7%) 368 (42.2%)
Full.time.job 116 (27.5%) 189 (41.9%) 305 (34.9%)
Part.time.job 28 (6.6%) 41 (9.1%) 69 (7.9%)
Retirement 33 (7.8%) 13 (2.9%) 46 (5.3%)
No.job 3 (0.7%) 0 (0%) 3 (0.3%)
Housewife 27 (6.4%) 27 (6.0%) 54 (6.2%)
Others 8 (1.9%) 20 (4.4%) 28 (3.2%)
Education
Secondary.school 30 (7.1%) 27 (6.0%) 57 (6.5%)
Undergraduate 134 (31.8%) 153 (33.9%) 287 (32.9%)
High.school 176 (41.7%) 192 (42.6%) 368 (42.2%)
Postgraduate 63 (14.9%) 43 (9.5%) 106 (12.1%)
Others 19 (4.5%) 36 (8.0%) 55 (6.3%)
Income
<5millions 276 (65.4%) 189 (41.9%) 465 (53.3%)
5-10millions 86 (20.4%) 163 (36.1%) 249 (28.5%)
10-15millions 51 (12.1%) 71 (15.7%) 122 (14.0%)
>15millions 9 (2.1%) 28 (6.2%) 37 (4.2%)
AGE
16-25 226 (53.6%) 199 (44.1%) 425 (48.7%)
26-35 76 (18.0%) 96 (21.3%) 172 (19.7%)
36-45 37 (8.8%) 68 (15.1%) 105 (12.0%)
46-55 36 (8.5%) 41 (9.1%) 77 (8.8%)
>55 47 (11.1%) 47 (10.4%) 94 (10.8%)
FRE
>=3 days/week 269 (63.7%) 239 (53.0%) 508 (58.2%)
2days/month-2days/week 85 (20.1%) 83 (18.4%) 168 (19.2%)
2days/year-1day/month 29 (6.9%) 70 (15.5%) 99 (11.3%)
<2 days/year 39 (9.2%) 59 (13.1%) 98 (11.2%)
TripPurpose
Working 117 (27.7%) 188 (41.7%) 305 (34.9%)
Studying 168 (39.8%) 135 (29.9%) 303 (34.7%)
Shopping 46 (10.9%) 14 (3.1%) 60 (6.9%)
Entertaining 53 (12.6%) 47 (10.4%) 100 (11.5%)
Others 38 (9.0%) 67 (14.9%) 105 (12.0%)
Departure
Normal 151 (35.8%) 145 (32.2%) 296 (33.9%)
Peak-Hour 271 (64.2%) 306 (67.8%) 577 (66.1%)
TimeUseonBus
Using.telephone 107 (25.4%) 91 (20.2%) 198 (22.7%)
Reading 22 (5.2%) 31 (6.9%) 53 (6.1%)
Listening 55 (13.0%) 83 (18.4%) 138 (15.8%)
Nothing 207 (49.1%) 221 (49.0%) 428 (49.0%)
Talking 25 (5.9%) 9 (2.0%) 34 (3.9%)
Others 6 (1.4%) 16 (3.5%) 22 (2.5%)
TravelTime
Mean (SD) 1.12 (0.826) 1.45 (1.86) 1.29 (1.46)
Median [Min, Max] 1.00 [0, 6.00] 1.00 [0, 20.0] 1.00 [0, 20.0]
library(compareGroups)
# Compare every study variable between the two cities and print the summary
# table with the overall p-value per variable.
# NOTE(review): in the rendered table the p-value for Occupation is shown as
# "." (the No.job row has a zero count for HoChiMinh); presumably the test
# could not be computed for that variable -- confirm before reporting.
Des_LOY <- compareGroups(
  CITY ~ PSSW + PSSS + PSAB + PSEB + PSQ + SAT + LOY + IMA +
    PHB + PEV + ATM + PPI + SIM + PPA + SBE + EXB +
    EC_Stop + WC_Stop + EC_Bus + WC_Bus +
    Gender + MarriedStatus + Occupation + Education + Income + AGE +
    FRE + TripPurpose + Departure + TimeUseonBus + TravelTime,
  data = DataLOY
)
createTable(Des_LOY)
## 
## --------Summary descriptives table by 'CITY'---------
## 
## ____________________________________________________________ 
##                              DaNang     HoChiMinh  p.overall 
##                               N=422       N=451              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## PSSW                       5.13 (0.81) 4.43 (1.05)  <0.001   
## PSSS                       4.83 (1.01) 4.15 (1.22)  <0.001   
## PSAB                       5.57 (1.03) 4.56 (1.27)  <0.001   
## PSEB                       5.98 (0.89) 5.61 (0.97)  <0.001   
## PSQ                        5.44 (0.71) 5.02 (0.95)  <0.001   
## SAT                        5.63 (0.91) 5.31 (1.14)  <0.001   
## LOY                        5.64 (0.83) 5.25 (1.07)  <0.001   
## IMA                        5.55 (0.75) 5.14 (1.09)  <0.001   
## PHB                        5.72 (0.79) 5.51 (1.16)   0.001   
## PEV                        5.77 (0.88) 5.48 (1.44)  <0.001   
## ATM                        5.35 (0.94) 4.63 (1.24)  <0.001   
## PPI                        4.76 (1.23) 4.01 (1.48)  <0.001   
## SIM                        5.13 (1.09) 4.69 (1.15)  <0.001   
## PPA                        5.23 (0.92) 4.70 (1.04)  <0.001   
## SBE                        5.57 (0.85) 4.82 (1.09)  <0.001   
## EXB                        5.72 (0.75) 5.47 (0.97)  <0.001   
## EC_Stop:                                            <0.001   
##     Ever                    4 (0.95%)  32 (7.10%)            
##     Never                  418 (99.1%) 419 (92.9%)           
## WC_Stop:                                            <0.001   
##     Ever                   22 (5.21%)  99 (22.0%)            
##     Never                  400 (94.8%) 352 (78.0%)           
## EC_Bus:                                             <0.001   
##     Ever                    3 (0.71%)  39 (8.65%)            
##     Never                  419 (99.3%) 412 (91.4%)           
## WC_Bus:                                             <0.001   
##     Ever                   17 (4.03%)  99 (22.0%)            
##     Never                  405 (96.0%) 352 (78.0%)           
## Gender:                                              0.113   
##     Male                   188 (44.5%) 176 (39.0%)           
##     Female                 234 (55.5%) 275 (61.0%)           
## MarriedStatus:                                       0.130   
##     Single                 270 (64.0%) 265 (58.8%)           
##     Married                152 (36.0%) 186 (41.2%)           
## Occupation:                                            .     
##     Students/Pupils        207 (49.1%) 161 (35.7%)           
##     Full.time.job          116 (27.5%) 189 (41.9%)           
##     Part.time.job          28 (6.64%)  41 (9.09%)            
##     Retirement             33 (7.82%)  13 (2.88%)            
##     No.job                  3 (0.71%)   0 (0.00%)            
##     Housewife              27 (6.40%)  27 (5.99%)            
##     Others                  8 (1.90%)  20 (4.43%)            
## Education:                                           0.037   
##     Secondary.school       30 (7.11%)  27 (5.99%)            
##     Undergraduate          134 (31.8%) 153 (33.9%)           
##     High.school            176 (41.7%) 192 (42.6%)           
##     Postgraduate           63 (14.9%)  43 (9.53%)            
##     Others                 19 (4.50%)  36 (7.98%)            
## Income:                                             <0.001   
##     <5millions             276 (65.4%) 189 (41.9%)           
##     5-10millions           86 (20.4%)  163 (36.1%)           
##     10-15millions          51 (12.1%)  71 (15.7%)            
##     >15millions             9 (2.13%)  28 (6.21%)            
## AGE:                                                 0.014   
##     16-25                  226 (53.6%) 199 (44.1%)           
##     26-35                  76 (18.0%)  96 (21.3%)            
##     36-45                  37 (8.77%)  68 (15.1%)            
##     46-55                  36 (8.53%)  41 (9.09%)            
##     >55                    47 (11.1%)  47 (10.4%)            
## FRE:                                                <0.001   
##     >=3 days/week          269 (63.7%) 239 (53.0%)           
##     2days/month-2days/week 85 (20.1%)  83 (18.4%)            
##     2days/year-1day/month  29 (6.87%)  70 (15.5%)            
##     <2 days/year           39 (9.24%)  59 (13.1%)            
## TripPurpose:                                        <0.001   
##     Working                117 (27.7%) 188 (41.7%)           
##     Studying               168 (39.8%) 135 (29.9%)           
##     Shopping               46 (10.9%)  14 (3.10%)            
##     Entertaining           53 (12.6%)  47 (10.4%)            
##     Others                 38 (9.00%)  67 (14.9%)            
## Departure:                                           0.289   
##     Normal                 151 (35.8%) 145 (32.2%)           
##     Peak-Hour              271 (64.2%) 306 (67.8%)           
## TimeUseonBus:                                        0.001   
##     Using.telephone        107 (25.4%) 91 (20.2%)            
##     Reading                22 (5.21%)  31 (6.87%)            
##     Listening              55 (13.0%)  83 (18.4%)            
##     Nothing                207 (49.1%) 221 (49.0%)           
##     Talking                25 (5.92%)   9 (2.00%)            
##     Others                  6 (1.42%)  16 (3.55%)            
## TravelTime                 1.12 (0.83) 1.45 (1.86)   0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

3. Describe Data by graph

library(magrittr)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.0.4
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Distribution of the loyalty score
hist(DataLOY$LOY)

# Build a data frame holding LOY followed by the requested columns of DataLOY.
# Columns are named "DataLOY.<column>", the same names that
# data.frame(DataLOY$LOY, DataLOY$...) produces, so panel labels are unchanged.
loy_panel_data <- function(columns) {
  picked <- c("LOY", columns)
  setNames(DataLOY[picked], paste0("DataLOY.", picked))
}

# NOTE(review): Cor2, Cor3 and Cor4 consist mostly of factor columns;
# presumably pairs.panels() works on their integer codes -- confirm that a
# correlation on nominal codes is what is intended here.

# Correlations between LOY and continuous variables of users' perception
Cor1_1 <- loy_panel_data(
  c("PSSW", "PSSS", "PSAB", "PSEB", "PSQ", "SAT", "IMA")
)
pairs.panels(Cor1_1)

Cor1_2 <- loy_panel_data(
  c("PHB", "PEV", "ATM", "PPI", "SIM", "PPA", "SBE", "EXB")
)
pairs.panels(Cor1_2)

# Correlation between LOY and variables of users' experiences
Cor2 <- loy_panel_data(c("EC_Stop", "WC_Stop", "EC_Bus", "WC_Bus"))
pairs.panels(Cor2)

# Correlation between LOY and variables of travel characteristics
Cor3 <- loy_panel_data(
  c("CITY", "FRE", "TripPurpose", "Departure", "TimeUseonBus", "TravelTime")
)
pairs.panels(Cor3)

# Correlation between LOY and socioeconomic variables
Cor4 <- loy_panel_data(
  c("Gender", "MarriedStatus", "Occupation", "Education", "Income", "AGE")
)
pairs.panels(Cor4)

# Boxplot of variables/CITY
# Chia cot theo cach khac : library(gridExtra) ; grid.arrange(p1, p2, ncol=2)
par(mfrow = c(1,2))
boxplot (DataLOY$PSSW ~ DataLOY$CITY, main = "Perceived Security & Safety on the way to/from bus stops", xlab = "PSSW", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
boxplot (DataLOY$PSSW ~ DataLOY$Gender, main = "Perceived Security & Safety on the way to/from bus stops", xlab = "PSSW", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Male", "Female"))

boxplot (DataLOY$PSSS ~ DataLOY$CITY, main = "Perceived Security & Safety at bus Stations", xlab = "PSSS", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
boxplot (DataLOY$PSSS ~ DataLOY$Gender, main = "Perceived Security & Safety at bus Stations", xlab = "PSSS", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Male", "Female"))

boxplot (DataLOY$PSAB ~ DataLOY$CITY, main = "Perceived Safety on Buses", xlab = "PSAB", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
boxplot (DataLOY$PSAB ~ DataLOY$Gender, main = "Perceived Safety on Buses", xlab = "PSAB", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Male", "Female"))

boxplot (DataLOY$PSEB ~ DataLOY$CITY, main = "Perceived Security on Buses", xlab = "PSEB", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
## Warning in bxp(list(stats = structure(c(4.8, 5.8, 6, 6.5, 7, 3.5, 5, 6, : some
## notches went outside hinges ('box'): maybe set notch=FALSE
boxplot (DataLOY$PSEB ~ DataLOY$Gender, main = "Perceived Security on Buses", xlab = "PSEB", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Male", "Female"))

boxplot (DataLOY$PSQ ~ DataLOY$CITY, main = "Perceived Service Quality", xlab = "PSQ", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
boxplot (DataLOY$PSQ ~ DataLOY$Gender, main = "Perceived Service Quality", xlab = "PSQ", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Male", "Female"))

boxplot (DataLOY$IMA ~ DataLOY$CITY, main = "Perceived Image", xlab = "IMA", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
boxplot (DataLOY$IMA ~ DataLOY$Gender, main = "Perceived Image", xlab = "IMA", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Male", "Female"))

boxplot (DataLOY$PHB ~ DataLOY$CITY, main = "Perceived Health Benefits", xlab = "PHB", ylab = "Point (1-7)", col = c("Red", "blue"), notch = T, names = c("Da Nang", "Ho Chi Minh"))
## Warning in bxp(list(stats = structure(c(4.4, 5.4, 5.8, 6.2, 7, 3.6, 5, 6, : some
## notches went outside hinges ('box'): maybe set notch=FALSE
# Draw one notched boxplot of a score column of `data`, split by a
# two-level grouping column.
#   data         - data frame holding both columns
#   score        - name of the score column (also used as the x-axis label)
#   group        - name of the grouping column
#   title        - plot title
#   group_labels - labels for the two boxes, in factor-level order
plot_score_by_group <- function(data, score, group, title, group_labels) {
  invisible(boxplot(
    reformulate(group, response = score),
    data = data,
    main = title, xlab = score, ylab = "Point (1-7)",
    col = c("Red", "blue"), notch = TRUE, names = group_labels
  ))
}

city_labels <- c("Da Nang", "Ho Chi Minh")
gender_labels <- c("Male", "Female")

plot_score_by_group(
  DataLOY, "PHB", "Gender", "Perceived Health Benefits", gender_labels
)

# Remaining scores: one plot by city, then one by gender, for each score.
score_titles <- c(
  PEV = "Perceived Environment Value/Benefits",
  ATM = "Perceived Atmospheric",
  PPI = "Passenger to Passenger Interaction",
  SIM = "Similarity",
  PPA = "Perceived Physical Appearance",
  SBE = "Perceived Suitable Behavior",
  EXB = "Experience on the Bus",
  LOY = "Perceived loyalty"
)
for (score in names(score_titles)) {
  plot_score_by_group(
    DataLOY, score, "CITY", score_titles[[score]], city_labels
  )
  plot_score_by_group(
    DataLOY, score, "Gender", score_titles[[score]], gender_labels
  )
}

plot_score_by_group(
  DataLOY, "SAT", "CITY", "Perceived Satisfaction", city_labels
)
## Warning in bxp(list(stats = structure(c(3.7, 5, 6, 6, 7, 3, 4.7, 5.7, 6, : some
## notches went outside hinges ('box'): maybe set notch=FALSE
# Notched boxplot of the satisfaction score (SAT) by gender.
# `names` relabels the boxes in factor-level order.
boxplot(
  SAT ~ Gender,
  data = DataLOY,
  main = "Perceived Satisfaction", xlab = "SAT", ylab = "Point (1-7)",
  col = c("Red", "blue"), notch = TRUE, names = c("Male", "Female")
)

4. Descriptive statistical analysis

# Per-column overview of the survey data: level counts for factors,
# five-number summary plus mean for numeric columns.
summary(object = DataLOY)
##        ID           AGE             CITY                         FRE     
##  Min.   :  3.0   16-25:425   DaNang   :422   >=3 days/week         :508  
##  1st Qu.:273.0   26-35:172   HoChiMinh:451   2days/month-2days/week:168  
##  Median :526.0   36-45:105                   2days/year-1day/month : 99  
##  Mean   :521.7   46-55: 77                   <2 days/year          : 98  
##  3rd Qu.:769.0   >55  : 94                                               
##  Max.   :993.0                                                           
##                                                                          
##        TripPurpose      Departure            TimeUseonBus   TravelTime    
##  Working     :305   Normal   :296   Using.telephone:198   Min.   : 0.000  
##  Studying    :303   Peak-Hour:577   Reading        : 53   1st Qu.: 0.500  
##  Shopping    : 60                   Listening      :138   Median : 1.000  
##  Entertaining:100                   Nothing        :428   Mean   : 1.291  
##  Others      :105                   Talking        : 34   3rd Qu.: 2.000  
##                                     Others         : 22   Max.   :20.000  
##                                                                           
##       PSSW            PSSS            PSAB            PSEB      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.300   1st Qu.:3.700   1st Qu.:4.000   1st Qu.:5.500  
##  Median :4.900   Median :4.600   Median :5.500   Median :6.000  
##  Mean   :4.768   Mean   :4.478   Mean   :5.047   Mean   :5.786  
##  3rd Qu.:5.400   3rd Qu.:5.400   3rd Qu.:6.000   3rd Qu.:6.300  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##       PSQ             SAT             LOY             IMA       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.800   1st Qu.:5.000   1st Qu.:4.900   1st Qu.:4.800  
##  Median :5.400   Median :5.700   Median :5.700   Median :5.600  
##  Mean   :5.226   Mean   :5.464   Mean   :5.436   Mean   :5.338  
##  3rd Qu.:5.800   3rd Qu.:6.000   3rd Qu.:6.000   3rd Qu.:6.000  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##       PHB             PEV             ATM             PPI       
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:5.200   1st Qu.:5.300   1st Qu.:4.300   1st Qu.:3.700  
##  Median :5.800   Median :6.000   Median :5.100   Median :4.300  
##  Mean   :5.611   Mean   :5.619   Mean   :4.979   Mean   :4.373  
##  3rd Qu.:6.200   3rd Qu.:6.300   3rd Qu.:5.900   3rd Qu.:5.300  
##  Max.   :7.000   Max.   :7.000   Max.   :7.000   Max.   :7.000  
##                                                                 
##       SIM             PPA             SBE            EXB        EC_Stop   
##  Min.   :1.000   Min.   :1.000   Min.   :1.80   Min.   :2.00   Ever : 36  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.30   1st Qu.:5.00   Never:837  
##  Median :5.000   Median :5.000   Median :5.30   Median :5.80              
##  Mean   :4.901   Mean   :4.957   Mean   :5.18   Mean   :5.59              
##  3rd Qu.:6.000   3rd Qu.:5.800   3rd Qu.:6.00   3rd Qu.:6.00              
##  Max.   :7.000   Max.   :7.000   Max.   :7.00   Max.   :7.00              
##                                                                           
##   WC_Stop      EC_Bus      WC_Bus       Gender    MarriedStatus
##  Ever :121   Ever : 42   Ever :116   Male  :364   Single :535  
##  Never:752   Never:831   Never:757   Female:509   Married:338  
##                                                                
##                                                                
##                                                                
##                                                                
##                                                                
##            Occupation             Education             Income   
##  Students/Pupils:368   Secondary.school: 57   <5millions   :465  
##  Full.time.job  :305   Undergraduate   :287   5-10millions :249  
##  Part.time.job  : 69   High.school     :368   10-15millions:122  
##  Retirement     : 46   Postgraduate    :106   >15millions  : 37  
##  No.job         :  3   Others          : 55                      
##  Housewife      : 54                                             
##  Others         : 28
# Frequency of each loyalty score. The column is read from DataLOY directly
# rather than through an attach()ed copy, which could be stale or masked;
# naming the argument keeps the "LOY" header in the printed table.
table(LOY = DataLOY$LOY)
## LOY
##   1 1.9   2 2.1 2.3 2.4 2.6 2.7 2.9   3 3.1 3.3 3.4 3.6 3.7 3.9   4 4.1 4.3 4.4 
##   1   1   5   2   1   2   1   3   5   2   8   1   5   3   8  12  29  11  27  23 
## 4.6 4.7 4.9   5 5.1 5.3 5.4 5.6 5.7 5.9   6 6.1 6.3 6.4 6.6 6.7 6.9   7 
##  26  29  17  45  36  25  26  68  68  40 168  33  32  20  22  17   7  44
# Descriptive Statistics of categorical variables
# with(DataLOY, do.call(rbind, tapply(TravelTime, LOY, function(x) c(M = mean(x), SD = sd(x)))))
# with(DataLOY, do.call(rbind, tapply(PSSW, LOY, function(x) c(M = mean(x), SD = sd(x)))))

5. Estimate Multinomial Logit Regression Model - DataLOY for 2 cities

library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
## 
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
## 
##     heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.5-5)
# Put DataLOY's columns on the search path so they can be used by bare name.
# NOTE(review): the masking messages printed by this call show DataLOY is
# already attached from earlier in the file, and the bicreg()/lm() calls in
# this section read their data from DataLOY explicitly. This call looks
# redundant - confirm nothing later relies on bare column names, then drop it.
attach(DataLOY)
## The following objects are masked from DataLOY (pos = 26):
## 
##     AGE, ATM, CITY, Departure, EC_Bus, EC_Stop, Education, EXB, FRE,
##     Gender, ID, IMA, Income, LOY, MarriedStatus, Occupation, PEV, PHB,
##     PPA, PPI, PSAB, PSEB, PSQ, PSSS, PSSW, SAT, SBE, SIM, TimeUseonBus,
##     TravelTime, TripPurpose, WC_Bus, WC_Stop
# Show the type of every column (and the levels of each factor) before
# modelling.
str(object = DataLOY)
## 'data.frame':    873 obs. of  33 variables:
##  $ ID           : int  3 4 5 6 7 8 10 11 12 13 ...
##  $ AGE          : Factor w/ 5 levels "16-25","26-35",..: 1 1 1 1 1 1 3 1 1 2 ...
##  $ CITY         : Factor w/ 2 levels "DaNang","HoChiMinh": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FRE          : Factor w/ 4 levels ">=3 days/week",..: 1 2 1 1 1 1 1 1 1 1 ...
##  $ TripPurpose  : Factor w/ 5 levels "Working","Studying",..: 2 4 2 2 2 2 1 2 2 2 ...
##  $ Departure    : Factor w/ 2 levels "Normal","Peak-Hour": 1 1 2 2 2 2 2 2 1 2 ...
##  $ TimeUseonBus : Factor w/ 6 levels "Using.telephone",..: 4 4 4 1 4 6 4 4 3 3 ...
##  $ TravelTime   : num  3 2 0.17 4 2 2 2 2.5 1.5 2 ...
##  $ PSSW         : num  4.9 3.4 3.9 4.1 3.7 4.6 3.9 3.4 5.3 4 ...
##  $ PSSS         : num  5.4 2.4 3.4 4.4 2.6 2.7 4 2.7 3.1 4.7 ...
##  $ PSAB         : num  5.5 3.5 4.5 5.5 3.5 3.8 6 3.5 4.8 5.5 ...
##  $ PSEB         : num  5.8 6 4.5 6 4.5 4 4.5 6.3 7 4.5 ...
##  $ PSQ          : num  4.8 4.6 2.7 4.8 4.5 3.9 4.2 4.1 6 4.5 ...
##  $ SAT          : num  6 4.7 2 5 4 4.7 6.7 5 7 6.3 ...
##  $ LOY          : num  5.7 4.3 3.7 5 4.6 4.4 6.6 5.3 5.9 5.6 ...
##  $ IMA          : num  5.6 4.8 3 5 4.6 3 5.8 5.8 6.8 6 ...
##  $ PHB          : num  6.6 4 5 5 5.6 5.4 5.8 5.6 7 6.4 ...
##  $ PEV          : num  4 5.3 5.8 5 6 5.3 6.3 6.5 7 5.3 ...
##  $ ATM          : num  4.3 4.1 3.1 5 3.3 2.7 4 4.1 4.4 3.9 ...
##  $ PPI          : num  6 4 2.7 5 3 3 6 2 3.3 5.3 ...
##  $ SIM          : num  3.5 4 3.5 5 5.5 2 6 5.3 4.3 6 ...
##  $ PPA          : num  4.5 4 4.5 5 5 2.5 4 4 4 4.3 ...
##  $ SBE          : num  4 4.8 6 5 5 2 5.3 4 5 3.8 ...
##  $ EXB          : num  4.8 4.2 4 5 4.2 5 5.3 5.3 5.3 5 ...
##  $ EC_Stop      : Factor w/ 2 levels "Ever","Never": 2 2 2 2 2 2 2 2 2 2 ...
##  $ WC_Stop      : Factor w/ 2 levels "Ever","Never": 1 2 2 1 1 1 2 1 2 2 ...
##  $ EC_Bus       : Factor w/ 2 levels "Ever","Never": 2 1 2 2 2 2 2 2 2 2 ...
##  $ WC_Bus       : Factor w/ 2 levels "Ever","Never": 1 2 2 1 1 1 2 1 2 2 ...
##  $ Gender       : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 2 2 2 2 1 ...
##  $ MarriedStatus: Factor w/ 2 levels "Single","Married": 1 1 1 1 1 1 2 1 1 1 ...
##  $ Occupation   : Factor w/ 7 levels "Students/Pupils",..: 1 1 1 1 1 1 2 1 1 7 ...
##  $ Education    : Factor w/ 5 levels "Secondary.school",..: 2 2 2 2 2 3 1 2 3 5 ...
##  $ Income       : Factor w/ 4 levels "<5millions","5-10millions",..: 1 1 1 1 1 1 1 1 1 1 ...
# Bayesian model averaging (bicreg) over linear models of LOY.
# Candidate predictors: all variables, removing EC_Stop + WC_Stop + EC_Bus +
# WC_Bus + SAT and the variables with cor > 0.7 (AGE, PPA). ID and the
# response LOY are left out as well.
# Columns are selected by name, in their data-frame order, so the selection
# does not silently shift if the column layout of DataLOY changes.
yvar <- DataLOY$LOY
predictor_names <- c(
  "CITY", "FRE", "TripPurpose", "Departure", "TimeUseonBus", "TravelTime",
  "PSSW", "PSSS", "PSAB", "PSEB", "PSQ",
  "IMA", "PHB", "PEV", "ATM", "PPI", "SIM", "SBE", "EXB",
  "Gender", "MarriedStatus", "Occupation", "Education", "Income"
)
xvars <- DataLOY[predictor_names]
bma <- bicreg(xvars, yvar, strict = FALSE, OR = 20)
summary(bma)
## 
## Call:
## bicreg(x = xvars, y = yvar, strict = F, OR = 20)
## 
## 
##   56  models were selected
##  Best  5  models (cumulative posterior probability =  0.3461 ): 
## 
##                            p!=0    EV         SD        model 1     model 2   
## Intercept                  100.0  -0.1075750  0.169367    -0.17388    -0.13105
## CITYHoChiMinh                5.2  -0.0040619  0.020102       .           .    
## FRE2days.month.2days.week    0.6  -0.0004366  0.006869       .           .    
## FRE2days.year.1day.month    41.2  -0.0659515  0.088988       .        -0.15560
## FRE.2.days.year            100.0  -0.3004670  0.065128    -0.29080    -0.31172
## TripPurposeStudying          0.7  -0.0003732  0.005775       .           .    
## TripPurposeShopping         26.5   0.0513611  0.095437       .           .    
## TimeUseonBusReading          0.0   0.0000000  0.000000       .           .    
## TimeUseonBusTalking          0.0   0.0000000  0.000000       .           .    
## TravelTime                   1.4  -0.0002163  0.002457       .           .    
## PSAB                         8.9   0.0033583  0.012288       .           .    
## PSEB                         0.0   0.0000000  0.000000       .           .    
## PSQ                        100.0   0.3225604  0.032753     0.32604     0.32497
## IMA                        100.0   0.1962299  0.029074     0.19669     0.19724
## PHB                        100.0   0.1736473  0.035552     0.16318     0.16689
## PEV                         74.3   0.0458205  0.032675     0.06253     0.05940
## ATM                          1.5  -0.0004427  0.004661       .           .    
## PPI                          8.6   0.0026952  0.010110       .           .    
## SIM                        100.0   0.0869241  0.021199     0.08769     0.08849
## SBE                        100.0  -0.1009979  0.024160    -0.09886    -0.09833
## EXB                        100.0   0.2958414  0.033875     0.29695     0.29170
## GenderFemale                 0.7   0.0003204  0.005125       .           .    
## OccupationFull.time.job      0.9   0.0004574  0.006291       .           .    
## OccupationPart.time.job      5.1   0.0067109  0.033605       .           .    
## OccupationRetirement         4.2   0.0067630  0.037421       .           .    
## OccupationNo.job             0.0   0.0000000  0.000000       .           .    
## OccupationHousewife          1.9   0.0023418  0.020436       .           .    
## OccupationOthers             0.7  -0.0008094  0.013577       .           .    
## EducationUndergraduate      80.1   0.1037275  0.064700     0.13373     0.13146
## EducationOthers              0.7  -0.0005549  0.009669       .           .    
## Income10.15millions          1.7   0.0012838  0.012454       .           .    
##                                                                               
## nVar                                                         9           10   
## r2                                                         0.644       0.647  
## BIC                                                     -841.64076  -840.92878
## post prob                                                  0.119       0.083  
##                            model 3     model 4     model 5   
## Intercept                    -0.05627    -0.15212    -0.09797
## CITYHoChiMinh                   .           .           .    
## FRE2days.month.2days.week       .           .           .    
## FRE2days.year.1day.month     -0.16622       .           .    
## FRE.2.days.year              -0.31205    -0.29290    -0.28964
## TripPurposeStudying             .           .           .    
## TripPurposeShopping             .         0.17783       .    
## TimeUseonBusReading             .           .           .    
## TimeUseonBusTalking             .           .           .    
## TravelTime                      .           .           .    
## PSAB                            .           .           .    
## PSEB                            .           .           .    
## PSQ                           0.32641     0.32287     0.32764
## IMA                           0.19642     0.19547     0.19578
## PHB                           0.21128     0.16212     0.20981
## PEV                             .         0.06292       .    
## ATM                             .           .           .    
## PPI                             .           .           .    
## SIM                           0.08676     0.08697     0.08580
## SBE                          -0.09812    -0.09771    -0.09868
## EXB                           0.29445     0.29629     0.30023
## GenderFemale                    .           .           .    
## OccupationFull.time.job         .           .           .    
## OccupationPart.time.job         .           .           .    
## OccupationRetirement            .           .           .    
## OccupationNo.job                .           .           .    
## OccupationHousewife             .           .           .    
## OccupationOthers                .           .           .    
## EducationUndergraduate        0.13170     0.11646     0.13414
## EducationOthers                 .           .           .    
## Income10.15millions             .           .           .    
##                                                              
## nVar                            9           10          8    
## r2                            0.644       0.646       0.641  
## BIC                        -839.87504  -839.81711  -839.78974
## post prob                     0.049       0.048       0.047
# Image plot of which predictors enter each model selected by bicreg().
imageplot.bma(bma.out = bma)

# Model linear regression model
# DataLOY is deliberately not attach()ed again here: lm() below reads every
# variable through `data = DataLOY`, and re-attaching only stacks another copy
# of the columns on the search path (and prints masking messages).
# Model - All variables - remove: EC_Stop + WC_Stop + EC_Bus + WC_Bus + SAT and variable have cor > 0.7 - AGE, PPA
    #DataLOY$LOY <- relevel (DataLOY$LOY, ref = "Notloyal")
m <- lm(
  LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + PEV + ATM + PPI + SIM +
    SBE + EXB + Gender + MarriedStatus + Occupation + Education + Income +
    CITY + FRE + TripPurpose + Departure + TimeUseonBus + TravelTime,
  data = DataLOY
)
summary(m)
## 
## Call:
## lm(formula = LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + 
##     PEV + ATM + PPI + SIM + SBE + EXB + Gender + MarriedStatus + 
##     Occupation + Education + Income + CITY + FRE + TripPurpose + 
##     Departure + TimeUseonBus + TravelTime, data = DataLOY)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1922 -0.3054  0.0294  0.3434  1.7183 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -0.101385   0.228968  -0.443 0.658032    
## PSSW                       0.001044   0.029409   0.035 0.971699    
## PSSS                       0.010754   0.026144   0.411 0.680920    
## PSAB                       0.020487   0.024499   0.836 0.403264    
## PSEB                      -0.020817   0.027714  -0.751 0.452788    
## PSQ                        0.329068   0.038538   8.539  < 2e-16 ***
## IMA                        0.203384   0.030181   6.739 2.99e-11 ***
## PHB                        0.157066   0.031646   4.963 8.42e-07 ***
## PEV                        0.069201   0.022786   3.037 0.002465 ** 
## ATM                       -0.052171   0.025715  -2.029 0.042792 *  
## PPI                        0.033701   0.017685   1.906 0.057044 .  
## SIM                        0.084757   0.022160   3.825 0.000141 ***
## SBE                       -0.110924   0.025652  -4.324 1.72e-05 ***
## EXB                        0.282647   0.034762   8.131 1.55e-15 ***
## GenderFemale               0.041179   0.044409   0.927 0.354055    
## MarriedStatusMarried       0.019967   0.061924   0.322 0.747198    
## OccupationFull.time.job    0.163213   0.098321   1.660 0.097293 .  
## OccupationPart.time.job    0.268830   0.111207   2.417 0.015847 *  
## OccupationRetirement       0.311940   0.124206   2.511 0.012212 *  
## OccupationNo.job           0.484454   0.346607   1.398 0.162576    
## OccupationHousewife        0.261847   0.122237   2.142 0.032474 *  
## OccupationOthers           0.068640   0.141210   0.486 0.627035    
## EducationUndergraduate     0.080578   0.088667   0.909 0.363737    
## EducationHigh.school      -0.034730   0.092158  -0.377 0.706381    
## EducationPostgraduate     -0.050533   0.106749  -0.473 0.636064    
## EducationOthers           -0.112316   0.114447  -0.981 0.326692    
## Income5-10millions         0.036860   0.068180   0.541 0.588906    
## Income10-15millions        0.089771   0.089075   1.008 0.313833    
## Income>15millions          0.052912   0.120706   0.438 0.661240    
## CITYHoChiMinh             -0.058281   0.050808  -1.147 0.251678    
## FRE2days/month-2days/week -0.090760   0.056572  -1.604 0.109022    
## FRE2days/year-1day/month  -0.191370   0.075426  -2.537 0.011357 *  
## FRE<2 days/year           -0.334304   0.074431  -4.491 8.08e-06 ***
## TripPurposeStudying        0.140921   0.077088   1.828 0.067899 .  
## TripPurposeShopping        0.090078   0.099964   0.901 0.367794    
## TripPurposeEntertaining   -0.008724   0.083920  -0.104 0.917228    
## TripPurposeOthers          0.030321   0.083899   0.361 0.717894    
## DeparturePeak-Hour         0.007923   0.044752   0.177 0.859520    
## TimeUseonBusReading       -0.042649   0.094165  -0.453 0.650728    
## TimeUseonBusListening      0.025727   0.066809   0.385 0.700279    
## TimeUseonBusNothing        0.022087   0.053872   0.410 0.681913    
## TimeUseonBusTalking       -0.109801   0.114634  -0.958 0.338422    
## TimeUseonBusOthers         0.037826   0.136868   0.276 0.782332    
## TravelTime                -0.014211   0.014523  -0.979 0.328097    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.584 on 829 degrees of freedom
## Multiple R-squared:  0.6649, Adjusted R-squared:  0.6475 
## F-statistic: 38.25 on 43 and 829 DF,  p-value: < 2.2e-16
# Model m3 - all variables, removing EC_Stop + WC_Stop + EC_Bus + WC_Bus + SAT,
# the variables with cor > 0.7 (AGE, PPA) and those with cor > 0.6
# (PHB, PSQ, PSSW, PSAB).
m3 <- lm(
  LOY ~ PSSS + PSEB + IMA + PEV + ATM + PPI + SIM + SBE + EXB +
    Gender + MarriedStatus + Occupation + Education + Income +
    CITY + FRE + TripPurpose + Departure + TimeUseonBus + TravelTime,
  data = DataLOY
)
summary(m3)
## 
## Call:
## lm(formula = LOY ~ PSSS + PSEB + IMA + PEV + ATM + PPI + SIM + 
##     SBE + EXB + Gender + MarriedStatus + Occupation + Education + 
##     Income + CITY + FRE + TripPurpose + Departure + TimeUseonBus + 
##     TravelTime, data = DataLOY)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3456 -0.2928  0.0274  0.3192  1.9636 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.123935   0.235625   0.526 0.599038    
## PSSS                       0.068900   0.022823   3.019 0.002615 ** 
## PSEB                       0.080680   0.026753   3.016 0.002641 ** 
## IMA                        0.303181   0.029762  10.187  < 2e-16 ***
## PEV                        0.117258   0.020601   5.692 1.74e-08 ***
## ATM                        0.006318   0.026249   0.241 0.809845    
## PPI                        0.037064   0.018608   1.992 0.046713 *  
## SIM                        0.078607   0.023270   3.378 0.000764 ***
## SBE                       -0.098942   0.026869  -3.682 0.000246 ***
## EXB                        0.372988   0.034704  10.748  < 2e-16 ***
## GenderFemale               0.050833   0.046631   1.090 0.275980    
## MarriedStatusMarried       0.047863   0.064802   0.739 0.460358    
## OccupationFull.time.job    0.233748   0.102910   2.271 0.023378 *  
## OccupationPart.time.job    0.331637   0.116894   2.837 0.004663 ** 
## OccupationRetirement       0.371421   0.130763   2.840 0.004615 ** 
## OccupationNo.job           0.559724   0.365566   1.531 0.126120    
## OccupationHousewife        0.272852   0.128770   2.119 0.034394 *  
## OccupationOthers           0.101310   0.148785   0.681 0.496112    
## EducationUndergraduate     0.039324   0.093325   0.421 0.673599    
## EducationHigh.school      -0.054438   0.097104  -0.561 0.575209    
## EducationPostgraduate     -0.071184   0.112189  -0.634 0.525930    
## EducationOthers           -0.088108   0.120036  -0.734 0.463144    
## Income5-10millions         0.008147   0.071771   0.114 0.909645    
## Income10-15millions        0.037292   0.093396   0.399 0.689784    
## Income>15millions          0.006831   0.126945   0.054 0.957096    
## CITYHoChiMinh             -0.042722   0.050979  -0.838 0.402260    
## FRE2days/month-2days/week -0.081859   0.059578  -1.374 0.169821    
## FRE2days/year-1day/month  -0.194161   0.079380  -2.446 0.014652 *  
## FRE<2 days/year           -0.352869   0.078239  -4.510 7.41e-06 ***
## TripPurposeStudying        0.141869   0.081345   1.744 0.081519 .  
## TripPurposeShopping        0.109622   0.105355   1.041 0.298408    
## TripPurposeEntertaining    0.017462   0.088502   0.197 0.843639    
## TripPurposeOthers          0.031233   0.088409   0.353 0.723969    
## DeparturePeak-Hour        -0.010755   0.047060  -0.229 0.819289    
## TimeUseonBusReading       -0.032060   0.098958  -0.324 0.746040    
## TimeUseonBusListening      0.034805   0.070043   0.497 0.619380    
## TimeUseonBusNothing        0.002083   0.056618   0.037 0.970660    
## TimeUseonBusTalking       -0.091163   0.120250  -0.758 0.448599    
## TimeUseonBusOthers         0.016608   0.144218   0.115 0.908344    
## TravelTime                -0.023790   0.015271  -1.558 0.119656    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6164 on 833 degrees of freedom
## Multiple R-squared:  0.6249, Adjusted R-squared:  0.6073 
## F-statistic: 35.58 on 39 and 833 DF,  p-value: < 2.2e-16
# Model 1 - same predictors as model m, minus the two non-significant
# variables Gender and Departure.
m1 <- lm(
  LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + PEV + ATM + PPI + SIM +
    SBE + EXB + MarriedStatus + Occupation + Education + Income +
    CITY + FRE + TripPurpose + TimeUseonBus + TravelTime,
  data = DataLOY
)
summary(m1)
## 
## Call:
## lm(formula = LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + 
##     PEV + ATM + PPI + SIM + SBE + EXB + MarriedStatus + Occupation + 
##     Education + Income + CITY + FRE + TripPurpose + TimeUseonBus + 
##     TravelTime, data = DataLOY)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1762 -0.3031  0.0312  0.3477  1.6968 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -0.0648307  0.2222024  -0.292 0.770540    
## PSSW                       0.0004334  0.0293757   0.015 0.988233    
## PSSS                       0.0086879  0.0260219   0.334 0.738562    
## PSAB                       0.0195801  0.0244103   0.802 0.422709    
## PSEB                      -0.0200996  0.0276840  -0.726 0.468021    
## PSQ                        0.3310120  0.0383505   8.631  < 2e-16 ***
## IMA                        0.2034479  0.0301312   6.752 2.74e-11 ***
## PHB                        0.1559343  0.0315994   4.935 9.70e-07 ***
## PEV                        0.0697981  0.0227239   3.072 0.002199 ** 
## ATM                       -0.0509681  0.0256408  -1.988 0.047165 *  
## PPI                        0.0332411  0.0176221   1.886 0.059599 .  
## SIM                        0.0835412  0.0219054   3.814 0.000147 ***
## SBE                       -0.1123159  0.0255710  -4.392 1.27e-05 ***
## EXB                        0.2842080  0.0346452   8.203 8.85e-16 ***
## MarriedStatusMarried       0.0280965  0.0612362   0.459 0.646482    
## OccupationFull.time.job    0.1616708  0.0982109   1.646 0.100109    
## OccupationPart.time.job    0.2660193  0.1110583   2.395 0.016827 *  
## OccupationRetirement       0.3087798  0.1240724   2.489 0.013016 *  
## OccupationNo.job           0.4741910  0.3454208   1.373 0.170187    
## OccupationHousewife        0.2727130  0.1216073   2.243 0.025187 *  
## OccupationOthers           0.0682730  0.1411143   0.484 0.628646    
## EducationUndergraduate     0.0786538  0.0885800   0.888 0.374829    
## EducationHigh.school      -0.0325641  0.0919861  -0.354 0.723421    
## EducationPostgraduate     -0.0467447  0.1065746  -0.439 0.661058    
## EducationOthers           -0.1116478  0.1143170  -0.977 0.329026    
## Income5-10millions         0.0348635  0.0680762   0.512 0.608700    
## Income10-15millions        0.0780952  0.0880006   0.887 0.375099    
## Income>15millions          0.0333255  0.1186741   0.281 0.778922    
## CITYHoChiMinh             -0.0558992  0.0506704  -1.103 0.270263    
## FRE2days/month-2days/week -0.0931485  0.0564723  -1.649 0.099432 .  
## FRE2days/year-1day/month  -0.1947984  0.0752863  -2.587 0.009838 ** 
## FRE<2 days/year           -0.3373366  0.0741336  -4.550 6.15e-06 ***
## TripPurposeStudying        0.1384766  0.0769840   1.799 0.072418 .  
## TripPurposeShopping        0.0896948  0.0991064   0.905 0.365709    
## TripPurposeEntertaining   -0.0162267  0.0825642  -0.197 0.844240    
## TripPurposeOthers          0.0226305  0.0833420   0.272 0.786045    
## TimeUseonBusReading       -0.0464313  0.0938159  -0.495 0.620788    
## TimeUseonBusListening      0.0289673  0.0666326   0.435 0.663870    
## TimeUseonBusNothing        0.0232029  0.0537810   0.431 0.666264    
## TimeUseonBusTalking       -0.1021275  0.1142656  -0.894 0.371702    
## TimeUseonBusOthers         0.0421775  0.1366816   0.309 0.757717    
## TravelTime                -0.0149664  0.0144828  -1.033 0.301722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5836 on 831 degrees of freedom
## Multiple R-squared:  0.6645, Adjusted R-squared:  0.648 
## F-statistic: 40.15 on 41 and 831 DF,  p-value: < 2.2e-16
# Model 2 - personal-perception variables only (plus CITY); SAT is removed,
# and so is PPA (cor > 0.7).
m2 <- lm(
  LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + PEV + ATM + PPI + SIM +
    SBE + EXB + CITY,
  data = DataLOY
)
summary(m2)
## 
## Call:
## lm(formula = LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + 
##     PEV + ATM + PPI + SIM + SBE + EXB + CITY, data = DataLOY)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3876 -0.2846  0.0276  0.3267  1.7734 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.1163591  0.1855822  -0.627 0.530829    
## PSSW           0.0072222  0.0294414   0.245 0.806277    
## PSSS          -0.0006931  0.0257829  -0.027 0.978559    
## PSAB           0.0329384  0.0240887   1.367 0.171864    
## PSEB          -0.0205383  0.0277496  -0.740 0.459423    
## PSQ            0.3313710  0.0387861   8.544  < 2e-16 ***
## IMA            0.2079326  0.0300860   6.911 9.37e-12 ***
## PHB            0.1577494  0.0313535   5.031 5.93e-07 ***
## PEV            0.0644413  0.0220538   2.922 0.003569 ** 
## ATM           -0.0559182  0.0255124  -2.192 0.028661 *  
## PPI            0.0373896  0.0175613   2.129 0.033531 *  
## SIM            0.0787483  0.0219304   3.591 0.000348 ***
## SBE           -0.1180293  0.0256175  -4.607 4.70e-06 ***
## EXB            0.3146558  0.0343806   9.152  < 2e-16 ***
## CITYHoChiMinh -0.0657524  0.0472890  -1.390 0.164756    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5968 on 858 degrees of freedom
## Multiple R-squared:  0.6378, Adjusted R-squared:  0.6319 
## F-statistic: 107.9 on 14 and 858 DF,  p-value: < 2.2e-16
# CHOICE 1: model m1 - all variables, removing EC_Stop + WC_Stop + EC_Bus +
# WC_Bus + SAT, the variables with cor > 0.7 (AGE, PPA) and the
# non-significant variables Gender and Departure.
summary(object = m1)
## 
## Call:
## lm(formula = LOY ~ PSSW + PSSS + PSAB + PSEB + PSQ + IMA + PHB + 
##     PEV + ATM + PPI + SIM + SBE + EXB + MarriedStatus + Occupation + 
##     Education + Income + CITY + FRE + TripPurpose + TimeUseonBus + 
##     TravelTime, data = DataLOY)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1762 -0.3031  0.0312  0.3477  1.6968 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -0.0648307  0.2222024  -0.292 0.770540    
## PSSW                       0.0004334  0.0293757   0.015 0.988233    
## PSSS                       0.0086879  0.0260219   0.334 0.738562    
## PSAB                       0.0195801  0.0244103   0.802 0.422709    
## PSEB                      -0.0200996  0.0276840  -0.726 0.468021    
## PSQ                        0.3310120  0.0383505   8.631  < 2e-16 ***
## IMA                        0.2034479  0.0301312   6.752 2.74e-11 ***
## PHB                        0.1559343  0.0315994   4.935 9.70e-07 ***
## PEV                        0.0697981  0.0227239   3.072 0.002199 ** 
## ATM                       -0.0509681  0.0256408  -1.988 0.047165 *  
## PPI                        0.0332411  0.0176221   1.886 0.059599 .  
## SIM                        0.0835412  0.0219054   3.814 0.000147 ***
## SBE                       -0.1123159  0.0255710  -4.392 1.27e-05 ***
## EXB                        0.2842080  0.0346452   8.203 8.85e-16 ***
## MarriedStatusMarried       0.0280965  0.0612362   0.459 0.646482    
## OccupationFull.time.job    0.1616708  0.0982109   1.646 0.100109    
## OccupationPart.time.job    0.2660193  0.1110583   2.395 0.016827 *  
## OccupationRetirement       0.3087798  0.1240724   2.489 0.013016 *  
## OccupationNo.job           0.4741910  0.3454208   1.373 0.170187    
## OccupationHousewife        0.2727130  0.1216073   2.243 0.025187 *  
## OccupationOthers           0.0682730  0.1411143   0.484 0.628646    
## EducationUndergraduate     0.0786538  0.0885800   0.888 0.374829    
## EducationHigh.school      -0.0325641  0.0919861  -0.354 0.723421    
## EducationPostgraduate     -0.0467447  0.1065746  -0.439 0.661058    
## EducationOthers           -0.1116478  0.1143170  -0.977 0.329026    
## Income5-10millions         0.0348635  0.0680762   0.512 0.608700    
## Income10-15millions        0.0780952  0.0880006   0.887 0.375099    
## Income>15millions          0.0333255  0.1186741   0.281 0.778922    
## CITYHoChiMinh             -0.0558992  0.0506704  -1.103 0.270263    
## FRE2days/month-2days/week -0.0931485  0.0564723  -1.649 0.099432 .  
## FRE2days/year-1day/month  -0.1947984  0.0752863  -2.587 0.009838 ** 
## FRE<2 days/year           -0.3373366  0.0741336  -4.550 6.15e-06 ***
## TripPurposeStudying        0.1384766  0.0769840   1.799 0.072418 .  
## TripPurposeShopping        0.0896948  0.0991064   0.905 0.365709    
## TripPurposeEntertaining   -0.0162267  0.0825642  -0.197 0.844240    
## TripPurposeOthers          0.0226305  0.0833420   0.272 0.786045    
## TimeUseonBusReading       -0.0464313  0.0938159  -0.495 0.620788    
## TimeUseonBusListening      0.0289673  0.0666326   0.435 0.663870    
## TimeUseonBusNothing        0.0232029  0.0537810   0.431 0.666264    
## TimeUseonBusTalking       -0.1021275  0.1142656  -0.894 0.371702    
## TimeUseonBusOthers         0.0421775  0.1366816   0.309 0.757717    
## TravelTime                -0.0149664  0.0144828  -1.033 0.301722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5836 on 831 degrees of freedom
## Multiple R-squared:  0.6645, Adjusted R-squared:  0.648 
## F-statistic: 40.15 on 41 and 831 DF,  p-value: < 2.2e-16
# According to BMA, 9 variables are retained:
# FRE, PSQ, IMA, PHB, PEV, SIM, SBE, EXB and Education.
# Refit them as an ordinary linear model of LOY and print the fit summary.
m.bma <- lm(
  formula = LOY ~ FRE + PSQ + IMA + PHB + PEV + SIM + SBE + EXB + Education,
  data = DataLOY
)
summary(m.bma)
## 
## Call:
## lm(formula = LOY ~ FRE + PSQ + IMA + PHB + PEV + SIM + SBE + 
##     EXB + Education, data = DataLOY)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.14959 -0.27188  0.01943  0.33391  1.79542 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -0.01110    0.18183  -0.061  0.95134    
## FRE2days/month-2days/week -0.06888    0.05257  -1.310  0.19046    
## FRE2days/year-1day/month  -0.17097    0.06498  -2.631  0.00866 ** 
## FRE<2 days/year           -0.32979    0.06562  -5.026 6.10e-07 ***
## PSQ                        0.32609    0.03218  10.133  < 2e-16 ***
## IMA                        0.19743    0.02889   6.834 1.56e-11 ***
## PHB                        0.16743    0.03013   5.558 3.65e-08 ***
## PEV                        0.05892    0.02147   2.745  0.00618 ** 
## SIM                        0.08531    0.02082   4.098 4.57e-05 ***
## SBE                       -0.09875    0.02364  -4.176 3.26e-05 ***
## EXB                        0.29134    0.03375   8.633  < 2e-16 ***
## EducationUndergraduate     0.04223    0.08613   0.490  0.62401    
## EducationHigh.school      -0.11092    0.08473  -1.309  0.19082    
## EducationPostgraduate     -0.03551    0.09762  -0.364  0.71614    
## EducationOthers           -0.15753    0.11174  -1.410  0.15895    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5876 on 858 degrees of freedom
## Multiple R-squared:  0.6489, Adjusted R-squared:  0.6432 
## F-statistic: 113.3 on 14 and 858 DF,  p-value: < 2.2e-16
# CHOICE 2 (CHON 2): same predictors as m.bma, but with Occupation
# in place of Education. Fit the model and print its summary.
m.bma1 <- lm(
  formula = LOY ~ FRE + PSQ + IMA + PHB + PEV + SIM + SBE + EXB + Occupation,
  data = DataLOY
)
summary(m.bma1)
## 
## Call:
## lm(formula = LOY ~ FRE + PSQ + IMA + PHB + PEV + SIM + SBE + 
##     EXB + Occupation, data = DataLOY)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2622 -0.2758  0.0321  0.3423  1.9313 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -0.008517   0.159233  -0.053 0.957355    
## FRE2days/month-2days/week -0.093612   0.052768  -1.774 0.076416 .  
## FRE2days/year-1day/month  -0.236027   0.066365  -3.556 0.000396 ***
## FRE<2 days/year           -0.356726   0.065595  -5.438 7.02e-08 ***
## PSQ                        0.316324   0.032027   9.877  < 2e-16 ***
## IMA                        0.205650   0.028772   7.148 1.89e-12 ***
## PHB                        0.143764   0.030031   4.787 1.99e-06 ***
## PEV                        0.075661   0.021587   3.505 0.000480 ***
## SIM                        0.094240   0.020755   4.541 6.42e-06 ***
## SBE                       -0.103798   0.023588  -4.401 1.22e-05 ***
## EXB                        0.273962   0.033767   8.113 1.70e-15 ***
## OccupationFull.time.job    0.110982   0.046080   2.408 0.016230 *  
## OccupationPart.time.job    0.241821   0.078753   3.071 0.002204 ** 
## OccupationRetirement       0.284966   0.094265   3.023 0.002577 ** 
## OccupationNo.job           0.476977   0.341370   1.397 0.162702    
## OccupationHousewife        0.268097   0.087997   3.047 0.002385 ** 
## OccupationOthers          -0.006113   0.116588  -0.052 0.958197    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5849 on 856 degrees of freedom
## Multiple R-squared:  0.653,  Adjusted R-squared:  0.6465 
## F-statistic: 100.7 on 16 and 856 DF,  p-value: < 2.2e-16

6. Validating the importance of variables - DataLOY for 2 cities

# Based on regression coefficient
# Based on the R2 contribution of each variable, using the "relaimpo" package: either the function calc.relimp or the bootstrap method
## Relaimpo
#library(relaimpo)
#metrics <- calc.relimp(m1, type = c("lmg"))
#metrics
## Bootstrap
#boot <- boot.relimp(m1, b = 10, type = c("lmg"), fixed = F)
#booteval.relimp(boot, typesel = c("lmg"), level = 0.9, bty = "perc", nodiff = T)