rm(list = ls())library(foreign)
clogit <- read.csv("clogit.csv")
summary(clogit)## MODE TTME INVC INVT
## Min. :0.00 Min. : 0.00 Min. : 2.00 Min. : 63.0
## 1st Qu.:0.00 1st Qu.: 0.75 1st Qu.: 23.00 1st Qu.: 234.0
## Median :0.00 Median :35.00 Median : 39.00 Median : 397.0
## Mean :0.25 Mean :34.59 Mean : 47.76 Mean : 486.2
## 3rd Qu.:0.25 3rd Qu.:53.00 3rd Qu.: 66.25 3rd Qu.: 795.5
## Max. :1.00 Max. :99.00 Max. :180.00 Max. :1440.0
## GC CHAIR HINC PSIZE
## Min. : 30.0 Min. :0.0000 Min. : 2.00 Min. :1.000
## 1st Qu.: 71.0 1st Qu.:0.0000 1st Qu.:20.00 1st Qu.:1.000
## Median :101.5 Median :0.0000 Median :34.50 Median :1.000
## Mean :110.9 Mean :0.2762 Mean :34.55 Mean :1.743
## 3rd Qu.:144.0 3rd Qu.:1.0000 3rd Qu.:50.00 3rd Qu.:2.000
## Max. :269.0 Max. :1.0000 Max. :72.00 Max. :6.000
## AASC TASC BASC CASC HINCA
## Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.00 Min. : 0.000
## 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.: 0.000
## Median :0.00 Median :0.00 Median :0.00 Median :0.00 Median : 0.000
## Mean :0.25 Mean :0.25 Mean :0.25 Mean :0.25 Mean : 8.637
## 3rd Qu.:0.25 3rd Qu.:0.25 3rd Qu.:0.25 3rd Qu.:0.25 3rd Qu.: 0.500
## Max. :1.00 Max. :1.00 Max. :1.00 Max. :1.00 Max. :72.000
## PSIZEA
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.4357
## 3rd Qu.:0.2500
## Max. :6.0000
summary(clogit)## MODE TTME INVC INVT
## Min. :0.00 Min. : 0.00 Min. : 2.00 Min. : 63.0
## 1st Qu.:0.00 1st Qu.: 0.75 1st Qu.: 23.00 1st Qu.: 234.0
## Median :0.00 Median :35.00 Median : 39.00 Median : 397.0
## Mean :0.25 Mean :34.59 Mean : 47.76 Mean : 486.2
## 3rd Qu.:0.25 3rd Qu.:53.00 3rd Qu.: 66.25 3rd Qu.: 795.5
## Max. :1.00 Max. :99.00 Max. :180.00 Max. :1440.0
## GC CHAIR HINC PSIZE
## Min. : 30.0 Min. :0.0000 Min. : 2.00 Min. :1.000
## 1st Qu.: 71.0 1st Qu.:0.0000 1st Qu.:20.00 1st Qu.:1.000
## Median :101.5 Median :0.0000 Median :34.50 Median :1.000
## Mean :110.9 Mean :0.2762 Mean :34.55 Mean :1.743
## 3rd Qu.:144.0 3rd Qu.:1.0000 3rd Qu.:50.00 3rd Qu.:2.000
## Max. :269.0 Max. :1.0000 Max. :72.00 Max. :6.000
## AASC TASC BASC CASC HINCA
## Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.00 Min. : 0.000
## 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.: 0.000
## Median :0.00 Median :0.00 Median :0.00 Median :0.00 Median : 0.000
## Mean :0.25 Mean :0.25 Mean :0.25 Mean :0.25 Mean : 8.637
## 3rd Qu.:0.25 3rd Qu.:0.25 3rd Qu.:0.25 3rd Qu.:0.25 3rd Qu.: 0.500
## Max. :1.00 Max. :1.00 Max. :1.00 Max. :1.00 Max. :72.000
## PSIZEA
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.4357
## 3rd Qu.:0.2500
## Max. :6.0000
str(clogit)## 'data.frame': 840 obs. of 14 variables:
## $ MODE : int 0 0 0 1 0 0 0 1 0 0 ...
## $ TTME : int 69 34 35 0 64 44 53 0 69 34 ...
## $ INVC : int 59 31 25 10 58 31 25 11 115 98 ...
## $ INVT : int 100 372 417 180 68 354 399 255 125 892 ...
## $ GC : int 70 71 70 30 68 84 85 50 129 195 ...
## $ CHAIR : int 0 0 0 0 0 0 0 0 0 0 ...
## $ HINC : int 35 35 35 35 30 30 30 30 40 40 ...
## $ PSIZE : int 1 1 1 1 2 2 2 2 1 1 ...
## $ AASC : int 1 0 0 0 1 0 0 0 1 0 ...
## $ TASC : int 0 1 0 0 0 1 0 0 0 1 ...
## $ BASC : int 0 0 1 0 0 0 1 0 0 0 ...
## $ CASC : int 0 0 0 1 0 0 0 1 0 0 ...
## $ HINCA : int 35 0 0 0 30 0 0 0 40 0 ...
## $ PSIZEA: num 1 0 0 0 2 0 0 0 1 0 ...
head(clogit)## MODE TTME INVC INVT GC CHAIR HINC PSIZE AASC TASC BASC CASC HINCA PSIZEA
## 1 0 69 59 100 70 0 35 1 1 0 0 0 35 1
## 2 0 34 31 372 71 0 35 1 0 1 0 0 0 0
## 3 0 35 25 417 70 0 35 1 0 0 1 0 0 0
## 4 1 0 10 180 30 0 35 1 0 0 0 1 0 0
## 5 0 64 58 68 68 0 30 2 1 0 0 0 30 2
## 6 0 44 31 354 84 0 30 2 0 1 0 0 0 0
save(clogit, file = '/Users/admin/Desktop/Linh Data Studio/Transport modelling/clogit.rdata')
load('/Users/admin/Desktop/Linh Data Studio/Transport modelling/clogit.rdata')clogit$mode.ids<-factor(rep(1:4,210),labels=c("air","train","bus","car"))library(mlogit)## Loading required package: dfidx
##
## Attaching package: 'dfidx'
## The following object is masked from 'package:stats':
##
## filter
CLOGIT <- mlogit.data(clogit, shape = "long",
choice = "MODE", alt.var = "mode.ids")
head(index(CLOGIT), 5)## ~~~ indexes ~~~~
## chid alt
## 1 1 air
## 2 1 train
## 3 1 bus
## 4 1 car
## 5 2 air
## indexes: 1, 2
# Part_1 is the “basic” specification, as discussed above. Typically, variables in this part will vary with the alternative under consideration (for example, in a mode choice model, they might describe a mode’s cost (fare) or travel times
res1 <- mlogit(MODE ~ TTME + GC, data = clogit, shape = "long",
alt.var = "mode.ids")
summary(res1)##
## Call:
## mlogit(formula = MODE ~ TTME + GC, data = clogit, shape = "long",
## alt.var = "mode.ids", method = "nr")
##
## Frequencies of alternatives:choice
## air train bus car
## 0.27619 0.30000 0.14286 0.28095
##
## nr method
## 5 iterations, 0h:0m:0s
## g'(-H)^-1g = 0.000221
## successive function values within tolerance limits
##
## Coefficients :
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept):train -1.8533538 0.3700925 -5.0078 5.505e-07 ***
## (Intercept):bus -2.5656173 0.3843251 -6.6756 2.461e-11 ***
## (Intercept):car -5.7763487 0.6559187 -8.8065 < 2.2e-16 ***
## TTME -0.0970904 0.0104351 -9.3042 < 2.2e-16 ***
## GC -0.0157837 0.0043828 -3.6013 0.0003166 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -199.98
## McFadden R^2: 0.29526
## Likelihood ratio test : chisq = 167.56 (p.value = < 2.22e-16)
res2 <- mlogit(MODE ~ TTME + GC, data = CLOGIT)
summary(res2)##
## Call:
## mlogit(formula = MODE ~ TTME + GC, data = CLOGIT, method = "nr")
##
## Frequencies of alternatives:choice
## air train bus car
## 0.27619 0.30000 0.14286 0.28095
##
## nr method
## 5 iterations, 0h:0m:0s
## g'(-H)^-1g = 0.000221
## successive function values within tolerance limits
##
## Coefficients :
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept):train -1.8533538 0.3700925 -5.0078 5.505e-07 ***
## (Intercept):bus -2.5656173 0.3843251 -6.6756 2.461e-11 ***
## (Intercept):car -5.7763487 0.6559187 -8.8065 < 2.2e-16 ***
## TTME -0.0970904 0.0104351 -9.3042 < 2.2e-16 ***
## GC -0.0157837 0.0043828 -3.6013 0.0003166 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -199.98
## McFadden R^2: 0.29526
## Likelihood ratio test : chisq = 167.56 (p.value = < 2.22e-16)
# Part 2 variables: gia tri ve ca nhan (e.g. gioi tinh, thu nhap ho gia dinh), ko anh huong den ban chat cua Phuong tien Mode
# part_2 consists of the names of variables whose values do not vary by alternative (individual-specific variables, for example, gender or perhaps age).
res3 <- mlogit(MODE ~ TTME + GC | HINC, data = CLOGIT)
summary(res3)##
## Call:
## mlogit(formula = MODE ~ TTME + GC | HINC, data = CLOGIT, method = "nr")
##
## Frequencies of alternatives:choice
## air train bus car
## 0.27619 0.30000 0.14286 0.28095
##
## nr method
## 5 iterations, 0h:0m:0s
## g'(-H)^-1g = 0.000614
## successive function values within tolerance limits
##
## Coefficients :
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept):train -0.3249576 0.5763335 -0.5638 0.572866
## (Intercept):bus -1.7445354 0.6775004 -2.5750 0.010025 *
## (Intercept):car -5.8747921 0.8020903 -7.3244 2.4e-13 ***
## TTME -0.0954602 0.0104732 -9.1147 < 2.2e-16 ***
## GC -0.0109273 0.0045878 -2.3818 0.017226 *
## HINC:train -0.0511880 0.0147352 -3.4739 0.000513 ***
## HINC:bus -0.0232100 0.0162306 -1.4300 0.152712
## HINC:car 0.0053735 0.0115294 0.4661 0.641163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -189.53
## McFadden R^2: 0.33209
## Likelihood ratio test : chisq = 188.47 (p.value = < 2.22e-16)
# Finally, part_3 allows you to specify that variables describing characteris- tics of the alternatives have coefficients that differ across alternatives.
res4 <- mlogit(MODE ~ TTME | HINC | GC, data = CLOGIT)
summary(res4)##
## Call:
## mlogit(formula = MODE ~ TTME | HINC | GC, data = CLOGIT, method = "nr")
##
## Frequencies of alternatives:choice
## air train bus car
## 0.27619 0.30000 0.14286 0.28095
##
## nr method
## 5 iterations, 0h:0m:0s
## g'(-H)^-1g = 0.000795
## successive function values within tolerance limits
##
## Coefficients :
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept):train 1.8492221 1.0578547 1.7481 0.0804489 .
## (Intercept):bus 0.2332096 1.2150069 0.1919 0.8477885
## (Intercept):car -3.5030068 1.1092306 -3.1581 0.0015883 **
## TTME -0.0962846 0.0104908 -9.1780 < 2.2e-16 ***
## HINC:train -0.0547168 0.0150964 -3.6245 0.0002895 ***
## HINC:bus -0.0249718 0.0163743 -1.5251 0.1272444
## HINC:car 0.0034443 0.0119876 0.2873 0.7738634
## GC:air 0.0104705 0.0091005 1.1505 0.2499210
## GC:train -0.0088560 0.0050541 -1.7522 0.0797317 .
## GC:bus -0.0070853 0.0074225 -0.9546 0.3397949
## GC:car -0.0110825 0.0056119 -1.9748 0.0482870 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -184.95
## McFadden R^2: 0.34822
## Likelihood ratio test : chisq = 197.62 (p.value = < 2.22e-16)
Note that the GC variable is now interacted with dummys for all the modes: this is possible because gc varies with alternatives, not with individuals. The bottom line is that gc’s coefficient now differs by alternative. It’s also worth noting that the significance levels have changed quite a bit, suggesting that this specification may not be appropriate (at least for this dataset).
# res5<-mlogit(mode~ttme+gc,data=CLOGIT,reflevel="car",
# rpar=c("air:(intercept)"="n","bus:(intercept)"="n",
# "train:(intercept)"="n"),R=500,halton=NA,print.level=1)
# summary(res5)
#
# # https://stackoverflow.com/questions/60713637/mlogit-in-r-coefficients-and-unknown-random-parameter
res5<-mlogit(MODE~TTME+GC,data=CLOGIT,reflevel="car",
rpar=c("(Intercept):air"="n","(Intercept):bus"="n",
"(Intercept):train"="n"),R=500,halton=NA,print.level=1)## Initial value of the function : 200.021471160969
## iteration 1, step = 0.00390625, lnL = 200.00700181, chi2 = 42.89405615
## iteration 2, step = 0.015625, lnL = 200.00530375, chi2 = 0.23847745
## iteration 3, step = 0.125, lnL = 199.73904009, chi2 = 0.77798228
## iteration 4, step = 0.25, lnL = 199.71467864, chi2 = 0.25353518
## iteration 5, step = 0.5, lnL = 199.62259894, chi2 = 2.54825399
## iteration 6, step = 1, lnL = 198.92335441, chi2 = 0.9368018
## iteration 7, step = 1, lnL = 198.21614612, chi2 = 1.17814832
## iteration 8, step = 1, lnL = 197.78031543, chi2 = 0.73589678
## iteration 9, step = 1, lnL = 197.54249151, chi2 = 0.36853162
## iteration 10, step = 1, lnL = 197.48342838, chi2 = 0.0944876
## iteration 11, step = 1, lnL = 197.47108299, chi2 = 0.0189415
## iteration 12, step = 1, lnL = 197.46807699, chi2 = 0.00507095
## iteration 13, step = 1, lnL = 197.46781515, chi2 = 0.00046435
## iteration 14, step = 1, lnL = 197.46780495, chi2 = 1.855e-05
## iteration 15, step = 1, lnL = 197.46780438, chi2 = 8.2e-07
summary(res5)##
## Call:
## mlogit(formula = MODE ~ TTME + GC, data = CLOGIT, reflevel = "car",
## rpar = c(`(Intercept):air` = "n", `(Intercept):bus` = "n",
## `(Intercept):train` = "n"), R = 500, halton = NA, print.level = 1)
##
## Frequencies of alternatives:choice
## car air train bus
## 0.28095 0.27619 0.30000 0.14286
##
## bfgs method
## 15 iterations, 0h:0m:8s
## g'(-H)^-1g = 8.22E-07
## gradient close to zero
##
## Coefficients :
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept):air 6.1592355 1.1386852 5.4091 6.335e-08 ***
## (Intercept):train 5.1095985 0.8174536 6.2506 4.088e-10 ***
## (Intercept):bus 4.1487347 0.9042209 4.5882 4.471e-06 ***
## TTME -0.1144355 0.0199023 -5.7499 8.932e-09 ***
## GC -0.0308410 0.0077524 -3.9782 6.943e-05 ***
## sd.(Intercept):air 2.9380453 0.9163970 3.2061 0.001346 **
## sd.(Intercept):train 0.0382613 21.4589826 0.0018 0.998577
## sd.(Intercept):bus 0.0016575 76.6920349 0.0000 0.999983
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -197.47
## McFadden R^2: 0.3041
## Likelihood ratio test : chisq = 172.58 (p.value = < 2.22e-16)
##
## random coefficients
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## (Intercept):air -Inf 4.177554 6.159236 6.159236 8.140917 Inf
## (Intercept):bus -Inf 4.147617 4.148735 4.148735 4.149853 Inf
## (Intercept):train -Inf 5.083792 5.109599 5.109599 5.135405 Inf
# print info on what was estimated for a random parameter
rpar(res5,"(Intercept):air")## normal distribution with parameters 6.159 (mean) and 2.938 (sd)
Note: The estimates denoted sd.air:(intercept) (etc) represent (in this case) the estimated standard deviations of the random coefficients: in this example we estimate that the air dummy is normal with mean 6:1436 and variance 2:97682 D 8: 861 3; ie N .6:1436; 8:8613/:21 Note that the results here strongly suggest that the train and bus dummys are not in fact random (because we cannot reject the null hypothesis of zero standard deviations)
Note: As we have noted, the clogit dataset is not a random sample, but choice-based. The name arises because instead of picking out people at random (say from the phone book) we sample them by surveying them at their chosen modes (say, on the bus, or at the parking lot). Thus our sample depends on the choices they actually made. Choice-based sampling may be appropriate when we wish to over-sample some lightly used alternatives in order to ensure that their characteristics are represented in the dataset, or because they are simpler to collect.
Choice-based sampling + mlogit <- this makes the likelihood function wrong (since it is based on the assumption of random sampling).
# Let Pi_i be the sample proportion using mode i.
# let Pi_i_1 be the population proportion using this mode. Pi_i_1 require external information. For clogit the Pi_i_1 are: air = 0.14 ; train = 0.13 ; bus = 0.09 ; and car = 0.64
Pi_i <- c(0.27619, 0.30000, 0.14286, 0.28095 ) #value from res1 (Frequencies of alternatives:choice <- air train bus car )
Pi_i_1 <- c(0.14, 0.13, 0.09, 0.64) #external values
wts_ratio <- Pi_i_1/Pi_i
wts <- rep(c(0.507, 0.433, 0.63, 2.278), 210)res1 <- mlogit(MODE ~ TTME + GC, data = clogit, shape = "long",
alt.var = "mode.ids", weights = wts)
summary(res1)##
## Call:
## mlogit(formula = MODE ~ TTME + GC, data = clogit, weights = wts,
## shape = "long", alt.var = "mode.ids", method = "nr")
##
## Frequencies of alternatives:choice
## air train bus car
## 0.27619 0.30000 0.14286 0.28095
##
## nr method
## 5 iterations, 0h:0m:0s
## g'(-H)^-1g = 0.000221
## successive function values within tolerance limits
##
## Coefficients :
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept):train -1.8533538 0.3700925 -5.0078 5.505e-07 ***
## (Intercept):bus -2.5656173 0.3843251 -6.6756 2.461e-11 ***
## (Intercept):car -5.7763487 0.6559187 -8.8065 < 2.2e-16 ***
## TTME -0.0970904 0.0104351 -9.3042 < 2.2e-16 ***
## GC -0.0157837 0.0043828 -3.6013 0.0003166 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -199.98
## McFadden R^2: 0.29526
## Likelihood ratio test : chisq = 167.56 (p.value = < 2.22e-16)