if(!require(qdapRegex)) {install.packages("qdapRegex")}
if(!require(maxLik)) {install.packages("maxLik")}
if(!require(microbenchmark)) {install.packages("microbenchmark")}
if(!require(ggplot2)) {install.packages("ggplot2")}
if(!require(ggthemes)) {install.packages("ggthemes")}
balt <- read.csv(file.choose(), header = T)
formula <- trips ~ dk + ee + fi + lit + lv + pl + ruc + se + male + hinc + bocc
pois <- PoissonModel(formula, data = balt)
summary(pois)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.097 0.033 -2.948 0.003***
## dk 1.551 0.032 49.043 0.000***
## ee 0.470 0.038 12.446 0.000***
## fi 1.147 0.033 34.871 0.000***
## lit 0.458 0.039 11.807 0.000***
## lv 0.889 0.036 24.932 0.000***
## pl -0.039 0.042 -0.924 0.355
## ruc -0.894 0.056 -15.898 0.000***
## se 1.536 0.032 47.593 0.000***
## male 0.132 0.013 10.263 0.000***
## hinc 0.089 0.006 14.478 0.000***
## bocc 0.495 0.020 24.280 0.000***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 8 893
pois2 <- PoissonModel(trips ~ de + pl + hinc, data = balt)
pois3 <- PoissonModel(trips ~ de+ hinc + pinc,data = balt)
pois4 <- PoissonModel(trips ~ de + pl + hinc + pinc,data = balt)
AIC(pois2, pois4, pois3, k = 3)
##
## Information criteria: comparison between models
## Depvar Expvars AIC BIC LogLik
## 1 trips de + pl + hinc 102461.29 102485.66 -51224.65
## 2 trips de + pl + hinc + pinc 102352.31* 102382.78* -51168.66
## 3 trips de + hinc + pinc 103621.75 103646.12 -51804.87
##
## Best fitted model is marked with '*'
balt$ldist <- log(balt$dist)
zeroinformula <- trips ~ ldist
zip <- PoissonModel(formula, data = balt, zeroinfl = zeroinformula)
summary(zip)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.567 0.033 46.862 0.000***
## dk 0.635 0.032 19.985 0.000***
## ee -0.556 0.039 -14.389 0.000***
## fi 0.425 0.033 12.847 0.000***
## lit -0.071 0.039 -1.816 0.069*
## lv 0.068 0.036 1.898 0.058*
## pl -0.375 0.042 -8.892 0.000***
## ruc 0.253 0.057 4.440 0.000***
## se 0.573 0.033 17.617 0.000***
## male 0.156 0.013 12.022 0.000***
## hinc -0.019 0.007 -2.868 0.004***
## bocc 0.291 0.020 14.246 0.000***
## infl_(Intercept) -6.131 0.156 -39.230 0.000***
## infl_ldist 1.312 0.030 43.307 0.000***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 8 893
VuongTest(zip,pois)
## H0: no difference between zip and pois.
##
## V >> 0 favours zip.
## V << 0 favours pois.
##
## V = 15.197
## P-value = 0 indicates that H0 should be rejected (at 5% significance).
balt_trunc <- balt[balt$trips!=0,]
ptr <- PoissonModel(formula, data = balt_trunc, zerotrunc = TRUE)
summary(ptr)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.565 0.034 46.348 0.000***
## dk 0.640 0.032 19.946 0.000***
## ee -0.577 0.040 -14.576 0.000***
## fi 0.429 0.033 12.859 0.000***
## lit -0.071 0.040 -1.804 0.071*
## lv 0.071 0.036 1.951 0.051*
## pl -0.393 0.043 -9.068 0.000***
## ruc 0.259 0.057 4.568 0.000***
## se 0.579 0.033 17.626 0.000***
## male 0.158 0.013 12.155 0.000***
## hinc -0.021 0.007 -3.065 0.002***
## bocc 0.293 0.021 14.290 0.000***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 3 802
cep <- PoissonModel(formula, data = balt, censored = c(0,400))
summary(cep)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.574 0.034 46.071 0.000***
## dk 0.633 0.032 19.548 0.000***
## ee -0.526 0.039 -13.625 0.000***
## fi 0.423 0.034 12.567 0.000***
## lit -0.070 0.040 -1.758 0.079*
## lv 0.068 0.037 1.862 0.063*
## pl -0.369 0.043 -8.647 0.000***
## ruc 0.254 0.058 4.417 0.000***
## se 0.572 0.033 17.293 0.000***
## male 0.154 0.013 11.967 0.000***
## hinc -0.020 0.007 -3.070 0.002***
## bocc 0.286 0.020 14.056 0.000***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 8 893
nb <- NBinomialModel(formula, data = balt)
summary(nb)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.268 0.105 -2.563 0.010**
## dk 1.588 0.101 15.676 0.000***
## ee 0.602 0.110 5.494 0.000***
## fi 1.143 0.102 11.192 0.000***
## lit 0.583 0.112 5.222 0.000***
## lv 1.015 0.110 9.206 0.000***
## pl 0.076 0.108 0.704 0.482
## ruc -0.806 0.116 -6.928 0.000***
## se 1.479 0.105 14.094 0.000***
## male 0.037 0.050 0.742 0.458
## hinc 0.178 0.029 6.155 0.000***
## bocc 0.518 0.096 5.370 0.000***
## lna_(Intercept) 1.514 0.021 70.540 0.000***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 8 893
overdformula <- trips ~ se + lv
nbo <- NBinomialModel(formula, data = balt, overdispersion = overdformula)
summary(nbo)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.271 0.106 -2.552 0.011**
## dk 1.591 0.107 14.909 0.000***
## ee 0.606 0.115 5.290 0.000***
## fi 1.143 0.107 10.639 0.000***
## lit 0.581 0.116 5.004 0.000***
## lv 1.011 0.111 9.103 0.000***
## pl 0.075 0.113 0.662 0.508
## ruc -0.809 0.121 -6.687 0.000***
## se 1.484 0.099 14.984 0.000***
## male 0.062 0.050 1.245 0.213
## hinc 0.172 0.028 6.126 0.000***
## bocc 0.512 0.096 5.319 0.000***
## lna_(Intercept) 1.630 0.026 63.355 0.000***
## lna_se -0.558 0.056 -9.919 0.000***
## lna_lv -0.178 0.066 -2.714 0.007***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 8 893
ZINB<-NBinomialModel(trips ~ de + pl + hinc + pinc + ldist + bocc + age + male + hhsize + tc_km, data = balt, zeroinfl = trips ~ ldist)
summary(ZINB)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.207 0.123 26.122 0.000***
## de 0.512 0.095 5.376 0.000***
## pl 0.098 0.096 1.024 0.306
## hinc 0.256 0.019 13.204 0.000***
## pinc 0.000 0.000 -8.860 0.000***
## ldist -0.419 0.020 -20.671 0.000***
## bocc 0.173 0.081 2.127 0.033**
## age -0.084 0.015 -5.621 0.000***
## male -0.001 0.044 -0.027 0.979
## hhsize -0.119 0.019 -6.340 0.000***
## tc_km -0.007 0.001 -5.154 0.000***
## lna_(Intercept) 0.751 0.029 26.297 0.000***
## infl_(Intercept) -8.053 0.373 -21.616 0.000***
## infl_ldist 1.384 0.064 21.700 0.000***
##
## Symbols ***,**,* denote significance at 1%, 5% and 10% respectively
##
## N.Obs = 8 893
VuongTest(nbo,ZINB)
## H0: no difference between nbo and ZINB.
##
## V >> 0 favours nbo.
## V << 0 favours ZINB.
##
## V = -17.349
## P-value = 0 indicates that H0 should be rejected (at 5% significance).
Porównano szybkość wykonywania kodu pomiędzy analitycznym wyznaczeniem Hesjanu a numerycznym za pomocą funkcji maxBFGS
microbenchmark(PoissonModel(formula, data = balt)
,PoissonModel(formula, data = balt, czyH = TRUE))
## Unit: milliseconds
## expr min lq
## PoissonModel(formula, data = balt) 330.7759 340.2920
## PoissonModel(formula, data = balt, czyH = TRUE) 353.8737 362.6823
## mean median uq max neval
## 391.9871 349.9464 434.1787 903.9013 100
## 402.0490 368.0996 443.1520 1055.0617 100