library(readxl) #Library used to import excel files
library(tidyverse) # Pack of most used libraries
library(skimr) # Library used for providing a summary of the data
library(DataExplorer)
library(corrplot) # Library used for correlation plots
library(car) # Library used for testing autocorrelation (Durbin Watson)
library(olsrr) # Library used for multicollinearity diagnostics

# Import the Excel workbook; the file name has no directory part, so it is
# resolved relative to the current working directory.
dataset <- read_excel("TDM_Class3_MLR_Chicago_Example.xls")
class(dataset)
## [1] "tbl_df" "tbl" "data.frame"

# read_excel() returned a tibble (see class above); convert it to a plain
# data.frame, then print a per-variable overview (missing values, mean, sd,
# percentiles and a small histogram).
df <- data.frame(dataset)
skim(df)
| Name | df |
| Number of rows | 57 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| TODU | 0 | 1 | 5.37 | 1.33 | 3.02 | 4.54 | 5.10 | 6.13 | 9.14 | ▃▇▅▃▁ |
| ACO | 0 | 1 | 0.81 | 0.18 | 0.50 | 0.67 | 0.79 | 0.92 | 1.32 | ▆▇▇▃▁ |
| AHS | 0 | 1 | 3.19 | 0.39 | 1.83 | 3.00 | 3.19 | 3.37 | 4.50 | ▁▂▇▂▁ |
| SI | 0 | 1 | 13.07 | 12.19 | 2.17 | 6.82 | 9.86 | 15.08 | 62.53 | ▇▂▁▁▁ |
| SRI | 0 | 1 | 49.56 | 15.84 | 20.89 | 38.14 | 49.37 | 60.85 | 87.38 | ▅▆▇▅▂ |
| UI | 0 | 1 | 52.62 | 13.46 | 24.08 | 44.80 | 55.51 | 61.09 | 83.66 | ▃▅▇▅▁ |
# Print min, quartiles, median, mean and max for every column of df.
summary(df)
## TODU ACO AHS SI
## Min. :3.020 Min. :0.5000 Min. :1.830 Min. : 2.17
## 1st Qu.:4.540 1st Qu.:0.6700 1st Qu.:3.000 1st Qu.: 6.82
## Median :5.100 Median :0.7900 Median :3.190 Median : 9.86
## Mean :5.373 Mean :0.8118 Mean :3.185 Mean :13.07
## 3rd Qu.:6.130 3rd Qu.:0.9200 3rd Qu.:3.370 3rd Qu.:15.08
## Max. :9.140 Max. :1.3200 Max. :4.500 Max. :62.53
## SRI UI
## Min. :20.89 Min. :24.08
## 1st Qu.:38.14 1st Qu.:44.80
## Median :49.37 Median :55.51
## Mean :49.56 Mean :52.62
## 3rd Qu.:60.85 3rd Qu.:61.09
## Max. :87.38 Max. :83.66
# Linearity check: scatter every explanatory variable against TODU (the
# dependent variable of the regression) to see whether the relation looks
# linear.
par(mfrow = c(2, 3)) # 2 x 3 grid of panels; five of them get filled
for (predictor in c("ACO", "AHS", "SI", "SRI", "UI")) {
  plot(x = df$TODU, y = df[[predictor]], xlab = "TODU", ylab = predictor)
}

# Alternative: pairwise scatterplot matrix comparing all six columns with each
# other; the lower panels are switched off.
pairs(df[, 1:6], pch = 19, lower.panel = NULL)
# Normal distribution of dependent variable
# Author's rule of thumb: if the sample is smaller than 2000 observations, use
# the Shapiro-Wilk test (this dataset has 57 rows).
# Null hypothesis: TODU is normally distributed. The p-value printed below is
# above 0.05, so the null hypothesis is not rejected.
shapiro.test(df$TODU)
##
## Shapiro-Wilk normality test
##
## data: df$TODU
## W = 0.96816, p-value = 0.1377
# Otherwise use the Kolmogorov-Smirnov test (author's rule of thumb: only for
# samples > 2,000). TODU is compared with a normal distribution whose mean and
# standard deviation are computed from TODU itself.
# NOTE(review): the reference parameters are estimated from the same data that
# is being tested, which may distort the p-value; confirm against the ks.test
# documentation. The run below also warns about ties in the data.
ks.test(df$TODU, "pnorm", mean=mean(df$TODU), sd = sd(df$TODU))
## Warning in ks.test(df$TODU, "pnorm", mean = mean(df$TODU), sd = sd(df$TODU)):
## ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: df$TODU
## D = 0.12231, p-value = 0.3612
## alternative hypothesis: two-sided
# Multiple linear regression: TODU is the response and the five remaining
# columns of df are the predictors. The fitted object is reused by all the
# diagnostics that follow.
model <- lm(TODU ~ ACO + AHS + SI + SRI + UI, data = df)
# Print residual quantiles, the coefficient table and the overall fit
# statistics (R-squared, F-statistic).
summary(model)
##
## Call:
## lm(formula = TODU ~ ACO + AHS + SI + SRI + UI, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4771 -0.3842 -0.0262 0.4116 2.0806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.817367 2.208380 1.276 0.207820
## ACO 3.646707 0.956500 3.813 0.000372 ***
## AHS 0.323673 0.412119 0.785 0.435860
## SI 0.005325 0.009279 0.574 0.568550
## SRI 0.008135 0.008804 0.924 0.359783
## UI -0.036264 0.013330 -2.720 0.008894 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7554 on 51 degrees of freedom
## Multiple R-squared: 0.7042, Adjusted R-squared: 0.6752
## F-statistic: 24.28 on 5 and 51 DF, p-value: 2.04e-12
# Residuals: draw the diagnostic plots that plot() produces for the fitted
# model, arranged in a 2 x 2 grid of panels.
par(mfrow=c(2,2))
plot(model)
# Autocorrelation: Durbin-Watson test on the fitted model. The output reports
# lag 1 and states the alternative hypothesis rho != 0.
durbinWatsonTest(model)
## lag Autocorrelation D-W Statistic p-value
## 1 0.1416308 1.597747 0.062
## Alternative hypothesis: rho != 0
# Multicollinearity: print tolerance and variance inflation factor (VIF) for
# each of the five predictors in the model.
ols_vif_tol(model)
## Variables Tolerance VIF
## 1 ACO 0.3528890 2.833752
## 2 AHS 0.3963709 2.522889
## 3 SI 0.7968916 1.254876
## 4 SRI 0.5236950 1.909508
## 5 UI 0.3165801 3.158758
# Print eigenvalues and condition indices for the model, followed by one
# column per model term (intercept and each predictor).
ols_eigen_cindex(model)
## Eigenvalue Condition Index intercept ACO AHS
## 1 5.386537577 1.000000 6.994331e-05 0.0005136938 0.0001916512
## 2 0.444466338 3.481252 6.484243e-05 0.0026682253 0.0001278701
## 3 0.084386209 7.989491 5.829055e-05 0.0478676279 0.0091615336
## 4 0.073784878 8.544195 1.355679e-03 0.0031699136 0.0100934045
## 5 0.009322827 24.037043 5.414145e-03 0.7943557055 0.2105218176
## 6 0.001502171 59.881840 9.930371e-01 0.1514248340 0.7699037229
## SI SRI UI
## 1 0.007888051 0.001515333 6.216297e-04
## 2 0.693175876 0.006641788 7.488285e-07
## 3 0.051400736 0.055585833 1.128114e-01
## 4 0.152292605 0.382488929 4.801705e-02
## 5 0.090809203 0.374832118 1.851308e-01
## 6 0.004433528 0.178935999 6.534183e-01
# Combined collinearity report: the output below contains both a tolerance/VIF
# table and an eigenvalue/condition-index table in a single call.
ols_coll_diag(model)
## Tolerance and Variance Inflation Factor
## ---------------------------------------
## Variables Tolerance VIF
## 1 ACO 0.3528890 2.833752
## 2 AHS 0.3963709 2.522889
## 3 SI 0.7968916 1.254876
## 4 SRI 0.5236950 1.909508
## 5 UI 0.3165801 3.158758
##
##
## Eigenvalue and Condition Index
## ------------------------------
## Eigenvalue Condition Index intercept ACO AHS
## 1 5.386537577 1.000000 6.994331e-05 0.0005136938 0.0001916512
## 2 0.444466338 3.481252 6.484243e-05 0.0026682253 0.0001278701
## 3 0.084386209 7.989491 5.829055e-05 0.0478676279 0.0091615336
## 4 0.073784878 8.544195 1.355679e-03 0.0031699136 0.0100934045
## 5 0.009322827 24.037043 5.414145e-03 0.7943557055 0.2105218176
## 6 0.001502171 59.881840 9.930371e-01 0.1514248340 0.7699037229
## SI SRI UI
## 1 0.007888051 0.001515333 6.216297e-04
## 2 0.693175876 0.006641788 7.488285e-07
## 3 0.051400736 0.055585833 1.128114e-01
## 4 0.152292605 0.382488929 4.801705e-02
## 5 0.090809203 0.374832118 1.851308e-01
## 6 0.004433528 0.178935999 6.534183e-01
Filipe Moura, Gabriel Valença, Miguel Costa, Carlos Roque, & Rosa Félix. (2021, March). U-Shift/Transport-Demand-Modelling: Supporting materials to Transportation Demand Modelling classes at Instituto Superior Técnico - University of Lisbon. (Version 2021.0). GitHub. http://doi.org/10.5281/zenodo.4599525