library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(readxl)
library(wooldridge)
library(car)
## Loading required package: carData
library(dreamerr)
# Read the "Occupancy" and "Gasoline" sheets from the data workbook and from
# the projection workbook.
OccupancyData <- readxl::read_xlsx(
  path = "FZM01_Datos_SP_eng21.xlsx",
  sheet = "Occupancy"
)
GasData <- readxl::read_xlsx(
  path = "FZM01_Datos_SP_eng21.xlsx",
  sheet = "Gasoline"
)
GasProyection <- readxl::read_xlsx(
  path = "FZM01_Proy_SP_eng21.xlsx",
  sheet = "Gasoline"
)
OccupancyProjection <- readxl::read_xlsx(
  path = "FZM01_Proy_SP_eng21.xlsx",
  sheet = "Occupancy"
)
HOTEL MODEL
# Extract each column of the occupancy sheet into its own vector so the
# model formula can refer to them by short names.
YearD <- OccupancyData[["Year"]]
HOD <- OccupancyData[["Hotel Occupancy"]]
TGDPD <- OccupancyData[["Turism GDP"]]
NTD <- OccupancyData[["National Tourists"]]
ITD <- OccupancyData[["International Tourists"]]
ASD <- OccupancyData[["Average Stay"]]

# Full hotel model: occupancy regressed on every available variable.
OD_M1 <- lm(HOD ~ YearD + TGDPD + NTD + ITD + ASD)
summary(OD_M1)
##
## Call:
## lm(formula = HOD ~ YearD + TGDPD + NTD + ITD + ASD)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6576 -1.3662 -0.7917 1.7653 5.5615
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.619e+03 1.765e+03 0.917 0.3787
## YearD -8.185e-01 8.956e-01 -0.914 0.3804
## TGDPD 5.256e-01 4.549e-01 1.155 0.2724
## NTD -3.029e-05 3.847e-05 -0.787 0.4477
## ITD 2.215e-05 1.037e-05 2.135 0.0560 .
## ASD 8.961e+00 3.379e+00 2.652 0.0225 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.13 on 11 degrees of freedom
## Multiple R-squared: 0.8278, Adjusted R-squared: 0.7496
## F-statistic: 10.58 on 5 and 11 DF, p-value: 0.0006529
We can observe that the model is significant as a whole (F-test p-value = 0.0006529), but several variables are individually non-significant. We can also suspect multicollinearity because the R-squared is greater than 80% and more than one variable is non-significant.
# Breusch-Pagan test on the full hotel model (H0: constant error variance).
lmtest::bptest(OD_M1)
##
## studentized Breusch-Pagan test
##
## data: OD_M1
## BP = 8.5484, df = 5, p-value = 0.1285
Since the p-value of the Breusch-Pagan test (0.1285) is greater than 0.05, we fail to reject the null hypothesis of constant variance, so we can treat the model as homoskedastic.
# Variance inflation factor of each regressor in the full hotel model.
car::vif(OD_M1)
## YearD TGDPD NTD ITD ASD
## 33.394838 46.501331 7.019607 10.850695 4.158417
Checking the VIF values, we can confirm that three variables (YearD, TGDPD and ITD) are causing multicollinearity because their VIFs are greater than 10.
# Jarque-Bera test on the residuals of the full hotel model (H0: normality).
tseries::jarque.bera.test(residuals(OD_M1))
##
## Jarque Bera Test
##
## data: residuals(OD_M1)
## X-squared = 0.77922, df = 2, p-value = 0.6773
Since the p-value (0.6773) is greater than 0.05, at the 5% significance level we fail to reject the null hypothesis of normality, so we can say that the residuals are normally distributed.
CORRECTED HOTEL MODEL. Log transformations that were tried (not used in OD_M2 below): log_NTD<-log(NTD); log_ASD<-log(ASD); log_HOD<-log(HOD)
# Reduced hotel model: only national tourists and average stay are kept, and
# the `- 1` term removes the intercept.
OD_M2 <- lm(HOD ~ NTD + ASD - 1)
summary(OD_M2)
##
## Call:
## lm(formula = HOD ~ NTD + ASD - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3551 -2.8794 0.2265 2.8344 12.2412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## NTD 9.063e-05 2.299e-05 3.943 0.0013 **
## ASD 1.173e+01 1.035e+00 11.338 9.35e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.636 on 15 degrees of freedom
## Multiple R-squared: 0.992, Adjusted R-squared: 0.9909
## F-statistic: 930.1 on 2 and 15 DF, p-value: < 2.2e-16
After eliminating the variables that caused multicollinearity, we can observe that the model as a whole is statistically significant and all the variables are statistically significant as well.
# Breusch-Pagan test on the reduced hotel model (H0: constant error variance).
lmtest::bptest(OD_M2)
##
## studentized Breusch-Pagan test
##
## data: OD_M2
## BP = 5.1834, df = 1, p-value = 0.0228
Redoing the Breusch-Pagan test, we can observe that the model has become heteroskedastic (p-value = 0.0228 < 0.05); however, if we correct for that by using logs, the model becomes statistically insignificant as a whole and so do the individual variables.
# Variance inflation factor of each regressor in the reduced hotel model.
car::vif(OD_M2)
## Warning in vif.default(OD_M2): No intercept: vifs may not be sensible.
## NTD ASD
## 8.165018 8.165018
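When we check the VIFs we find that the multicollinearity has been resolved, since none of the values is greater than 10. Note, however, the warning that VIFs may not be sensible for a model without an intercept, so this result should be read with caution.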
# Jarque-Bera test on the residuals of the reduced hotel model (H0: normality).
tseries::jarque.bera.test(residuals(OD_M2))
##
## Jarque Bera Test
##
## data: residuals(OD_M2)
## X-squared = 0.22177, df = 2, p-value = 0.895
Checking the Jarque-Bera test, we can conclude that the residuals remain normally distributed, since the p-value (0.895) is greater than 0.05.
Gasoline model
# Extract each column of the gasoline sheet into its own vector so the model
# formula can refer to them by short names.
YearG <- GasData[["Year"]]
SalesG <- GasData[["Sales"]]
PriceG <- GasData[["Price"]]
DemandG <- GasData[["Demand"]]
VFG <- GasData[["Vehicle Fleet"]]
OGDPG <- GasData[["Oil GDP"]]

# Full gasoline model: sales regressed on every available variable.
GD_M1 <- lm(SalesG ~ YearG + PriceG + DemandG + VFG + OGDPG)
summary(GD_M1)
##
## Call:
## lm(formula = SalesG ~ YearG + PriceG + DemandG + VFG + OGDPG)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2061541 -385887 119287 448208 1655753
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.513e+07 5.905e+08 -0.127 0.90057
## YearG 3.981e+04 2.972e+05 0.134 0.89534
## PriceG -6.226e+04 2.336e+05 -0.267 0.79371
## DemandG -6.922e+04 6.830e+04 -1.013 0.32802
## VFG 3.346e-01 1.425e-01 2.348 0.03411 *
## OGDPG 1.129e+01 3.114e+00 3.625 0.00276 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 963700 on 14 degrees of freedom
## Multiple R-squared: 0.9019, Adjusted R-squared: 0.8669
## F-statistic: 25.74 on 5 and 14 DF, p-value: 1.361e-06
We can observe that the model is significant as a whole (F-test p-value = 1.361e-06), but several variables are individually non-significant. We can also suspect multicollinearity because the R-squared is greater than 80% and more than one variable is non-significant.
# Breusch-Pagan test on the full gasoline model (H0: constant error variance).
lmtest::bptest(GD_M1)
##
## studentized Breusch-Pagan test
##
## data: GD_M1
## BP = 8.308, df = 5, p-value = 0.1401
We can observe that the model is homoskedastic, since the p-value of the Breusch-Pagan test (0.1401) is greater than 0.05.
# Jarque-Bera test on the residuals of the full gasoline model (H0: normality).
tseries::jarque.bera.test(residuals(GD_M1))
##
## Jarque Bera Test
##
## data: residuals(GD_M1)
## X-squared = 1.1663, df = 2, p-value = 0.5581
Since the p-value (0.5581) is greater than 0.05, at the 5% significance level we fail to reject the null hypothesis of normality, so we can say that the residuals are normally distributed.
# Variance inflation factor of each regressor in the full gasoline model.
car::vif(GD_M1)
## YearG PriceG DemandG VFG OGDPG
## 63.232537 27.433889 2.824366 10.417611 3.179085
From the VIF values we can conclude that the model suffers from multicollinearity, since three variables (YearG, PriceG and VFG) have VIFs greater than 10.
Corrected Gasoline Model
# Reduced gasoline model: sales explained by oil GDP alone.
GD_M2 <- lm(SalesG ~ OGDPG)
summary(GD_M2)
##
## Call:
## lm(formula = SalesG ~ OGDPG)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2376705 -650909 -419154 618295 3706546
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.951e+06 2.112e+06 2.818 0.0114 *
## OGDPG 1.774e+01 2.587e+00 6.859 2.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1427000 on 18 degrees of freedom
## Multiple R-squared: 0.7233, Adjusted R-squared: 0.7079
## F-statistic: 47.05 on 1 and 18 DF, p-value: 2.037e-06
We observe that the model is statistically significant as a whole, and so is its single explanatory variable. Since the R-squared is less than 0.8 and the variable is statistically significant, we do not suspect multicollinearity (which, in any case, cannot arise in a model with only one regressor).
# Breusch-Pagan test on the reduced gasoline model (H0: constant error variance).
lmtest::bptest(GD_M2)
##
## studentized Breusch-Pagan test
##
## data: GD_M2
## BP = 0.0052626, df = 1, p-value = 0.9422
The model remains homoskedastic, since the p-value of the Breusch-Pagan test (0.9422) is greater than 0.05.
# Jarque-Bera test on the residuals of the reduced gasoline model (H0: normality).
tseries::jarque.bera.test(residuals(GD_M2))
##
## Jarque Bera Test
##
## data: residuals(GD_M2)
## X-squared = 5.2457, df = 2, p-value = 0.07259
We can conclude that the residuals of the model are normally distributed because the p-value of the Jarque-Bera test (0.07259) is greater than 0.05.