Evidencia_EM_A01198018

library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(readxl)
library(wooldridge)
library(car)

## Loading required package: carData

library(dreamerr)

# Load the "Occupancy" and "Gasoline" sheets of the data workbook.
OccupancyData <- read_xlsx(
  path = "FZM01_Datos_SP_eng21.xlsx",
  sheet = "Occupancy"
)
GasData <- read_xlsx(
  path = "FZM01_Datos_SP_eng21.xlsx",
  sheet = "Gasoline"
)

# Load the same two sheets from the second ("Proy") workbook.
GasProyection <- read_xlsx(
  path = "FZM01_Proy_SP_eng21.xlsx",
  sheet = "Gasoline"
)
OccupancyProjection <- read_xlsx(
  path = "FZM01_Proy_SP_eng21.xlsx",
  sheet = "Occupancy"
)

MODELO DEL HOTEL

# Pull each Occupancy column into its own vector; the model formulas
# refer to these short names directly.
YearD <- OccupancyData[["Year"]]
HOD <- OccupancyData[["Hotel Occupancy"]]
TGDPD <- OccupancyData[["Turism GDP"]]
NTD <- OccupancyData[["National Tourists"]]
ITD <- OccupancyData[["International Tourists"]]
ASD <- OccupancyData[["Average Stay"]]

# Full specification: hotel occupancy regressed on every extracted column.
OD_M1 <- lm(HOD ~ YearD + TGDPD + NTD + ITD + ASD)
summary(OD_M1)

## 
## Call:
## lm(formula = HOD ~ YearD + TGDPD + NTD + ITD + ASD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6576 -1.3662 -0.7917  1.7653  5.5615 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.619e+03  1.765e+03   0.917   0.3787  
## YearD       -8.185e-01  8.956e-01  -0.914   0.3804  
## TGDPD        5.256e-01  4.549e-01   1.155   0.2724  
## NTD         -3.029e-05  3.847e-05  -0.787   0.4477  
## ITD          2.215e-05  1.037e-05   2.135   0.0560 .
## ASD          8.961e+00  3.379e+00   2.652   0.0225 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.13 on 11 degrees of freedom
## Multiple R-squared:  0.8278, Adjusted R-squared:  0.7496 
## F-statistic: 10.58 on 5 and 11 DF,  p-value: 0.0006529

We can observe that the model is significant as a whole, but several variables are not individually significant. We can also suspect multicollinearity because the R-squared is greater than 80% and more than one variable is non-significant.

# Studentized Breusch-Pagan test for heteroskedasticity on the full hotel model.
bptest(OD_M1)

## 
##  studentized Breusch-Pagan test
## 
## data:  OD_M1
## BP = 8.5484, df = 5, p-value = 0.1285

Since the p-value of the Breusch-Pagan test is greater than 0.05, we fail to reject the null hypothesis of homoskedasticity, so we treat the model as homoskedastic.

# Variance inflation factor of each regressor in OD_M1 (multicollinearity check).
vif(OD_M1)

##     YearD     TGDPD       NTD       ITD       ASD 
## 33.394838 46.501331  7.019607 10.850695  4.158417

Checking the VIF values, we can confirm that three variables (YearD, TGDPD and ITD) are causing multicollinearity because their VIFs are greater than 10.

# Jarque-Bera normality test applied to the residuals of OD_M1.
jarque.bera.test(residuals(OD_M1))

## 
##  Jarque Bera Test
## 
## data:  residuals(OD_M1)
## X-squared = 0.77922, df = 2, p-value = 0.6773

Since the p-value is greater than 0.05, at the 95% confidence level we cannot reject normality, so we can say that the residuals are normally distributed.

MODELO DEL HOTEL CORRECTO

log_NTD<-log(NTD)
log_ASD<-log(ASD)
log_HOD<-log(HOD)

# Reduced hotel model: only NTD and ASD are kept, and `- 1` removes the
# intercept from the fit.
OD_M2 <- lm(HOD ~ NTD + ASD - 1)
summary(OD_M2)

## 
## Call:
## lm(formula = HOD ~ NTD + ASD - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.3551  -2.8794   0.2265   2.8344  12.2412 
## 
## Coefficients:
##      Estimate Std. Error t value Pr(>|t|)    
## NTD 9.063e-05  2.299e-05   3.943   0.0013 ** 
## ASD 1.173e+01  1.035e+00  11.338 9.35e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.636 on 15 degrees of freedom
## Multiple R-squared:  0.992,  Adjusted R-squared:  0.9909 
## F-statistic: 930.1 on 2 and 15 DF,  p-value: < 2.2e-16

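After eliminating the variables that caused multicollinearity, we can observe that the model as a whole is statistically significant and all the remaining variables are individually significant as well. Note that this model is fitted without an intercept (`- 1`), so its R-squared is not directly comparable with that of the first model.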

# Studentized Breusch-Pagan test for heteroskedasticity on the reduced hotel model.
bptest(OD_M2)

## 
##  studentized Breusch-Pagan test
## 
## data:  OD_M2
## BP = 5.1834, df = 1, p-value = 0.0228

By redoing the Breusch-Pagan test we can observe that the model has become heteroskedastic (the p-value is below 0.05). However, if we correct for that by using logs, the model becomes statistically insignificant as a whole, and so do the individual variables.

# Variance inflation factors for OD_M2; the model has no intercept, so vif()
# warns that the values may not be sensible.
vif(OD_M2)

## Warning in vif.default(OD_M2): No intercept: vifs may not be sensible.

##      NTD      ASD 
## 8.165018 8.165018

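When we check the VIFs we find that the multicollinearity has been resolved, since none of the values is greater than 10. (R warns that VIFs may not be sensible for a model without an intercept, so this result should be read with caution.)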

# Jarque-Bera normality test applied to the residuals of OD_M2.
jarque.bera.test(residuals(OD_M2))

## 
##  Jarque Bera Test
## 
## data:  residuals(OD_M2)
## X-squared = 0.22177, df = 2, p-value = 0.895

When checking the Jarque Bera Test we can conclude that the residuals remain normally distributed since the p-value is greater than 0.05.

Gasoline model

# Pull each Gasoline column into its own vector; the model formulas
# refer to these short names directly.
YearG <- GasData[["Year"]]
SalesG <- GasData[["Sales"]]
PriceG <- GasData[["Price"]]
DemandG <- GasData[["Demand"]]
VFG <- GasData[["Vehicle Fleet"]]
OGDPG <- GasData[["Oil GDP"]]

# Full specification: gasoline sales regressed on every extracted column.
GD_M1 <- lm(SalesG ~ YearG + PriceG + DemandG + VFG + OGDPG)
summary(GD_M1)

## 
## Call:
## lm(formula = SalesG ~ YearG + PriceG + DemandG + VFG + OGDPG)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2061541  -385887   119287   448208  1655753 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -7.513e+07  5.905e+08  -0.127  0.90057   
## YearG        3.981e+04  2.972e+05   0.134  0.89534   
## PriceG      -6.226e+04  2.336e+05  -0.267  0.79371   
## DemandG     -6.922e+04  6.830e+04  -1.013  0.32802   
## VFG          3.346e-01  1.425e-01   2.348  0.03411 * 
## OGDPG        1.129e+01  3.114e+00   3.625  0.00276 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 963700 on 14 degrees of freedom
## Multiple R-squared:  0.9019, Adjusted R-squared:  0.8669 
## F-statistic: 25.74 on 5 and 14 DF,  p-value: 1.361e-06

# Studentized Breusch-Pagan test for heteroskedasticity on the full gasoline model.
bptest(GD_M1)

## 
##  studentized Breusch-Pagan test
## 
## data:  GD_M1
## BP = 8.308, df = 5, p-value = 0.1401

We can observe that the model is homoskedastic since the p-value is greater than 0.05

# Jarque-Bera normality test applied to the residuals of GD_M1.
jarque.bera.test(residuals(GD_M1))

## 
##  Jarque Bera Test
## 
## data:  residuals(GD_M1)
## X-squared = 1.1663, df = 2, p-value = 0.5581

Since the p-value is greater than 0.05 with a 95% confidence level we can say that the residuals are normally distributed

# Variance inflation factor of each regressor in GD_M1 (multicollinearity check).
vif(GD_M1)

##     YearG    PriceG   DemandG       VFG     OGDPG 
## 63.232537 27.433889  2.824366 10.417611  3.179085

With the VIF test we can conclude that the model suffers from multicollinearity, since three variables (YearG, PriceG and VFG) have VIFs greater than 10.

Correct Model

# Reduced gasoline model: sales explained by OGDPG alone (intercept kept).
GD_M2 <- lm(SalesG ~ OGDPG)
summary(GD_M2)

## 
## Call:
## lm(formula = SalesG ~ OGDPG)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2376705  -650909  -419154   618295  3706546 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.951e+06  2.112e+06   2.818   0.0114 *  
## OGDPG       1.774e+01  2.587e+00   6.859 2.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1427000 on 18 degrees of freedom
## Multiple R-squared:  0.7233, Adjusted R-squared:  0.7079 
## F-statistic: 47.05 on 1 and 18 DF,  p-value: 2.037e-06

We observe that the model is statistically significant as a whole, and so is its single explanatory variable. Since the R-squared is less than 0.8 and the variable is statistically significant, we do not suspect multicollinearity; with only one regressor, multicollinearity cannot occur in any case.

# Studentized Breusch-Pagan test for heteroskedasticity on the reduced gasoline model.
bptest(GD_M2)

## 
##  studentized Breusch-Pagan test
## 
## data:  GD_M2
## BP = 0.0052626, df = 1, p-value = 0.9422

The model remains homoskedastic since the p-value is greater than 0.05

# Jarque-Bera normality test applied to the residuals of GD_M2.
jarque.bera.test(residuals(GD_M2))

## 
##  Jarque Bera Test
## 
## data:  residuals(GD_M2)
## X-squared = 5.2457, df = 2, p-value = 0.07259

We can conclude that the residuals of the model are normally distributed because the p-value of the Jarque-Bera test (0.07259) is greater than 0.05.