Contexto

El conjunto de datos es de la unidad de Nueva York y contiene 90 observaciones que incluyen los costoc de 6 aerolineas estadounidenses durante 15 años, de 1970 a 1984.

Las variables son: * I = Aerolinea * T = Year * Q = Salida * C = Total cost, in $1000 * PF = Fuel Price * LF = Load Factor, the average capacity utilization of the fleet

Fuente: Tabla F7.1

Instalar Paquetes y llamar Librerias

#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("gplots")
library(gplots)
## 
## Adjuntando el paquete: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
#install.packages("plm")
library(plm)
## 
## Adjuntando el paquete: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
#install.packages("DataExplorer")
#library(DataExplorer)
#install.packages("forecast")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
#install.packages("lavaan")
library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
#install.packages("lavaanPlot")
library(lavaanPlot)

Tema 1. Datos de Panel

Importar la Base de datos

df <- read.csv("~/vuelos.csv")

Entender la Base de datos

summary(df)
##        I             T            C                 Q          
##  Min.   :1.0   Min.   : 1   Min.   :  68978   Min.   :0.03768  
##  1st Qu.:2.0   1st Qu.: 4   1st Qu.: 292046   1st Qu.:0.14213  
##  Median :3.5   Median : 8   Median : 637001   Median :0.30503  
##  Mean   :3.5   Mean   : 8   Mean   :1122524   Mean   :0.54499  
##  3rd Qu.:5.0   3rd Qu.:12   3rd Qu.:1345968   3rd Qu.:0.94528  
##  Max.   :6.0   Max.   :15   Max.   :4748320   Max.   :1.93646  
##        PF                LF        
##  Min.   : 103795   Min.   :0.4321  
##  1st Qu.: 129848   1st Qu.:0.5288  
##  Median : 357434   Median :0.5661  
##  Mean   : 471683   Mean   :0.5605  
##  3rd Qu.: 849840   3rd Qu.:0.5947  
##  Max.   :1015610   Max.   :0.6763
str(df)
## 'data.frame':    90 obs. of  6 variables:
##  $ I : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ T : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ C : int  1140640 1215690 1309570 1511530 1676730 1823740 2022890 2314760 2639160 3247620 ...
##  $ Q : num  0.953 0.987 1.092 1.176 1.16 ...
##  $ PF: int  106650 110307 110574 121974 196606 265609 263451 316411 384110 569251 ...
##  $ LF: num  0.534 0.532 0.548 0.541 0.591 ...
head(df)
##   I T       C        Q     PF       LF
## 1 1 1 1140640 0.952757 106650 0.534487
## 2 1 2 1215690 0.986757 110307 0.532328
## 3 1 3 1309570 1.091980 110574 0.547736
## 4 1 4 1511530 1.175780 121974 0.540846
## 5 1 5 1676730 1.160170 196606 0.591167
## 6 1 6 1823740 1.173760 265609 0.575417
#create_report(df)
#plot_missing(df)
#plot_histogram(df)
#plot_correlation(df)

Revisar si tenemos Heterogeneidad

plotmeans(C ~ I, main="Heterogeneidad entre aerolineas", data=df)

## Creacion de Datos Panel

df1 <- pdata.frame(df, index = c("I", "T"))

Modelo 1. Regresion Agrupada (pooled)

pooled <- plm(C ~ Q+ PF + LF, data=df1, model = "pooling")
summary(pooled)
## Pooling Model
## 
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "pooling")
## 
## Balanced Panel: n = 6, T = 15, N = 90
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -520654 -250270   37333  208690  849700 
## 
## Coefficients:
##                Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept)  1.1586e+06  3.6059e+05  3.2129   0.00185 ** 
## Q            2.0261e+06  6.1807e+04 32.7813 < 2.2e-16 ***
## PF           1.2253e+00  1.0372e-01 11.8138 < 2.2e-16 ***
## LF          -3.0658e+06  6.9633e+05 -4.4027 3.058e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1.2647e+14
## Residual Sum of Squares: 6.8177e+12
## R-Squared:      0.94609
## Adj. R-Squared: 0.94421
## F-statistic: 503.118 on 3 and 86 DF, p-value: < 2.22e-16

Modelo 2. Efectos Fijos (Within)

within <- plm(C ~ Q+ PF + LF, data=df1, model = "within")
summary(within)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "within")
## 
## Balanced Panel: n = 6, T = 15, N = 90
## 
## Residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -551783 -159259    1796       0  137226  499296 
## 
## Coefficients:
##       Estimate  Std. Error t-value  Pr(>|t|)    
## Q   3.3190e+06  1.7135e+05 19.3694 < 2.2e-16 ***
## PF  7.7307e-01  9.7319e-02  7.9437 9.698e-12 ***
## LF -3.7974e+06  6.1377e+05 -6.1869 2.375e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    5.0776e+13
## Residual Sum of Squares: 3.5865e+12
## R-Squared:      0.92937
## Adj. R-Squared: 0.92239
## F-statistic: 355.254 on 3 and 81 DF, p-value: < 2.22e-16

Prueba F (Modelo 1 vs Modelo 2) (pooled)

pFtest(within,pooled)
## 
##  F test for individual effects
## 
## data:  C ~ Q + PF + LF
## F = 14.595, df1 = 5, df2 = 81, p-value = 3.467e-10
## alternative hypothesis: significant effects

Modelo 3. Efectos Aleatorios (Walhus)

walhus <- plm(C ~ Q+ PF + LF, data=df1, random.method = "walhus", model="random")
summary(walhus)
## Oneway (individual) effect Random Effect Model 
##    (Wallace-Hussain's transformation)
## 
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random", 
##     random.method = "walhus")
## 
## Balanced Panel: n = 6, T = 15, N = 90
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 7.339e+10 2.709e+05 0.969
## individual    2.363e+09 4.861e+04 0.031
## theta: 0.1788
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -524180 -243611   39332  199517  824905 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)  1.1267e+06  3.6994e+05  3.0455  0.002323 ** 
## Q            2.0647e+06  7.1927e+04 28.7051 < 2.2e-16 ***
## PF           1.2075e+00  1.0358e-01 11.6578 < 2.2e-16 ***
## LF          -3.0314e+06  7.1431e+05 -4.2438 2.198e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1.0182e+14
## Residual Sum of Squares: 6.5784e+12
## R-Squared:      0.93539
## Adj. R-Squared: 0.93314
## Chisq: 1245.09 on 3 DF, p-value: < 2.22e-16

Modelo 3. Efectos Aleatorios (amemiya)

amemiya <- plm(C ~ Q+ PF + LF, data=df1, random.method = "amemiya", model="random")
summary(amemiya)
## Oneway (individual) effect Random Effect Model 
##    (Amemiya's transformation)
## 
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random", 
##     random.method = "amemiya")
## 
## Balanced Panel: n = 6, T = 15, N = 90
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 4.270e+10 2.066e+05 0.084
## individual    4.640e+11 6.812e+05 0.916
## theta: 0.9219
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -603585 -144415   22641  158005  485417 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)  1.0746e+06  4.2105e+05  2.5522    0.0107 *  
## Q            3.2090e+06  1.6482e+05 19.4695 < 2.2e-16 ***
## PF           8.1014e-01  9.6147e-02  8.4260 < 2.2e-16 ***
## LF          -3.7168e+06  6.1330e+05 -6.0603 1.359e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    5.1238e+13
## Residual Sum of Squares: 3.8227e+12
## R-Squared:      0.92539
## Adj. R-Squared: 0.92279
## Chisq: 1066.71 on 3 DF, p-value: < 2.22e-16

Modelo 3. Efectos Aleatorios (Nerlove)

nerlove <- plm(C ~ Q+ PF + LF, data=df1, random.method = "nerlove", model="random")
summary(nerlove)
## Oneway (individual) effect Random Effect Model 
##    (Nerlove's transformation)
## 
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random", 
##     random.method = "nerlove")
## 
## Balanced Panel: n = 6, T = 15, N = 90
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 3.985e+10 1.996e+05 0.066
## individual    5.602e+11 7.485e+05 0.934
## theta: 0.9313
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -601947 -145039   18713  154903  483623 
## 
## Coefficients:
##                Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)  1.0752e+06  4.4535e+05  2.4142   0.01577 *  
## Q            3.2323e+06  1.6521e+05 19.5652 < 2.2e-16 ***
## PF           8.0229e-01  9.5804e-02  8.3743 < 2.2e-16 ***
## LF          -3.7338e+06  6.0963e+05 -6.1247 9.084e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    5.1133e+13
## Residual Sum of Squares: 3.7726e+12
## R-Squared:      0.92622
## Adj. R-Squared: 0.92365
## Chisq: 1079.63 on 3 DF, p-value: < 2.22e-16

Prueba F (Walhus vs within) (pooled)

phtest(walhus, within)
## 
##  Hausman Test
## 
## data:  C ~ Q + PF + LF
## chisq = 65.039, df = 3, p-value = 4.919e-14
## alternative hypothesis: one model is inconsistent

Como nos da 4.919e-14, nos quedamos con within ya que si el P-Value es <0.05, usamos efectos fijos (within)

Tema 2. Series de Tiempo

Generar la Serie de Tiempo

df2 <- df %>%group_by(T) %>%summarise("Cost"= sum(C))
ts <- ts(data=df2$Cost, start=1970, frequency=1)

Generamos el Modelo ARIMA

arima <- auto.arima(ts)
summary(arima)
## Series: ts 
## ARIMA(0,2,1) 
## 
## Coefficients:
##          ma1
##       0.6262
## s.e.  0.2198
## 
## sigma^2 = 9.087e+10:  log likelihood = -182.19
## AIC=368.37   AICc=369.57   BIC=369.5
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE    MAPE      MASE        ACF1
## Training set 27996.87 269624.3 201889.4 0.7953103 2.71744 0.2597085 -0.06184266

Generamos el Pronóstico

pronostico <- forecast(arima, level=95, h=5)
pronostico
##      Point Forecast    Lo 95    Hi 95
## 1985       14087526 13496696 14678356
## 1986       14990145 13329820 16650471
## 1987       15892764 12881265 18904264
## 1988       16795384 12198346 21392421
## 1989       17698003 11310993 24085012
plot(pronostico, main="Costos Totales de las Aerolineas")

Tema 3. Modelos de Ecuaciones Estructurales

Generar el modelo

modelo  <- ' #Regresiones
             Q ~ LF
             C ~ I + T + PF + LF 
             PF ~ T
             LF ~ PF + I
             #Variables Latentes
             # Varianzas y Covarianzas 
             # Intercepto
             '

Generar su diagrama

df3 <- scale(df)
df4 <- cfa(modelo,df3)
summary(df4)
## lavaan 0.6-19 ended normally after 38 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        13
## 
##   Number of observations                            90
## 
## Model Test User Model:
##                                                       
##   Test statistic                               166.924
##   Degrees of freedom                                 5
##   P-value (Chi-square)                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Q ~                                                 
##     LF                0.425    0.095    4.462    0.000
##   C ~                                                 
##     I                 0.105    0.025    4.158    0.000
##     T                 0.140    0.063    2.211    0.027
##     PF                0.194    0.065    2.986    0.003
##     LF                0.271    0.100    2.726    0.006
##   PF ~                                                
##     T                 0.931    0.038   24.233    0.000
##   LF ~                                                
##     PF                0.491    0.085    5.812    0.000
##     I                -0.346    0.085   -4.099    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##  .Q ~~                                                
##    .C                 0.811    0.123    6.612    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Q                 0.810    0.121    6.708    0.000
##    .C                 0.859    0.128    6.708    0.000
##    .PF                0.131    0.020    6.708    0.000
##    .LF                0.636    0.095    6.708    0.000
lavaanPlot(df4, coef=TRUE, cov=TRUE)