# install.packages("tidyverse")
library(tidyverse)
# install.packages("gplots")
library(gplots)
# install.packages("plm")
library(plm)
# install.packages("DataExplorer")
library(DataExplorer)
# install.packages("forecast")
library(forecast)
# install.packages("lavaan")
library(lavaan)
# install.packages("lavaanPlot")
library(lavaanPlot)
El conjunto de datos es de la Universidad de Nueva York y contiene 90 observaciones que incluyen los costos de 6 aerolineas estadounidenses durante 15 años, de 1970 a 1984.
Las variables son:
Fuente:
Tabla
F7.1
df <- read.csv("C:\\Users\\raulc\\OneDrive\\Escritorio\\Generación\\datos.csv")
summary(df)
## I T C Q
## Min. :1.0 Min. : 1 Min. : 68978 Min. :0.03768
## 1st Qu.:2.0 1st Qu.: 4 1st Qu.: 292046 1st Qu.:0.14213
## Median :3.5 Median : 8 Median : 637001 Median :0.30503
## Mean :3.5 Mean : 8 Mean :1122524 Mean :0.54499
## 3rd Qu.:5.0 3rd Qu.:12 3rd Qu.:1345968 3rd Qu.:0.94528
## Max. :6.0 Max. :15 Max. :4748320 Max. :1.93646
## PF LF
## Min. : 103795 Min. :0.4321
## 1st Qu.: 129848 1st Qu.:0.5288
## Median : 357434 Median :0.5661
## Mean : 471683 Mean :0.5605
## 3rd Qu.: 849840 3rd Qu.:0.5947
## Max. :1015610 Max. :0.6763
str(df)
## 'data.frame': 90 obs. of 6 variables:
## $ I : int 1 1 1 1 1 1 1 1 1 1 ...
## $ T : int 1 2 3 4 5 6 7 8 9 10 ...
## $ C : int 1140640 1215690 1309570 1511530 1676730 1823740 2022890 2314760 2639160 3247620 ...
## $ Q : num 0.953 0.987 1.092 1.176 1.16 ...
## $ PF: int 106650 110307 110574 121974 196606 265609 263451 316411 384110 569251 ...
## $ LF: num 0.534 0.532 0.548 0.541 0.591 ...
# create_report(df) # Este reporte lo genera el paquete de Data Explorer
plot_missing(df) # Estas funciones extraen las gráficas del reporte
plot_histogram(df)
plot_correlation(df)
plotmeans(C ~ I, main="Hererogeneidad entre aerolineas", data=df)
plotmeans(C ~ T, main="Hererogeneidad entre años", data=df)
Como el valor promedio (círculo) y el rango intercuartil (líneas azules) varían entre individuos, ambas gráficas indican presencia de heterogeneidad.
df1 <- pdata.frame(df,index=c("I","T"))
pooled1 <- plm(C ~ Q + PF + LF, data=df1, model="pooling")
summary(pooled1)
## Pooling Model
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "pooling")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -520654 -250270 37333 208690 849700
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.1586e+06 3.6059e+05 3.2129 0.00185 **
## Q 2.0261e+06 6.1807e+04 32.7813 < 2.2e-16 ***
## PF 1.2253e+00 1.0372e-01 11.8138 < 2.2e-16 ***
## LF -3.0658e+06 6.9633e+05 -4.4027 3.058e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.2647e+14
## Residual Sum of Squares: 6.8177e+12
## R-Squared: 0.94609
## Adj. R-Squared: 0.94421
## F-statistic: 503.118 on 3 and 86 DF, p-value: < 2.22e-16
Aunque el Modelo de Regresión Agrupada tiene p-values menores del 5% (asteriscos) y una R cuadrada/R cuadrada ajustada cercanas al 100%, no podemos confiar en este modelo ya que ignora por completo la heterogeneidad.
within1 <- plm(C ~ Q + PF + LF, data=df1, model="within")
summary(within1)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "within")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -551783 -159259 1796 0 137226 499296
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Q 3.3190e+06 1.7135e+05 19.3694 < 2.2e-16 ***
## PF 7.7307e-01 9.7319e-02 7.9437 9.698e-12 ***
## LF -3.7974e+06 6.1377e+05 -6.1869 2.375e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.0776e+13
## Residual Sum of Squares: 3.5865e+12
## R-Squared: 0.92937
## Adj. R-Squared: 0.92239
## F-statistic: 355.254 on 3 and 81 DF, p-value: < 2.22e-16
pFtest(within1,pooled1)
##
## F test for individual effects
##
## data: C ~ Q + PF + LF
## F = 14.595, df1 = 5, df2 = 81, p-value = 3.467e-10
## alternative hypothesis: significant effects
H0: Significa que el Modelo de Regresión Ajustada es
consistente
H1: Significa que el Modelo de Efectos Fijos es consistente
Como la hipotesis alternativa tiene efecto significativo, el
modelo de efectos fijos es consistente.
walhus1 <- plm(C ~ Q + PF + LF, data=df1, model="random", random.method = "walhus")
summary(walhus1)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 7.339e+10 2.709e+05 0.969
## individual 2.363e+09 4.861e+04 0.031
## theta: 0.1788
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -524180 -243611 39332 199517 824905
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1267e+06 3.6994e+05 3.0455 0.002323 **
## Q 2.0647e+06 7.1927e+04 28.7051 < 2.2e-16 ***
## PF 1.2075e+00 1.0358e-01 11.6578 < 2.2e-16 ***
## LF -3.0314e+06 7.1431e+05 -4.2438 2.198e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.0182e+14
## Residual Sum of Squares: 6.5784e+12
## R-Squared: 0.93539
## Adj. R-Squared: 0.93314
## Chisq: 1245.09 on 3 DF, p-value: < 2.22e-16
amemiya1 <- plm(C ~ Q + PF + LF, data=df1, model="random", random.method = "amemiya")
summary(amemiya1)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "amemiya")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 4.270e+10 2.066e+05 0.084
## individual 4.640e+11 6.812e+05 0.916
## theta: 0.9219
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -603585 -144415 22641 158005 485417
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.0746e+06 4.2105e+05 2.5522 0.0107 *
## Q 3.2090e+06 1.6482e+05 19.4695 < 2.2e-16 ***
## PF 8.1014e-01 9.6147e-02 8.4260 < 2.2e-16 ***
## LF -3.7168e+06 6.1330e+05 -6.0603 1.359e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.1238e+13
## Residual Sum of Squares: 3.8227e+12
## R-Squared: 0.92539
## Adj. R-Squared: 0.92279
## Chisq: 1066.71 on 3 DF, p-value: < 2.22e-16
nerlove1 <- plm(C ~ Q + PF + LF, data=df1, model="random", random.method = "nerlove")
summary(nerlove1)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "nerlove")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 3.985e+10 1.996e+05 0.066
## individual 5.602e+11 7.485e+05 0.934
## theta: 0.9313
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -601947 -145039 18713 154903 483623
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.0752e+06 4.4535e+05 2.4142 0.01577 *
## Q 3.2323e+06 1.6521e+05 19.5652 < 2.2e-16 ***
## PF 8.0229e-01 9.5804e-02 8.3743 < 2.2e-16 ***
## LF -3.7338e+06 6.0963e+05 -6.1247 9.084e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.1133e+13
## Residual Sum of Squares: 3.7726e+12
## R-Squared: 0.92622
## Adj. R-Squared: 0.92365
## Chisq: 1079.63 on 3 DF, p-value: < 2.22e-16
Por lo tanto, el mejor método para el modelo de efectos aleatorios es el walhus.
phtest(within1,walhus1)
##
## Hausman Test
##
## data: C ~ Q + PF + LF
## chisq = 65.039, df = 3, p-value = 4.919e-14
## alternative hypothesis: one model is inconsistent
Como la hipotesis alternativa muestra un modelo inconsistente revisamos los p-values (asteríscos) y el valor de R cuadrada/R cuadrada ajustada, por lo que el modelo de efectos aleatorios (walhus) es el seleccionado.
df2 <- df %>% group_by(T) %>% summarise("Cost" = mean(C) )
ts1 <- ts(data=df2$Cost, start = 1970, frequency=1)
arima1 <- auto.arima(ts1)
summary(arima1)
## Series: ts1
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## 0.6262
## s.e. 0.2198
##
## sigma^2 = 2.524e+09: log likelihood = -158.89
## AIC=321.79 AICc=322.99 BIC=322.92
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4666.146 44937.38 33648.23 0.7953103 2.71744 0.2597085 -0.06184266
pronostico1 <- forecast(arima1, level=95, h=5)
pronostico1
## Point Forecast Lo 95 Hi 95
## 1985 2347921 2249449 2446393
## 1986 2498358 2221637 2775078
## 1987 2648794 2146878 3150711
## 1988 2799231 2033058 3565404
## 1989 2949667 1885166 4014169
plot(pronostico1, main="Costos en Aerolineas")
modelo1 <- ' # Regresiones
Q ~ LF
C ~ I + T + Q + PF + LF
PF ~ T
LF ~ I + T
# Variables Latentes
# Varianzas y Covarianzas
# Intercepto
'
df3 <- scale(df)
fit1 <- cfa(modelo1, df3)
summary(fit1)
## lavaan 0.6.17 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 90
##
## Model Test User Model:
##
## Test statistic 148.535
## Degrees of freedom 5
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Q ~
## LF 0.425 0.095 4.452 0.000
## C ~
## I 0.105 0.026 4.101 0.000
## T 0.140 0.066 2.116 0.034
## Q 1.000 0.026 39.152 0.000
## PF 0.194 0.063 3.060 0.002
## LF -0.154 0.034 -4.554 0.000
## PF ~
## T 0.931 0.038 24.233 0.000
## LF ~
## I -0.340 0.076 -4.454 0.000
## T 0.600 0.076 7.863 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Q 0.810 0.121 6.708 0.000
## .C 0.048 0.007 6.708 0.000
## .PF 0.131 0.020 6.708 0.000
## .LF 0.518 0.077 6.708 0.000
lavaanPlot(fit1, coef= TRUE, cov=TRUE)