El conjunto de datos es de la universidad de Nueva York y contiene 90 observaciones que incluyen los costos de 6 aerolineas estadounidenses durante 15 años, de 1970 a 1984.
Las variables son * I = Airline * T = Year * Q = Output, in revenue passenger miles, index nummber * C = Total cost, in $1000 * PF = Fuel Price * LF = Load factor, the average capacity utilization of the fleet
Fuente:Tabla F7.1
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("gplots")
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
#install.packages("plm")
library(plm)
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
#install.packages("DataExplorer")
library(DataExplorer)
#install.packages("forecast")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#install.packages("lavaan")
library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
#install.packages("lavaanPlot")
library(lavaanPlot)
df <- read.csv("/Users/sebastianfajardo/Downloads/vuelos.csv")
summary(df)
## I T C Q
## Min. :1.0 Min. : 1 Min. : 68978 Min. :0.03768
## 1st Qu.:2.0 1st Qu.: 4 1st Qu.: 292046 1st Qu.:0.14213
## Median :3.5 Median : 8 Median : 637001 Median :0.30503
## Mean :3.5 Mean : 8 Mean :1122524 Mean :0.54499
## 3rd Qu.:5.0 3rd Qu.:12 3rd Qu.:1345968 3rd Qu.:0.94528
## Max. :6.0 Max. :15 Max. :4748320 Max. :1.93646
## PF LF
## Min. : 103795 Min. :0.4321
## 1st Qu.: 129848 1st Qu.:0.5288
## Median : 357434 Median :0.5661
## Mean : 471683 Mean :0.5605
## 3rd Qu.: 849840 3rd Qu.:0.5947
## Max. :1015610 Max. :0.6763
str(df)
## 'data.frame': 90 obs. of 6 variables:
## $ I : int 1 1 1 1 1 1 1 1 1 1 ...
## $ T : int 1 2 3 4 5 6 7 8 9 10 ...
## $ C : int 1140640 1215690 1309570 1511530 1676730 1823740 2022890 2314760 2639160 3247620 ...
## $ Q : num 0.953 0.987 1.092 1.176 1.16 ...
## $ PF: int 106650 110307 110574 121974 196606 265609 263451 316411 384110 569251 ...
## $ LF: num 0.534 0.532 0.548 0.541 0.591 ...
head(df)
## I T C Q PF LF
## 1 1 1 1140640 0.952757 106650 0.534487
## 2 1 2 1215690 0.986757 110307 0.532328
## 3 1 3 1309570 1.091980 110574 0.547736
## 4 1 4 1511530 1.175780 121974 0.540846
## 5 1 5 1676730 1.160170 196606 0.591167
## 6 1 6 1823740 1.173760 265609 0.575417
#create_report(df)
plot_missing(df)
plot_histogram(df)
plot_correlation(df)
## Revisar heterogeneidad
plotmeans(C ~ I, main= "Heterogeneidad entre aerolineas", data=df)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
df1 <- pdata.frame(df, index=c("I", "T"))
pooled <- plm(C~ Q + PF + LF, data=df1, model="pooling")
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "pooling")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -520654 -250270 37333 208690 849700
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.1586e+06 3.6059e+05 3.2129 0.00185 **
## Q 2.0261e+06 6.1807e+04 32.7813 < 2.2e-16 ***
## PF 1.2253e+00 1.0372e-01 11.8138 < 2.2e-16 ***
## LF -3.0658e+06 6.9633e+05 -4.4027 3.058e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.2647e+14
## Residual Sum of Squares: 6.8177e+12
## R-Squared: 0.94609
## Adj. R-Squared: 0.94421
## F-statistic: 503.118 on 3 and 86 DF, p-value: < 2.22e-16
within <- plm(C~ Q + PF + LF, data=df1, model="within")
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "within")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -551783 -159259 1796 0 137226 499296
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Q 3.3190e+06 1.7135e+05 19.3694 < 2.2e-16 ***
## PF 7.7307e-01 9.7319e-02 7.9437 9.698e-12 ***
## LF -3.7974e+06 6.1377e+05 -6.1869 2.375e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.0776e+13
## Residual Sum of Squares: 3.5865e+12
## R-Squared: 0.92937
## Adj. R-Squared: 0.92239
## F-statistic: 355.254 on 3 and 81 DF, p-value: < 2.22e-16
pFtest(within,pooled)
##
## F test for individual effects
##
## data: C ~ Q + PF + LF
## F = 14.595, df1 = 5, df2 = 81, p-value = 3.467e-10
## alternative hypothesis: significant effects
walhus <- plm(C~ Q + PF + LF, data=df1, model = "random", random.method = "walhus")
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 7.339e+10 2.709e+05 0.969
## individual 2.363e+09 4.861e+04 0.031
## theta: 0.1788
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -524180 -243611 39332 199517 824905
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1267e+06 3.6994e+05 3.0455 0.002323 **
## Q 2.0647e+06 7.1927e+04 28.7051 < 2.2e-16 ***
## PF 1.2075e+00 1.0358e-01 11.6578 < 2.2e-16 ***
## LF -3.0314e+06 7.1431e+05 -4.2438 2.198e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.0182e+14
## Residual Sum of Squares: 6.5784e+12
## R-Squared: 0.93539
## Adj. R-Squared: 0.93314
## Chisq: 1245.09 on 3 DF, p-value: < 2.22e-16
amemiya <- plm(C~ Q + PF + LF, data=df1, model = "random", random.method = "amemiya")
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "amemiya")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 4.270e+10 2.066e+05 0.084
## individual 4.640e+11 6.812e+05 0.916
## theta: 0.9219
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -603585 -144415 22641 158005 485417
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.0746e+06 4.2105e+05 2.5522 0.0107 *
## Q 3.2090e+06 1.6482e+05 19.4695 < 2.2e-16 ***
## PF 8.1014e-01 9.6147e-02 8.4260 < 2.2e-16 ***
## LF -3.7168e+06 6.1330e+05 -6.0603 1.359e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.1238e+13
## Residual Sum of Squares: 3.8227e+12
## R-Squared: 0.92539
## Adj. R-Squared: 0.92279
## Chisq: 1066.71 on 3 DF, p-value: < 2.22e-16
nerlove <- plm(C~ Q + PF + LF, data=df1, model = "random", random.method = "walhus")
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 7.339e+10 2.709e+05 0.969
## individual 2.363e+09 4.861e+04 0.031
## theta: 0.1788
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -524180 -243611 39332 199517 824905
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1267e+06 3.6994e+05 3.0455 0.002323 **
## Q 2.0647e+06 7.1927e+04 28.7051 < 2.2e-16 ***
## PF 1.2075e+00 1.0358e-01 11.6578 < 2.2e-16 ***
## LF -3.0314e+06 7.1431e+05 -4.2438 2.198e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.0182e+14
## Residual Sum of Squares: 6.5784e+12
## R-Squared: 0.93539
## Adj. R-Squared: 0.93314
## Chisq: 1245.09 on 3 DF, p-value: < 2.22e-16
walhus <- plm(C~ Q + PF + LF, data=df1, model = "random", random.method = "walhus")
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 7.339e+10 2.709e+05 0.969
## individual 2.363e+09 4.861e+04 0.031
## theta: 0.1788
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -524180 -243611 39332 199517 824905
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1267e+06 3.6994e+05 3.0455 0.002323 **
## Q 2.0647e+06 7.1927e+04 28.7051 < 2.2e-16 ***
## PF 1.2075e+00 1.0358e-01 11.6578 < 2.2e-16 ***
## LF -3.0314e+06 7.1431e+05 -4.2438 2.198e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.0182e+14
## Residual Sum of Squares: 6.5784e+12
## R-Squared: 0.93539
## Adj. R-Squared: 0.93314
## Chisq: 1245.09 on 3 DF, p-value: < 2.22e-16
phtest(walhus,within)
##
## Hausman Test
##
## data: C ~ Q + PF + LF
## chisq = 65.039, df = 3, p-value = 4.919e-14
## alternative hypothesis: one model is inconsistent
por lo tanto,nos quedamos con el Modelo de efectos fijos
df2 <- df %>% group_by(T) %>% summarise ("Cost" = sum(C))
ts <- ts(data=df2$Cost, start=1970, frequency = 1)
arima <- auto.arima(ts)
summary(arima)
## Series: ts
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## 0.6262
## s.e. 0.2198
##
## sigma^2 = 9.087e+10: log likelihood = -182.19
## AIC=368.37 AICc=369.57 BIC=369.5
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 27996.87 269624.3 201889.4 0.7953103 2.71744 0.2597085 -0.06184266
pronostico <- forecast(arima, level=95, h=5)
pronostico
## Point Forecast Lo 95 Hi 95
## 1985 14087526 13496696 14678356
## 1986 14990145 13329820 16650471
## 1987 15892764 12881265 18904264
## 1988 16795384 12198346 21392421
## 1989 17698003 11310993 24085012
plot(pronostico, main="Costos Totales de las Aerolineas")
modelo <- '
#Regresiones
Q ~ LF
C ~ I + T + PF + LF
LF ~ PF + I
#Variables latentes
#Varianzas y covarianzas
#Intercepto
'
df3 <- scale(df)
df4 <-cfa(modelo, df3)
summary(df4)
## lavaan 0.6-19 ended normally after 37 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Number of observations 90
##
## Model Test User Model:
##
## Test statistic 166.804
## Degrees of freedom 4
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Q ~
## LF 0.425 0.095 4.452 0.000
## C ~
## I 0.105 0.025 4.156 0.000
## T 0.140 0.063 2.209 0.027
## PF 0.194 0.065 2.984 0.003
## LF 0.271 0.100 2.721 0.007
## LF ~
## PF 0.491 0.085 5.812 0.000
## I -0.346 0.085 -4.099 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .Q ~~
## .C 0.811 0.123 6.612 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Q 0.810 0.121 6.708 0.000
## .C 0.859 0.128 6.708 0.000
## .LF 0.636 0.095 6.708 0.000
lavaanPlot(df4, coef= TRUE, cov= TRUE)