El conjunto de datos es de la universidad de Nueva York y contiene 90 observaciones que incluyen los costos de 6 aerolineas estadounidenses durante 15 años, de 1970 a 1984.
Las variables son:
Fuente:
Tabla
F7.1
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("gplots")
library(gplots)
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
#install.packages("plm")
library(plm)
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
#install.packages("DataExplorer")
library(DataExplorer)
#install.packages("forecast")
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#install.packages("lavaan")
library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
#install.packages("lavaanPlot")
library(lavaanPlot)
df <- read.csv("/Users/hugoenrique/Desktop/Universidad/8vo Semestre/Generación de Escenarios/M1/Actividad integradora/vuelos.csv")
summary(df)
## I T C Q
## Min. :1.0 Min. : 1 Min. : 68978 Min. :0.03768
## 1st Qu.:2.0 1st Qu.: 4 1st Qu.: 292046 1st Qu.:0.14213
## Median :3.5 Median : 8 Median : 637001 Median :0.30503
## Mean :3.5 Mean : 8 Mean :1122524 Mean :0.54499
## 3rd Qu.:5.0 3rd Qu.:12 3rd Qu.:1345968 3rd Qu.:0.94528
## Max. :6.0 Max. :15 Max. :4748320 Max. :1.93646
## PF LF
## Min. : 103795 Min. :0.4321
## 1st Qu.: 129848 1st Qu.:0.5288
## Median : 357434 Median :0.5661
## Mean : 471683 Mean :0.5605
## 3rd Qu.: 849840 3rd Qu.:0.5947
## Max. :1015610 Max. :0.6763
str(df)
## 'data.frame': 90 obs. of 6 variables:
## $ I : int 1 1 1 1 1 1 1 1 1 1 ...
## $ T : int 1 2 3 4 5 6 7 8 9 10 ...
## $ C : int 1140640 1215690 1309570 1511530 1676730 1823740 2022890 2314760 2639160 3247620 ...
## $ Q : num 0.953 0.987 1.092 1.176 1.16 ...
## $ PF: int 106650 110307 110574 121974 196606 265609 263451 316411 384110 569251 ...
## $ LF: num 0.534 0.532 0.548 0.541 0.591 ...
head(df)
## I T C Q PF LF
## 1 1 1 1140640 0.952757 106650 0.534487
## 2 1 2 1215690 0.986757 110307 0.532328
## 3 1 3 1309570 1.091980 110574 0.547736
## 4 1 4 1511530 1.175780 121974 0.540846
## 5 1 5 1676730 1.160170 196606 0.591167
## 6 1 6 1823740 1.173760 265609 0.575417
#create_report(df)
plot_missing(df)
plot_histogram(df)
plot_correlation(df)
df
## I T C Q PF LF
## 1 1 1 1140640 0.952757 106650 0.534487
## 2 1 2 1215690 0.986757 110307 0.532328
## 3 1 3 1309570 1.091980 110574 0.547736
## 4 1 4 1511530 1.175780 121974 0.540846
## 5 1 5 1676730 1.160170 196606 0.591167
## 6 1 6 1823740 1.173760 265609 0.575417
## 7 1 7 2022890 1.290510 263451 0.594495
## 8 1 8 2314760 1.390670 316411 0.597409
## 9 1 9 2639160 1.612730 384110 0.638522
## 10 1 10 3247620 1.825440 569251 0.676287
## 11 1 11 3787750 1.546040 871636 0.605735
## 12 1 12 3867750 1.527900 997239 0.614360
## 13 1 13 3996020 1.660200 938002 0.633366
## 14 1 14 4282880 1.822310 859572 0.650117
## 15 1 15 4748320 1.936460 823411 0.625603
## 16 2 1 569292 0.520635 103795 0.490851
## 17 2 2 640614 0.534627 111477 0.473449
## 18 2 3 777655 0.655192 118664 0.503013
## 19 2 4 999294 0.791575 114797 0.512501
## 20 2 5 1203970 0.842945 215322 0.566782
## 21 2 6 1358100 0.852892 281704 0.558133
## 22 2 7 1501350 0.922843 304818 0.558799
## 23 2 8 1709270 1.000000 348609 0.572070
## 24 2 9 2025400 1.198450 374579 0.624763
## 25 2 10 2548370 1.340670 544109 0.628706
## 26 2 11 3137740 1.326240 853356 0.589150
## 27 2 12 3557700 1.248520 1003200 0.532612
## 28 2 13 3717740 1.254320 941977 0.526652
## 29 2 14 3962370 1.371770 856533 0.540163
## 30 2 15 4209390 1.389740 821361 0.528775
## 31 3 1 286298 0.262424 118788 0.524334
## 32 3 2 309290 0.266433 123798 0.537185
## 33 3 3 342056 0.306043 122882 0.582119
## 34 3 4 374595 0.325586 131274 0.579489
## 35 3 5 450037 0.345706 222037 0.606592
## 36 3 6 510412 0.367517 278721 0.607270
## 37 3 7 575347 0.409937 306564 0.582425
## 38 3 8 669331 0.448023 356073 0.573972
## 39 3 9 783799 0.539595 378311 0.654256
## 40 3 10 913883 0.539382 555267 0.631055
## 41 3 11 1041520 0.467967 850322 0.569240
## 42 3 12 1125800 0.450544 1015610 0.589682
## 43 3 13 1096070 0.468793 954508 0.587953
## 44 3 14 1198930 0.494397 886999 0.565388
## 45 3 15 1170470 0.493317 844079 0.577078
## 46 4 1 145167 0.086393 114987 0.432066
## 47 4 2 170192 0.096740 120501 0.439669
## 48 4 3 247506 0.141500 121908 0.488932
## 49 4 4 309391 0.169715 127220 0.484181
## 50 4 5 354338 0.173805 209405 0.529925
## 51 4 6 373941 0.164272 263148 0.532723
## 52 4 7 420915 0.170906 316724 0.549067
## 53 4 8 474017 0.177840 363598 0.557140
## 54 4 9 532590 0.192248 389436 0.611377
## 55 4 10 676771 0.242469 547376 0.645319
## 56 4 11 880438 0.256505 850418 0.611734
## 57 4 12 1052020 0.249657 1011170 0.580884
## 58 4 13 1193680 0.273923 951934 0.572047
## 59 4 14 1303390 0.371131 881323 0.594570
## 60 4 15 1436970 0.421411 831374 0.585525
## 61 5 1 91361 0.051028 118222 0.442875
## 62 5 2 95428 0.052646 116223 0.462473
## 63 5 3 98187 0.056348 115853 0.519118
## 64 5 4 115967 0.066953 129372 0.529331
## 65 5 5 138382 0.070308 243266 0.557797
## 66 5 6 156228 0.073961 277930 0.556181
## 67 5 7 183169 0.084946 317273 0.569327
## 68 5 8 210212 0.095474 358794 0.583465
## 69 5 9 274024 0.119814 397667 0.631818
## 70 5 10 356915 0.150046 566672 0.604723
## 71 5 11 432344 0.144014 848393 0.587921
## 72 5 12 524294 0.169300 1005740 0.616159
## 73 5 13 530924 0.172761 958231 0.605868
## 74 5 14 581447 0.186670 872924 0.594688
## 75 5 15 610257 0.213279 844622 0.635545
## 76 6 1 68978 0.037682 117112 0.448539
## 77 6 2 74904 0.039784 119420 0.475889
## 78 6 3 83829 0.044331 116087 0.500562
## 79 6 4 98148 0.050245 122997 0.500344
## 80 6 5 118449 0.055046 194309 0.528897
## 81 6 6 133161 0.052462 307923 0.495361
## 82 6 7 145062 0.056977 323595 0.510342
## 83 6 8 170711 0.061490 363081 0.518296
## 84 6 9 199775 0.069027 386422 0.546723
## 85 6 10 276797 0.092749 564867 0.554276
## 86 6 11 381478 0.112640 874818 0.517766
## 87 6 12 506969 0.154154 1013170 0.580049
## 88 6 13 633388 0.186461 930477 0.556024
## 89 6 14 804388 0.246847 851676 0.537791
## 90 6 15 1009500 0.304013 819476 0.525775
plotmeans(C ~ I, main= "Heterogeneidad entre aerolineas", data=df)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
df1 <- pdata.frame(df, index = c("I", "T"))
pooled <- plm(C ~ Q + PF + LF, data = df1, model="pooling")
summary(pooled)
## Pooling Model
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "pooling")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -520654 -250270 37333 208690 849700
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.1586e+06 3.6059e+05 3.2129 0.00185 **
## Q 2.0261e+06 6.1807e+04 32.7813 < 2.2e-16 ***
## PF 1.2253e+00 1.0372e-01 11.8138 < 2.2e-16 ***
## LF -3.0658e+06 6.9633e+05 -4.4027 3.058e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.2647e+14
## Residual Sum of Squares: 6.8177e+12
## R-Squared: 0.94609
## Adj. R-Squared: 0.94421
## F-statistic: 503.118 on 3 and 86 DF, p-value: < 2.22e-16
within <- plm(C ~ Q + PF + LF, data = df1, model="within")
summary(within)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "within")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -551783 -159259 1796 0 137226 499296
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Q 3.3190e+06 1.7135e+05 19.3694 < 2.2e-16 ***
## PF 7.7307e-01 9.7319e-02 7.9437 9.698e-12 ***
## LF -3.7974e+06 6.1377e+05 -6.1869 2.375e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.0776e+13
## Residual Sum of Squares: 3.5865e+12
## R-Squared: 0.92937
## Adj. R-Squared: 0.92239
## F-statistic: 355.254 on 3 and 81 DF, p-value: < 2.22e-16
pFtest(within,pooled)
##
## F test for individual effects
##
## data: C ~ Q + PF + LF
## F = 14.595, df1 = 5, df2 = 81, p-value = 3.467e-10
## alternative hypothesis: significant effects
walhus <- plm(C ~ Q + PF + LF, data = df1, model="random", random.method="walhus")
summary(walhus)
## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 7.339e+10 2.709e+05 0.969
## individual 2.363e+09 4.861e+04 0.031
## theta: 0.1788
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -524180 -243611 39332 199517 824905
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1267e+06 3.6994e+05 3.0455 0.002323 **
## Q 2.0647e+06 7.1927e+04 28.7051 < 2.2e-16 ***
## PF 1.2075e+00 1.0358e-01 11.6578 < 2.2e-16 ***
## LF -3.0314e+06 7.1431e+05 -4.2438 2.198e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.0182e+14
## Residual Sum of Squares: 6.5784e+12
## R-Squared: 0.93539
## Adj. R-Squared: 0.93314
## Chisq: 1245.09 on 3 DF, p-value: < 2.22e-16
amemiya <- plm(C ~ Q + PF + LF, data = df1, model="random", random.method="amemiya")
summary(amemiya)
## Oneway (individual) effect Random Effect Model
## (Amemiya's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "amemiya")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 4.270e+10 2.066e+05 0.084
## individual 4.640e+11 6.812e+05 0.916
## theta: 0.9219
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -603585 -144415 22641 158005 485417
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.0746e+06 4.2105e+05 2.5522 0.0107 *
## Q 3.2090e+06 1.6482e+05 19.4695 < 2.2e-16 ***
## PF 8.1014e-01 9.6147e-02 8.4260 < 2.2e-16 ***
## LF -3.7168e+06 6.1330e+05 -6.0603 1.359e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.1238e+13
## Residual Sum of Squares: 3.8227e+12
## R-Squared: 0.92539
## Adj. R-Squared: 0.92279
## Chisq: 1066.71 on 3 DF, p-value: < 2.22e-16
nerlove <- plm(C ~ Q + PF + LF, data = df1, model="random", random.method="nerlove")
summary(nerlove)
## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = C ~ Q + PF + LF, data = df1, model = "random",
## random.method = "nerlove")
##
## Balanced Panel: n = 6, T = 15, N = 90
##
## Effects:
## var std.dev share
## idiosyncratic 3.985e+10 1.996e+05 0.066
## individual 5.602e+11 7.485e+05 0.934
## theta: 0.9313
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -601947 -145039 18713 154903 483623
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.0752e+06 4.4535e+05 2.4142 0.01577 *
## Q 3.2323e+06 1.6521e+05 19.5652 < 2.2e-16 ***
## PF 8.0229e-01 9.5804e-02 8.3743 < 2.2e-16 ***
## LF -3.7338e+06 6.0963e+05 -6.1247 9.084e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 5.1133e+13
## Residual Sum of Squares: 3.7726e+12
## R-Squared: 0.92622
## Adj. R-Squared: 0.92365
## Chisq: 1079.63 on 3 DF, p-value: < 2.22e-16
phtest(walhus, within)
##
## Hausman Test
##
## data: C ~ Q + PF + LF
## chisq = 65.039, df = 3, p-value = 4.919e-14
## alternative hypothesis: one model is inconsistent
Por lo tanto, nos quedamos con el Modelo de Efectos Fijos.
df2 <- df %>% group_by(T) %>% summarise("Cost" = sum(C))
ts <- ts(data = df2$Cost, start = 1970, frequency = 1)
arima <- auto.arima(ts)
arima
## Series: ts
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## 0.6262
## s.e. 0.2198
##
## sigma^2 = 9.087e+10: log likelihood = -182.19
## AIC=368.37 AICc=369.57 BIC=369.5
pronostico <- forecast(arima, level = 95, h=5)
pronostico
## Point Forecast Lo 95 Hi 95
## 1985 14087526 13496696 14678356
## 1986 14990145 13329820 16650471
## 1987 15892764 12881265 18904264
## 1988 16795384 12198346 21392421
## 1989 17698003 11310993 24085012
plot(pronostico, main="Costos Totales de las Aerolineas")
modelo <- '
# Regresiones
Q ~ LF
C ~ I + T + PF + LF
LF ~ PF + I
PF ~ T
# Variables latentes
# Varianzas y covarianzas
# Intercepto
'
df3 <- scale(df)
df4 <- cfa(modelo, df3)
summary(df4)
## lavaan 0.6-19 ended normally after 39 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 90
##
## Model Test User Model:
##
## Test statistic 166.924
## Degrees of freedom 5
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## Q ~
## LF 0.425 0.095 4.462 0.000
## C ~
## I 0.105 0.025 4.158 0.000
## T 0.140 0.063 2.211 0.027
## PF 0.194 0.065 2.986 0.003
## LF 0.271 0.100 2.726 0.006
## LF ~
## PF 0.491 0.085 5.812 0.000
## I -0.346 0.085 -4.099 0.000
## PF ~
## T 0.931 0.038 24.233 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .Q ~~
## .C 0.811 0.123 6.612 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Q 0.810 0.121 6.708 0.000
## .C 0.859 0.128 6.708 0.000
## .LF 0.636 0.095 6.708 0.000
## .PF 0.131 0.020 6.708 0.000
lavaanPlot(df4, coef=TRUE, cov=TRUE)