plot_ly( dfp , x=~dfp$MONTH, y=~dfp$NB_FLIGHT, type = "scatter", name = paste0('Revenue ',iata), mode='lines' )
pairs(dfa[,c(6,9,15,19,26)], pch = 19)
# Model A:
modelA <- lm( REVENUE ~ PPM_DAY0+PPM_DAY1+PPM_DAY2+PPM_DAY3+PPM_DAY4+PPM_DAY5+PPM_DAY6+PPM_DAY7+PPM_DAY15+PPM_DAY21+PPM_DAY30+PPM_DAY60+PPM_DAY90+PPM_DAY180, data = dfa )
# Stats summary:
summary( modelA )
##
## Call:
## lm(formula = REVENUE ~ PPM_DAY0 + PPM_DAY1 + PPM_DAY2 + PPM_DAY3 +
## PPM_DAY4 + PPM_DAY5 + PPM_DAY6 + PPM_DAY7 + PPM_DAY15 + PPM_DAY21 +
## PPM_DAY30 + PPM_DAY60 + PPM_DAY90 + PPM_DAY180, data = dfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -242.18 -60.32 11.24 60.81 164.54
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1464.18 73.21 19.999 <2e-16 ***
## PPM_DAY0 630.24 274.79 2.294 0.0265 *
## PPM_DAY1 -3124.78 1286.39 -2.429 0.0192 *
## PPM_DAY2 -38.82 1835.31 -0.021 0.9832
## PPM_DAY3 1021.15 1760.34 0.580 0.5648
## PPM_DAY4 4008.15 3510.83 1.142 0.2596
## PPM_DAY5 -3961.02 2800.14 -1.415 0.1641
## PPM_DAY6 -537.53 1442.58 -0.373 0.7112
## PPM_DAY7 1781.67 1658.82 1.074 0.2885
## PPM_DAY15 826.45 748.96 1.103 0.2757
## PPM_DAY21 -262.78 707.20 -0.372 0.7120
## PPM_DAY30 155.18 578.56 0.268 0.7898
## PPM_DAY60 306.82 515.13 0.596 0.5544
## PPM_DAY90 -257.40 420.43 -0.612 0.5435
## PPM_DAY180 337.87 252.92 1.336 0.1883
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 97.59 on 45 degrees of freedom
## Multiple R-squared: 0.5977, Adjusted R-squared: 0.4725
## F-statistic: 4.775 on 14 and 45 DF, p-value: 2.934e-05
dfa$modelA <- predict( modelA , dfa)
# Plot scattered chart: Model Prediction vs Actual
plot( data.frame(cbind(actuals=dfa$REVENUE, predicteds=dfa$modelA )) )
# Plot Hexa Binning chart: Model Prediction vs Actual
plot(hexbin(dfa$REVENUE, dfa$modelA , xbins=5) , main="Hexagonal Binning")
# Model B:
modelB <- lm( REVENUE ~ PPM_DAY0+PPM_DAY3+PPM_DAY6+PPM_DAY30+PPM_DAY180+NB_FLIGHT , data = dfa )
summary( modelB )
##
## Call:
## lm(formula = REVENUE ~ PPM_DAY0 + PPM_DAY3 + PPM_DAY6 + PPM_DAY30 +
## PPM_DAY180 + NB_FLIGHT, data = dfa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -301.157 -55.851 -6.482 75.760 191.368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.388e+03 7.694e+01 18.045 < 2e-16 ***
## PPM_DAY0 4.654e+02 1.882e+02 2.473 0.016619 *
## PPM_DAY3 -1.988e+03 7.304e+02 -2.721 0.008782 **
## PPM_DAY6 1.064e+03 6.893e+02 1.544 0.128452
## PPM_DAY30 -3.833e+02 2.712e+02 -1.413 0.163372
## PPM_DAY180 5.140e+02 2.135e+02 2.407 0.019606 *
## NB_FLIGHT 3.768e-03 9.543e-04 3.949 0.000233 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 104.3 on 53 degrees of freedom
## Multiple R-squared: 0.4583, Adjusted R-squared: 0.3969
## F-statistic: 7.472 on 6 and 53 DF, p-value: 7.908e-06
dfa$modelB <- predict( modelB , dfa)
# Plot scattered chart: Model Prediction vs Actual
plot( data.frame(cbind(actuals=dfa$REVENUE, predicteds=dfa$modelB )) )
# Plot Hexa Binning chart: Model Prediction vs Actual
plot(hexbin(dfa$REVENUE, dfa$modelB , xbins=5) , main="Hexagonal Binning")
Chart below is representing normalized (around their means) the fields: revenue, prediction, day 0 to 180
agg_dfa<-agg_dfa[,c(1,21,22,23)] # Group.1(Quarter), REVENUE, modelA, modelB
dfc <- dfp[as.integer(dfp$YEAR)>2017,]
dfc$modelA <- as.integer( predict( modelA , dfc) )
dfc$modelB <- as.integer( predict( modelB , dfc) )
dfc <- dfc[,c(22,23,24)]
agg_dfc <- aggregate( dfc$modelA , by=list(dfc$period), FUN=mean, na.rm=TRUE)
agg_dfd <- aggregate( dfc$modelB , by=list(dfc$period), FUN=mean, na.rm=TRUE)
agg_dfa[nrow(agg_dfa) + 1,] = c(agg_dfc[1,1],"",as.integer(agg_dfc[1,2]), as.integer( agg_dfd[1,2] ) )
agg_dfa$REVENUE <- as.integer(agg_dfa$REVENUE)
plot_ly(agg_dfa , x=~agg_dfa$Group.1, y=~agg_dfa$REVENUE, type = "scatter", name = paste0('Revenue ',iata), mode='lines' ) %>%
add_trace(y = ~modelA, name = 'Prediction ALL attributes', mode = 'lines+markers') %>%
add_trace(y = ~modelB, name = 'Prediction select att.', mode = 'lines+markers')
## Warning: Can't display both discrete & non-discrete data on same axis