JetBlue ( B6 )

1 - Look for direct correlation between specific ppm (price per mile) on selected ‘future-departing day’ and ‘Revenue’

1 - Box charts future-departing days price per mile variation

### 1b - Number of flight

plot_ly( dfp , x=~dfp$MONTH, y=~dfp$NB_FLIGHT, type = "scatter", name = paste0('Revenue ',iata), mode='lines'  ) 

2 - Direct Correlation

pairs(dfa[,c(6,9,15,19,26)], pch = 19)

3 - Building revenue predictive model based on ppm for all Days

# Model A:
modelA <- lm( REVENUE ~ PPM_DAY0+PPM_DAY1+PPM_DAY2+PPM_DAY3+PPM_DAY4+PPM_DAY5+PPM_DAY6+PPM_DAY7+PPM_DAY15+PPM_DAY21+PPM_DAY30+PPM_DAY60+PPM_DAY90+PPM_DAY180, data = dfa )
# Stats summary:
summary( modelA )
## 
## Call:
## lm(formula = REVENUE ~ PPM_DAY0 + PPM_DAY1 + PPM_DAY2 + PPM_DAY3 + 
##     PPM_DAY4 + PPM_DAY5 + PPM_DAY6 + PPM_DAY7 + PPM_DAY15 + PPM_DAY21 + 
##     PPM_DAY30 + PPM_DAY60 + PPM_DAY90 + PPM_DAY180, data = dfa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -242.18  -60.32   11.24   60.81  164.54 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1464.18      73.21  19.999   <2e-16 ***
## PPM_DAY0      630.24     274.79   2.294   0.0265 *  
## PPM_DAY1    -3124.78    1286.39  -2.429   0.0192 *  
## PPM_DAY2      -38.82    1835.31  -0.021   0.9832    
## PPM_DAY3     1021.15    1760.34   0.580   0.5648    
## PPM_DAY4     4008.15    3510.83   1.142   0.2596    
## PPM_DAY5    -3961.02    2800.14  -1.415   0.1641    
## PPM_DAY6     -537.53    1442.58  -0.373   0.7112    
## PPM_DAY7     1781.67    1658.82   1.074   0.2885    
## PPM_DAY15     826.45     748.96   1.103   0.2757    
## PPM_DAY21    -262.78     707.20  -0.372   0.7120    
## PPM_DAY30     155.18     578.56   0.268   0.7898    
## PPM_DAY60     306.82     515.13   0.596   0.5544    
## PPM_DAY90    -257.40     420.43  -0.612   0.5435    
## PPM_DAY180    337.87     252.92   1.336   0.1883    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 97.59 on 45 degrees of freedom
## Multiple R-squared:  0.5977, Adjusted R-squared:  0.4725 
## F-statistic: 4.775 on 14 and 45 DF,  p-value: 2.934e-05
dfa$modelA <- predict( modelA , dfa)

# Plot scattered chart: Model Prediction vs Actual
plot( data.frame(cbind(actuals=dfa$REVENUE, predicteds=dfa$modelA )) )

# Plot Hexa Binning chart: Model Prediction vs Actual
plot(hexbin(dfa$REVENUE, dfa$modelA , xbins=5) , main="Hexagonal Binning")

4 - Building revenue predictive model based on ppm for future depart days 0, 3, 6, 30 and 180

# Model B:
modelB <- lm( REVENUE ~ PPM_DAY0+PPM_DAY3+PPM_DAY6+PPM_DAY30+PPM_DAY180+NB_FLIGHT , data = dfa )
summary( modelB )
## 
## Call:
## lm(formula = REVENUE ~ PPM_DAY0 + PPM_DAY3 + PPM_DAY6 + PPM_DAY30 + 
##     PPM_DAY180 + NB_FLIGHT, data = dfa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -301.157  -55.851   -6.482   75.760  191.368 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.388e+03  7.694e+01  18.045  < 2e-16 ***
## PPM_DAY0     4.654e+02  1.882e+02   2.473 0.016619 *  
## PPM_DAY3    -1.988e+03  7.304e+02  -2.721 0.008782 ** 
## PPM_DAY6     1.064e+03  6.893e+02   1.544 0.128452    
## PPM_DAY30   -3.833e+02  2.712e+02  -1.413 0.163372    
## PPM_DAY180   5.140e+02  2.135e+02   2.407 0.019606 *  
## NB_FLIGHT    3.768e-03  9.543e-04   3.949 0.000233 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 104.3 on 53 degrees of freedom
## Multiple R-squared:  0.4583, Adjusted R-squared:  0.3969 
## F-statistic: 7.472 on 6 and 53 DF,  p-value: 7.908e-06
dfa$modelB <- predict( modelB , dfa)

# Plot scattered chart: Model Prediction vs Actual
plot( data.frame(cbind(actuals=dfa$REVENUE, predicteds=dfa$modelB )) )

# Plot Hexa Binning chart: Model Prediction vs Actual
plot(hexbin(dfa$REVENUE, dfa$modelB , xbins=5) , main="Hexagonal Binning")

5 - Line Chart showing ppm variation around the mean for each ppm range, revenue and both predictor models

Chart below is representing normalized (around their means) the fields: revenue, prediction, day 0 to 180

agg_dfa<-agg_dfa[,c(1,21,22,23)] # Group.1(Quarter), REVENUE, modelA, modelB
 
dfc <- dfp[as.integer(dfp$YEAR)>2017,]
dfc$modelA <- as.integer( predict( modelA , dfc) )
dfc$modelB <- as.integer(  predict( modelB , dfc) )
dfc <- dfc[,c(22,23,24)]
agg_dfc <- aggregate( dfc$modelA , by=list(dfc$period), FUN=mean, na.rm=TRUE)
agg_dfd <- aggregate( dfc$modelB , by=list(dfc$period), FUN=mean, na.rm=TRUE)
agg_dfa[nrow(agg_dfa) + 1,] = c(agg_dfc[1,1],"",as.integer(agg_dfc[1,2]), as.integer( agg_dfd[1,2] ) )

agg_dfa$REVENUE <- as.integer(agg_dfa$REVENUE)

plot_ly(agg_dfa , x=~agg_dfa$Group.1, y=~agg_dfa$REVENUE, type = "scatter", name = paste0('Revenue ',iata), mode='lines'  ) %>%
  add_trace(y = ~modelA, name = 'Prediction ALL attributes', mode = 'lines+markers') %>%
  add_trace(y = ~modelB, name = 'Prediction select att.', mode = 'lines+markers') 
## Warning: Can't display both discrete & non-discrete data on same axis

2108 Q1 Prediction range: 1,462 Millions to 1,500 Millions