What factors explain the difference in price between an economy ticket and a premium-economy airline ticket?

# Per-column descriptive statistics (n, mean, sd, median, range, skew, ...)
# for the airline data set, stored and then echoed.
datadescp <- describe(x = AirlinesRaw)
print(datadescp)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## IsInternationalFlag    6 458    0.91    0.28    1.00    1.00    0.00  0.00
## SeatsEconomy           7 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           8 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           9 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium          10 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          11 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          12 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          13 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          14 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         15 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            16 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       17 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       18 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   19 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## IsInternationalFlag    1.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
# Column-wise summary: level counts for the factor columns and quartiles/mean
# for the numeric columns.
datasummary <- summary(object = AirlinesRaw)
print(datasummary)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational IsInternationalFlag  SeatsEconomy    SeatsPremium  
##  Domestic     : 40    Min.   :0.0000      Min.   : 78.0   Min.   : 8.00  
##  International:418    1st Qu.:1.0000      1st Qu.:133.0   1st Qu.:21.00  
##                       Median :1.0000      Median :185.0   Median :36.00  
##                       Mean   :0.9127      Mean   :202.3   Mean   :33.65  
##                       3rd Qu.:1.0000      3rd Qu.:243.0   3rd Qu.:40.00  
##                       Max.   :1.0000      Max.   :389.0   Max.   :66.00  
##   PitchEconomy    PitchPremium    WidthEconomy    WidthPremium  
##  Min.   :30.00   Min.   :34.00   Min.   :17.00   Min.   :17.00  
##  1st Qu.:31.00   1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00  
##  Median :31.00   Median :38.00   Median :18.00   Median :19.00  
##  Mean   :31.22   Mean   :37.91   Mean   :17.84   Mean   :19.47  
##  3rd Qu.:32.00   3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00  
##  Max.   :33.00   Max.   :40.00   Max.   :19.00   Max.   :21.00  
##   PriceEconomy   PricePremium    PriceRelative      SeatsTotal 
##  Min.   :  65   Min.   :  86.0   Min.   :0.0200   Min.   : 98  
##  1st Qu.: 413   1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166  
##  Median :1242   Median :1737.0   Median :0.3650   Median :227  
##  Mean   :1327   Mean   :1845.3   Mean   :0.4872   Mean   :236  
##  3rd Qu.:1909   3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279  
##  Max.   :3593   Max.   :7414.0   Max.   :1.8900   Max.   :441  
##  PitchDifference  WidthDifference PercentPremiumSeats
##  Min.   : 2.000   Min.   :0.000   Min.   : 4.71      
##  1st Qu.: 6.000   1st Qu.:1.000   1st Qu.:12.28      
##  Median : 7.000   Median :1.000   Median :13.21      
##  Mean   : 6.688   Mean   :1.633   Mean   :14.65      
##  3rd Qu.: 7.000   3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :10.000   Max.   :4.000   Max.   :24.69
# Average flight duration for every distinct combination of economy/premium
# seat pitch and seat width. The result remains grouped after summarise().
summarydf <- AirlinesRaw %>%
  group_by(PitchEconomy, PitchPremium, WidthEconomy, WidthPremium) %>%
  summarise(AverageDuration = mean(FlightDuration))
print(summarydf)
## # A tibble: 11 x 5
## # Groups:   PitchEconomy, PitchPremium, WidthEconomy [?]
##    PitchEconomy PitchPremium WidthEconomy WidthPremium AverageDuration
##           <int>        <int>        <int>        <int>           <dbl>
##  1           30           40           17           21        3.482407
##  2           31           34           17           17        4.700000
##  3           31           34           18           18        2.142500
##  4           31           38           18           19        7.854971
##  5           31           38           18           21        9.220882
##  6           32           34           17           17        3.465789
##  7           32           35           18           18        4.420000
##  8           32           38           17           19        8.843125
##  9           32           38           18           19        9.120408
## 10           32           38           19           20       10.481000
## 11           33           35           17           17        2.474000

Scatter plot of PriceEconomy (y-axis) vs. PitchEconomy (x-axis).

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
# Scatterplot of economy price (y) against economy seat pitch (x).
scatterplotPitchEcnmyPriceEcnmy <- scatterplot(
  PriceEconomy ~ PitchEconomy,
  data = AirlinesRaw,
  main = "scatter plot of PriceEconomy vs. PitchEconomy"
)

Scatter plot of PricePremium (y-axis) vs. PitchPremium (x-axis).

library(car)
# Scatterplot of premium-economy price (y) against premium-economy seat
# pitch (x). The result is stored under its own name: the original reused the
# economy plot's variable name (a copy-paste slip), which silently overwrote
# the economy result.
scatterplotPitchPrmPricePrm <- scatterplot(
  PricePremium ~ PitchPremium,
  data = AirlinesRaw,
  main = "scatter plot of PricePremium vs. PitchPremium"
)

library(lattice)
# Lattice histogram of the PitchDifference column.
histogram(
  ~ PitchDifference,
  data = AirlinesRaw,
  main = "Distribution of Pitch Difference",
  xlab = "Difference in Pitch",
  col = "blue"
)

correlation between Pitch and Price

# Pearson correlation between premium seat pitch and premium price,
# reported to two decimals.
correlationPMT <- with(AirlinesRaw, cor(PitchPremium, PricePremium))
round(correlationPMT, digits = 2)
## [1] 0.09
# Pearson correlation between economy seat pitch and economy price,
# reported to two decimals.
correlationPCT <- with(AirlinesRaw, cor(PitchEconomy, PriceEconomy))
round(correlationPCT, digits = 2)
## [1] 0.37

correlation between Width and Price

# Pearson correlation between premium price and premium seat width,
# reported to two decimals.
correlationPMTWidth <- with(AirlinesRaw, cor(PricePremium, WidthPremium))
round(correlationPMTWidth, digits = 2)
## [1] 0.06
# Pearson correlation between economy price and economy seat width,
# reported to two decimals.
correlationPCTWidth <- with(AirlinesRaw, cor(PriceEconomy, WidthEconomy))
round(correlationPCTWidth, digits = 2)
## [1] 0.07

Effect of Pitch Difference on the relative price of Economy and Premium Economy airfares

# Mean economy price, premium price and relative price for each distinct
# PitchDifference value.
# NOTE(review): the result is stored under the same name as the
# `PitchDifference` column it groups by. The name is kept so any later
# references keep working, but a distinct name would be clearer.
PitchDifference <- aggregate(
  cbind(PriceEconomy, PricePremium, PriceRelative) ~ PitchDifference,
  data = AirlinesRaw,
  FUN = mean
)
PitchDifference
##   PitchDifference PriceEconomy PricePremium PriceRelative
## 1               2     348.0000     377.3333    0.08708333
## 2               3     369.5625     398.7500    0.08125000
## 3               6    2008.6942    2333.7438    0.34082645
## 4               7    1388.1317    2155.4897    0.51888889
## 5              10     243.8519     435.6481    0.97074074

correlation between Duration and Price

# Pearson correlation between premium price and flight duration,
# reported to two decimals.
correlationDurationPrice <- with(AirlinesRaw, cor(PricePremium, FlightDuration))
round(correlationDurationPrice, digits = 2)
## [1] 0.65
# Pearson correlation between economy price and flight duration, reported to
# two decimals.
# Fix: the original rounded `correlationDurationPrice` (the premium value)
# here, so the economy correlation was computed but never shown. The recorded
# output that follows (0.65) is therefore the premium figure; re-run to
# refresh it.
correlationDurationPriceEcnmy <- cor(AirlinesRaw$PriceEconomy, AirlinesRaw$FlightDuration)
round(correlationDurationPriceEcnmy, 2)
## [1] 0.65

Scatterplot between Duration and Price

# Scatterplot of economy price (y) against flight duration (x). The plot is
# drawn as a side effect; the stored return value is echoed afterwards.
scatterplottEcnmy <- scatterplot(
  x = AirlinesRaw$FlightDuration,
  y = AirlinesRaw$PriceEconomy
)

print(scatterplottEcnmy)
## NULL
# Scatterplot of premium price (y) against flight duration (x). The plot is
# drawn as a side effect; the stored return value is echoed afterwards.
scatterplottPrm <- scatterplot(
  x = AirlinesRaw$FlightDuration,
  y = AirlinesRaw$PricePremium
)

print(scatterplottPrm)
## NULL

Scatterplot between Duration and PriceRelative

# Scatterplot of relative price (y) against flight duration (x). The plot is
# drawn as a side effect; the stored return value is echoed afterwards.
scatterplotPriceRelative <- scatterplot(
  x = AirlinesRaw$FlightDuration,
  y = AirlinesRaw$PriceRelative
)

print(scatterplotPriceRelative)
## NULL

corrgram of all the variables

library(corrgram)

# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal, with variables reordered
# (order = TRUE).
corrgram(
  AirlinesRaw,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airline"
)

# Mean flight duration per airline.
AirlineFlightduration <- AirlinesRaw %>%
  group_by(Airline) %>%
  summarise(avgDuration = mean(FlightDuration))
print(AirlineFlightduration)
## # A tibble: 6 x 2
##     Airline avgDuration
##      <fctr>       <dbl>
## 1 AirFrance    8.988514
## 2   British    7.854971
## 3     Delta    4.028913
## 4       Jet    4.143934
## 5 Singapore   10.481000
## 6    Virgin    9.250484
# Contingency table of row counts by airline and aircraft manufacturer,
# displayed as a flat table.
CrosstableAirlinePlane <- xtabs(~ Airline + Aircraft, data = AirlinesRaw)
ftable(CrosstableAirlinePlane)
##           Aircraft AirBus Boeing
## Airline                         
## AirFrance              36     38
## British                47    128
## Delta                  12     34
## Jet                     7     54
## Singapore              16     24
## Virgin                 33     29
# Row percentages: within each airline, the share of each aircraft
# manufacturer (each row sums to 100).
100 * prop.table(ftable(CrosstableAirlinePlane), margin = 1)
##           Aircraft   AirBus   Boeing
## Airline                             
## AirFrance          48.64865 51.35135
## British            26.85714 73.14286
## Delta              26.08696 73.91304
## Jet                11.47541 88.52459
## Singapore          40.00000 60.00000
## Virgin             53.22581 46.77419
# Mean relative price (PriceRelative) per airline.
AirlinePricerelative <- AirlinesRaw %>%
  group_by(Airline) %>%
  summarise(avgPR = mean(PriceRelative))
print(AirlinePricerelative)
## # A tibble: 6 x 2
##     Airline     avgPR
##      <fctr>     <dbl>
## 1 AirFrance 0.2047297
## 2   British 0.4375429
## 3     Delta 0.1250000
## 4       Jet 0.9396721
## 5 Singapore 0.5297500
## 6    Virgin 0.7606452
# Two-sided Pearson correlation test between relative price and flight
# duration. The `$` expressions are kept so the printed "data:" line is the
# same as before.
cor.test(
  x = AirlinesRaw$PriceRelative,
  y = AirlinesRaw$FlightDuration,
  alternative = "two.sided",
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  AirlinesRaw$PriceRelative and AirlinesRaw$FlightDuration
## t = 2.6046, df = 456, p-value = 0.009498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02977856 0.21036806
## sample estimates:
##      cor 
## 0.121075

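Low correlation: FlightDuration and PriceRelative are only weakly related (r = 0.12), although the relationship is statistically significant (p = 0.0095).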

Regression of PriceRelative on PitchDifference and WidthDifference (model PPW)

# Linear model of relative price on the pitch and width differences between
# premium-economy and economy seats, followed by its coefficient summary.
modelPPW <- lm(
  formula = PriceRelative ~ PitchDifference + WidthDifference,
  data = AirlinesRaw
)
print(summary(modelPPW))
## 
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference, 
##     data = AirlinesRaw)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84163 -0.28484 -0.07241  0.17698  1.18778 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.10514    0.08304  -1.266 0.206077    
## PitchDifference  0.06019    0.01590   3.785 0.000174 ***
## WidthDifference  0.11621    0.02356   4.933 1.14e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared:  0.2593, Adjusted R-squared:  0.2561 
## F-statistic: 79.65 on 2 and 455 DF,  p-value: < 2.2e-16

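Both PitchDifference and WidthDifference have positive, statistically significant coefficients (p < 0.001), and the overall F-test is significant. However, the model explains only about 26% of the variance in PriceRelative (R-squared = 0.26), so seat pitch and width differences account for a real but modest share of the price difference rather than indicating a strong fit.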