What factors explain the difference in price between an economy ticket and a premium-economy airline ticket?

Setting Up

# Load the airline fare data. stringsAsFactors = TRUE keeps the text columns
# (Airline, Aircraft, TravelMonth, IsInternational) as factors on R >= 4.0 too,
# where read.csv() otherwise returns them as character vectors; summary()
# needs factors to tabulate the levels as shown in the output below.
airline.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a data viewer window, so it is only called in an interactive
# session; in a batch run (Rscript, knitting) it is skipped.
if (interactive()) {
  View(airline.df)
}
summary(airline.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Plots Comparing Various Factors

library(car)

# Compare economy and premium-economy prices side by side for each candidate
# explanatory variable. Two panels per page (one per fare class); the previous
# graphics settings are saved so they can be restored afterwards instead of
# leaking into the car plots further down.
old_par <- par(mfrow = c(1, 2))

# Categorical variables: one box per level. boxplot() with a formula works
# whether the grouping column is stored as a factor or as character.
boxplot(PriceEconomy ~ Airline, data = airline.df)
boxplot(PricePremium ~ Airline, data = airline.df)

boxplot(PriceEconomy ~ Aircraft, data = airline.df)
boxplot(PricePremium ~ Aircraft, data = airline.df)

# Numeric variable: plain scatterplots of price against flight duration.
plot(PriceEconomy ~ FlightDuration, data = airline.df)
plot(PricePremium ~ FlightDuration, data = airline.df)

boxplot(PriceEconomy ~ TravelMonth, data = airline.df)
boxplot(PricePremium ~ TravelMonth, data = airline.df)

boxplot(PriceEconomy ~ IsInternational, data = airline.df)
boxplot(PricePremium ~ IsInternational, data = airline.df)

# Back to the original single-panel layout before the car plots.
par(old_par)

# Relative price is the response in the regression later on, so it goes on the
# y axis here, which is also what the "Price Relative vs ..." titles announce.
scatterplot(PriceRelative ~ WidthDifference, data = airline.df,
            main = "Price Relative vs width difference")

scatterplot(PriceRelative ~ PitchDifference, data = airline.df,
            main = "Price Relative vs pitch difference")

# Passing data = keeps the panel labels as plain column names.
scatterplotMatrix(~ PriceRelative + PitchDifference + WidthDifference,
                  data = airline.df)

Correlation Plots

library(corrplot)
## corrplot 0.84 loaded
library(corrgram)

# Keep the fourteen numeric columns (everything except Airline, Aircraft,
# TravelMonth and IsInternational), compute their pairwise correlation matrix,
# and draw it as a corrgram with pie panels above the diagonal.
airlines1 <- airline.df[, c(
  "FlightDuration",
  "SeatsEconomy", "SeatsPremium",
  "PitchEconomy", "PitchPremium",
  "WidthEconomy", "WidthPremium",
  "PriceEconomy", "PricePremium", "PriceRelative",
  "SeatsTotal",
  "PitchDifference", "WidthDifference",
  "PercentPremiumSeats"
)]
cor.airline <- cor(airlines1)
corrgram(cor.airline, upper.panel = panel.pie)

Correlogram for Relative Price, Width Difference and Pitch Difference:

 # Correlations among relative price and the two seat-comfort differences.
# The columns are selected by name rather than by position (14, 16, 17), so
# the selection stays correct even if the CSV column order changes.
airline1 <- airline.df[, c(
  "PriceRelative", "PitchDifference", "WidthDifference"
)]
cor.airline <- cor(airline1)
library(corrgram)
corrgram(cor.airline, upper.panel = panel.pie)

Pearson’s Test

# Two-sided Pearson correlation tests (the cor.test() default, as the output
# headers confirm) between the fare columns and the seat-comfort differences.

# Economy vs premium-economy price: strong positive correlation (r = 0.90).
cor.test(airline.df$PriceEconomy,airline.df$PricePremium)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceEconomy and airline.df$PricePremium
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8826622 0.9172579
## sample estimates:
##       cor 
## 0.9013887
# Economy price vs width difference: weak, not significant at the 5% level.
cor.test(airline.df$PriceEconomy,airline.df$WidthDifference)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceEconomy and airline.df$WidthDifference
## t = -1.8109, df = 456, p-value = 0.07081
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.174773388  0.007182676
## sample estimates:
##         cor 
## -0.08449975
# Premium-economy price vs width difference: essentially no correlation.
cor.test(airline.df$PricePremium,airline.df$WidthDifference)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PricePremium and airline.df$WidthDifference
## t = -0.24585, df = 456, p-value = 0.8059
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.10303032  0.08019923
## sample estimates:
##         cor 
## -0.01151218
# Economy price vs pitch difference: weak negative correlation, significant
# at the 5% level (p = 0.033).
cor.test(airline.df$PriceEconomy,airline.df$PitchDifference)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceEconomy and airline.df$PitchDifference
## t = -2.1359, df = 456, p-value = 0.03322
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.189424531 -0.007970989
## sample estimates:
##         cor 
## -0.09952511
# Premium-economy price vs pitch difference: essentially no correlation.
cor.test(airline.df$PricePremium,airline.df$PitchDifference)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PricePremium and airline.df$PitchDifference
## t = -0.38585, df = 456, p-value = 0.6998
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1095118  0.0736825
## sample estimates:
##         cor 
## -0.01806629

Linear Regression Model

Y = Relative Price

X1 = Pitch Difference

X2 = Width Difference

# Regress relative price on the seat-width and seat-pitch differences.
# The formula uses bare column names with data = airline.df instead of
# airline.df$... terms: the estimates are identical, but the coefficients get
# plain names and predict(fit, newdata = ...) can find the variables.
fit <- lm(PriceRelative ~ WidthDifference + PitchDifference, data = airline.df)
summary(fit)
## 
## Call:
## lm(formula = PriceRelative ~ WidthDifference + PitchDifference, 
##     data = airline.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84163 -0.28484 -0.07241  0.17698  1.18778 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.10514    0.08304  -1.266 0.206077    
## WidthDifference  0.11621    0.02356   4.933 1.14e-06 ***
## PitchDifference  0.06019    0.01590   3.785 0.000174 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared:  0.2593, Adjusted R-squared:  0.2561 
## F-statistic: 79.65 on 2 and 455 DF,  p-value: < 2.2e-16

Conclusion

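The p-values for both coefficients (p < 0.001) indicate that relative price is significantly associated with the pitch difference and the width difference. However, the model explains only about 26% of the variance in relative price (R-squared = 0.2593), so these two factors account for part of the price gap, not all of it.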

Revisiting the question: What factors explain the difference in price between an economy ticket and a premium-economy airline ticket?

Ans: Pitch Difference and Width Difference.