# Load the airline fare data set and print descriptive statistics for every
# column.
library(psych)

# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0.0, where the default
# changed to FALSE; the str() output further down reports them as factors.
airline <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
describe(airline)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Pearson chi-squared test of independence: PriceRelative vs. Airline.
# PriceRelative is numeric, so every distinct value forms its own table row.
chi1 <- xtabs(formula = ~ PriceRelative + Airline, data = airline)
chisq.test(x = chi1)
## Warning in chisq.test(chi1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi1
## X-squared = 1402.9, df = 485, p-value < 2.2e-16
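-> The p-value is < 2.2e-16, which indicates a statistically significant association between PriceRelative and Airline. (Note the warning above: with this many sparse cells the chi-squared approximation may be unreliable.)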
# Pearson chi-squared test of independence: PriceRelative vs. Aircraft.
# PriceRelative is numeric, so every distinct value forms its own table row.
chi2 <- xtabs(formula = ~ PriceRelative + Aircraft, data = airline)
chisq.test(x = chi2)
## Warning in chisq.test(chi2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi2
## X-squared = 245.44, df = 97, p-value = 7.647e-15
-> The p-value is 7.647e-15, which indicates a statistically significant association between PriceRelative and Aircraft type.
## Pearson Chi-square Test for PriceRelative and Travel Month
# Pearson chi-squared test of independence: PriceRelative vs. TravelMonth.
# PriceRelative is numeric, so every distinct value forms its own table row.
chi3 <- xtabs(formula = ~ PriceRelative + TravelMonth, data = airline)
chisq.test(x = chi3)
## Warning in chisq.test(chi3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi3
## X-squared = 169.32, df = 291, p-value = 1
-> The p-value is 1, which gives no evidence that PriceRelative depends on Travel Month.
# Pearson chi-squared test of independence: PriceRelative vs. IsInternational.
# PriceRelative is numeric, so every distinct value forms its own table row.
chi4 <- xtabs(formula = ~ PriceRelative + IsInternational, data = airline)
chisq.test(x = chi4)
## Warning in chisq.test(chi4): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi4
## X-squared = 163.29, df = 97, p-value = 2.946e-05
-> The p-value is 2.946e-05, which indicates a statistically significant association between PriceRelative and whether the flight is International or Domestic.
# Pearson correlation between PriceRelative and FlightDuration, to 3 decimals.
with(airline, round(cor(PriceRelative, FlightDuration), digits = 3))
## [1] 0.121
-> The correlation coefficient is 0.121, which indicates a very weak positive correlation between PriceRelative and FlightDuration.
# Pairwise Pearson correlations among columns 6 through 18 of the data frame
# (SeatsEconomy ... PercentPremiumSeats), rounded to three decimals.
air_quant <- airline[6:18]
round(cor(air_quant), digits = 3)
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium
## SeatsEconomy 1.000 0.625 0.144 0.119
## SeatsPremium 0.625 1.000 -0.034 0.005
## PitchEconomy 0.144 -0.034 1.000 -0.551
## PitchPremium 0.119 0.005 -0.551 1.000
## WidthEconomy 0.374 0.456 0.294 -0.024
## WidthPremium 0.102 -0.003 -0.539 0.750
## PriceEconomy 0.128 0.114 0.369 0.050
## PricePremium 0.177 0.218 0.226 0.089
## PriceRelative 0.004 -0.097 -0.423 0.418
## SeatsTotal 0.993 0.715 0.124 0.108
## PitchDifference 0.035 0.016 -0.783 0.951
## WidthDifference -0.081 -0.216 -0.636 0.703
## PercentPremiumSeats -0.331 0.485 -0.103 -0.175
## WidthEconomy WidthPremium PriceEconomy PricePremium
## SeatsEconomy 0.374 0.102 0.128 0.177
## SeatsPremium 0.456 -0.003 0.114 0.218
## PitchEconomy 0.294 -0.539 0.369 0.226
## PitchPremium -0.024 0.750 0.050 0.089
## WidthEconomy 1.000 0.082 0.068 0.151
## WidthPremium 0.082 1.000 -0.057 0.064
## PriceEconomy 0.068 -0.057 1.000 0.901
## PricePremium 0.151 0.064 0.901 1.000
## PriceRelative -0.044 0.504 -0.289 0.032
## SeatsTotal 0.405 0.091 0.132 0.192
## PitchDifference -0.127 0.760 -0.100 -0.018
## WidthDifference -0.393 0.884 -0.084 -0.012
## PercentPremiumSeats 0.227 -0.183 0.065 0.116
## PriceRelative SeatsTotal PitchDifference
## SeatsEconomy 0.004 0.993 0.035
## SeatsPremium -0.097 0.715 0.016
## PitchEconomy -0.423 0.124 -0.783
## PitchPremium 0.418 0.108 0.951
## WidthEconomy -0.044 0.405 -0.127
## WidthPremium 0.504 0.091 0.760
## PriceEconomy -0.289 0.132 -0.100
## PricePremium 0.032 0.192 -0.018
## PriceRelative 1.000 -0.012 0.469
## SeatsTotal -0.012 1.000 0.034
## PitchDifference 0.469 0.034 1.000
## WidthDifference 0.486 -0.106 0.761
## PercentPremiumSeats -0.162 -0.221 -0.093
## WidthDifference PercentPremiumSeats
## SeatsEconomy -0.081 -0.331
## SeatsPremium -0.216 0.485
## PitchEconomy -0.636 -0.103
## PitchPremium 0.703 -0.175
## WidthEconomy -0.393 0.227
## WidthPremium 0.884 -0.183
## PriceEconomy -0.084 0.065
## PricePremium -0.012 0.116
## PriceRelative 0.486 -0.162
## SeatsTotal -0.106 -0.221
## PitchDifference 0.761 -0.093
## WidthDifference 1.000 -0.276
## PercentPremiumSeats -0.276 1.000
-> The correlation matrix shows that PriceRelative is only very weakly correlated with SeatsEconomy, SeatsPremium, WidthEconomy, PricePremium, SeatsTotal and PercentPremiumSeats.
-> The matrix also shows a weak-to-moderate correlation (|r| between roughly 0.29 and 0.50) between PriceRelative and PitchEconomy, PitchPremium, WidthPremium, PriceEconomy, PitchDifference and WidthDifference.
# Store the categorical columns as factors so that lm() treats them as
# categorical predictors.
# The columns already hold their text labels (e.g. "AirFrance", "Boeing",
# "International"), so no value recoding is needed -- only the type
# conversion. (The data frame has no `Res` column to recode from.)
airline$Airline <- factor(airline$Airline)
airline$Aircraft <- factor(airline$Aircraft)
airline$TravelMonth <- factor(airline$TravelMonth)
airline$IsInternational <- factor(airline$IsInternational)
# check that the data types have changed to factor
str(airline)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Ordinary least squares fit of PriceRelative on airline, aircraft, flight and
# seat-comfort predictors, followed by the coefficient table.
regress <- lm(
  formula = PriceRelative ~ Airline + Aircraft + FlightDuration +
    IsInternational + PitchPremium + PitchDifference + WidthDifference,
  data = airline
)
summary(object = regress)
##
## Call:
## lm(formula = PriceRelative ~ Airline + Aircraft + FlightDuration +
## IsInternational + PitchPremium + PitchDifference + WidthDifference,
## data = airline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81510 -0.19268 -0.05124 0.09981 1.47122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.029133 4.238674 -0.243 0.808275
## AirlineBritish 0.274168 0.115020 2.384 0.017559 *
## AirlineDelta 0.003609 0.198262 0.018 0.985486
## AirlineJet 0.531175 0.140365 3.784 0.000175 ***
## AirlineSingapore 0.308822 0.079777 3.871 0.000125 ***
## AirlineVirgin 0.366455 0.131493 2.787 0.005549 **
## AircraftBoeing -0.017079 0.046104 -0.370 0.711220
## FlightDuration 0.037123 0.006685 5.554 4.82e-08 ***
## IsInternationalInternational -0.469714 0.331689 -1.416 0.157436
## PitchPremium 0.026332 0.124471 0.212 0.832556
## PitchDifference 0.042179 0.077243 0.546 0.585306
## WidthDifference 0.087253 0.083851 1.041 0.298637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3599 on 446 degrees of freedom
## Multiple R-squared: 0.3774, Adjusted R-squared: 0.362
## F-statistic: 24.57 on 11 and 446 DF, p-value: < 2.2e-16
# Put the observed PriceRelative values next to the model's fitted values.
# The columns are left unnamed, so data.frame() derives their names from the
# argument expressions themselves (including the extra parentheses).
comp <- data.frame(airline$PriceRelative, (fitted(regress)))
# car is attached here for some(); the message below reports that this masks
# logit from psych.
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Print a handful of rows of the comparison table (presumably a random
# sample, so the rows shown will differ between runs -- confirm in car docs).
some(comp)
## airline.PriceRelative X.fitted.regress..
## 31 0.34 0.3236282
## 52 1.03 0.3826546
## 97 0.26 0.9536572
## 123 0.11 0.2478988
## 168 0.91 0.8101917
## 225 0.08 0.1141994
## 341 0.36 0.2214965
## 372 0.14 0.6362081
## 400 0.74 1.0000616
## 413 0.99 0.6457624
What factors explain the difference in price between an economy ticket and a premium-economy airline ticket?
The analysis was carried out to determine which factors contribute to the difference in price between an economy ticket and a premium-economy airline ticket, and the following conclusions were drawn:
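The major factors that contributed to the price difference are the airline brand (Air France, British, Delta, Jet, Singapore or Virgin) and the flight type (International or Domestic). Note, however, that in the regression above the IsInternational coefficient is not statistically significant (p = 0.157), whereas several airline coefficients are.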
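The minor factors that contributed to the price difference are FlightDuration, PitchPremium, PitchDifference and WidthDifference. Note that in the regression above FlightDuration is highly significant (p = 4.82e-08), while the pitch and width coefficients are not significant once the other predictors are included.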