# Load the six-airlines fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0 too; the per-level
# counts shown in the summary output depend on those columns being factors.
sixair <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# Per-column overview: level counts for factors, quartiles for numerics.
summary(sixair)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Descriptive statistics for every column of sixair via psych::describe().
# In the printed table the rows flagged with "*" are the categorical columns.
library("psych")
describe(x = sixair)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Cross-tabulate PriceRelative against Airline, then run Pearson's
# chi-squared test of independence on the contingency table.
# NOTE(review): PriceRelative is numeric, so each distinct value becomes its
# own row and the table is sparse (see the approximation warning in the
# output). Consider kruskal.test() or aov() for this comparison -- confirm.
c1 <- xtabs(formula = ~ PriceRelative + Airline, data = sixair)
chisq.test(x = c1)
## Warning in chisq.test(c1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: c1
## X-squared = 1402.9, df = 485, p-value < 2.2e-16
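Here the p-value is < 2.2e-16, which is less than 0.05, so the null hypothesis of independence is rejected: Relative Price is significantly associated with Airline. (Note the warning above: with so many sparse cells the chi-squared approximation may be unreliable.)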
# Cross-tabulate PriceRelative against Aircraft, then run Pearson's
# chi-squared test of independence on the contingency table.
# NOTE(review): PriceRelative is numeric, so the table has one row per
# distinct value and is sparse (see the approximation warning in the output).
c2 <- xtabs(formula = ~ PriceRelative + Aircraft, data = sixair)
chisq.test(x = c2)
## Warning in chisq.test(c2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: c2
## X-squared = 245.44, df = 97, p-value = 7.647e-15
Here the p-value is 7.647e-15, which is less than 0.05, so the null hypothesis of independence is rejected: Relative Price is significantly associated with Aircraft.
# Cross-tabulate PriceRelative against TravelMonth, then run Pearson's
# chi-squared test of independence on the contingency table.
# NOTE(review): PriceRelative is numeric, so the table has one row per
# distinct value and is sparse (see the approximation warning in the output).
c3 <- xtabs(formula = ~ PriceRelative + TravelMonth, data = sixair)
chisq.test(x = c3)
## Warning in chisq.test(c3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: c3
## X-squared = 169.32, df = 291, p-value = 1
-> Here the p-value is 1, which is greater than 0.05, so we fail to reject the null hypothesis of independence: there is no evidence that TravelMonth is associated with the Relative Price.
# Cross-tabulate PriceRelative against IsInternational, then run Pearson's
# chi-squared test of independence on the contingency table.
# NOTE(review): PriceRelative is numeric, so the table has one row per
# distinct value and is sparse (see the approximation warning in the output).
c4 <- xtabs(formula = ~ PriceRelative + IsInternational, data = sixair)
chisq.test(x = c4)
## Warning in chisq.test(c4): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: c4
## X-squared = 163.29, df = 97, p-value = 2.946e-05
-> Here the p-value is 2.946e-05, which is less than 0.05, so the null hypothesis of independence is rejected: Relative Price is significantly associated with whether the flight is international or domestic.
# Pearson correlation between relative price and flight duration.
cor(x = sixair$PriceRelative, y = sixair$FlightDuration)
## [1] 0.121075
-> Since the correlation coefficient is 0.121, there is a very weak positive correlation between Flight Duration and Relative Price.
7) Correlation Matrix
# Correlation matrix of columns 6 through 18 of sixair (the seat, pitch,
# width and price variables), rounded to four decimal places.
air_q <- sixair[, seq(6, 18)]
round(cor(air_q), digits = 4)
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium
## SeatsEconomy 1.0000 0.6251 0.1441 0.1192
## SeatsPremium 0.6251 1.0000 -0.0342 0.0049
## PitchEconomy 0.1441 -0.0342 1.0000 -0.5506
## PitchPremium 0.1192 0.0049 -0.5506 1.0000
## WidthEconomy 0.3737 0.4558 0.2945 -0.0237
## WidthPremium 0.1024 -0.0027 -0.5393 0.7503
## PriceEconomy 0.1282 0.1136 0.3687 0.0504
## PricePremium 0.1770 0.2176 0.2261 0.0885
## PriceRelative 0.0040 -0.0972 -0.4230 0.4175
## SeatsTotal 0.9926 0.7152 0.1237 0.1075
## PitchDifference 0.0353 0.0164 -0.7825 0.9506
## WidthDifference -0.0807 -0.2162 -0.6356 0.7033
## PercentPremiumSeats -0.3309 0.4850 -0.1028 -0.1755
## WidthEconomy WidthPremium PriceEconomy PricePremium
## SeatsEconomy 0.3737 0.1024 0.1282 0.1770
## SeatsPremium 0.4558 -0.0027 0.1136 0.2176
## PitchEconomy 0.2945 -0.5393 0.3687 0.2261
## PitchPremium -0.0237 0.7503 0.0504 0.0885
## WidthEconomy 1.0000 0.0819 0.0680 0.1505
## WidthPremium 0.0819 1.0000 -0.0570 0.0640
## PriceEconomy 0.0680 -0.0570 1.0000 0.9014
## PricePremium 0.1505 0.0640 0.9014 1.0000
## PriceRelative -0.0440 0.5042 -0.2886 0.0318
## SeatsTotal 0.4055 0.0913 0.1324 0.1923
## PitchDifference -0.1272 0.7601 -0.0995 -0.0181
## WidthDifference -0.3932 0.8841 -0.0845 -0.0115
## PercentPremiumSeats 0.2271 -0.1833 0.0653 0.1164
## PriceRelative SeatsTotal PitchDifference
## SeatsEconomy 0.0040 0.9926 0.0353
## SeatsPremium -0.0972 0.7152 0.0164
## PitchEconomy -0.4230 0.1237 -0.7825
## PitchPremium 0.4175 0.1075 0.9506
## WidthEconomy -0.0440 0.4055 -0.1272
## WidthPremium 0.5042 0.0913 0.7601
## PriceEconomy -0.2886 0.1324 -0.0995
## PricePremium 0.0318 0.1923 -0.0181
## PriceRelative 1.0000 -0.0116 0.4687
## SeatsTotal -0.0116 1.0000 0.0342
## PitchDifference 0.4687 0.0342 1.0000
## WidthDifference 0.4858 -0.1058 0.7609
## PercentPremiumSeats -0.1616 -0.2209 -0.0926
## WidthDifference PercentPremiumSeats
## SeatsEconomy -0.0807 -0.3309
## SeatsPremium -0.2162 0.4850
## PitchEconomy -0.6356 -0.1028
## PitchPremium 0.7033 -0.1755
## WidthEconomy -0.3932 0.2271
## WidthPremium 0.8841 -0.1833
## PriceEconomy -0.0845 0.0653
## PricePremium -0.0115 0.1164
## PriceRelative 0.4858 -0.1616
## SeatsTotal -0.1058 -0.2209
## PitchDifference 0.7609 -0.0926
## WidthDifference 1.0000 -0.2756
## PercentPremiumSeats -0.2756 1.0000
-> From the above correlation matrix, we see that there is a very weak correlation (|r| < 0.2) between PriceRelative and SeatsEconomy, SeatsPremium, WidthEconomy, PricePremium, SeatsTotal and PercentPremiumSeats. There is a weak-to-moderate correlation (|r| between about 0.29 and 0.50) between PriceRelative and PitchEconomy, PitchPremium, WidthPremium, PriceEconomy, PitchDifference and WidthDifference; of these, PitchEconomy and PriceEconomy are negatively correlated with PriceRelative.
# Make sure the categorical predictors are factors before modelling.
# No value recoding is performed: the columns already hold the text labels
# (AirFrance/British/..., AirBus/Boeing, Domestic/International). The data
# frame has no `Res` column, so label assignments indexed on sixair$Res select
# nothing and have been dropped; only the factor() conversion has any effect.
factor_cols <- c("Airline", "Aircraft", "IsInternational")
sixair[factor_cols] <- lapply(sixair[factor_cols], factor)
# Check that the columns are now factors.
str(sixair)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
9) Regression Analysis
# Linear model of relative price on airline, aircraft, flight duration,
# international status and the pitch/width comfort measures.
# NOTE(review): PitchPremium and PitchDifference correlate at about 0.95 in
# the correlation matrix, so their individual coefficients may be unstable --
# confirm whether both belong in the model.
reg <- lm(
  PriceRelative ~ Airline + Aircraft + FlightDuration + IsInternational +
    PitchPremium + PitchDifference + WidthDifference,
  data = sixair
)
summary(reg)
##
## Call:
## lm(formula = PriceRelative ~ Airline + Aircraft + FlightDuration +
## IsInternational + PitchPremium + PitchDifference + WidthDifference,
## data = sixair)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81510 -0.19268 -0.05124 0.09981 1.47122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.029133 4.238674 -0.243 0.808275
## AirlineBritish 0.274168 0.115020 2.384 0.017559 *
## AirlineDelta 0.003609 0.198262 0.018 0.985486
## AirlineJet 0.531175 0.140365 3.784 0.000175 ***
## AirlineSingapore 0.308822 0.079777 3.871 0.000125 ***
## AirlineVirgin 0.366455 0.131493 2.787 0.005549 **
## AircraftBoeing -0.017079 0.046104 -0.370 0.711220
## FlightDuration 0.037123 0.006685 5.554 4.82e-08 ***
## IsInternationalInternational -0.469714 0.331689 -1.416 0.157436
## PitchPremium 0.026332 0.124471 0.212 0.832556
## PitchDifference 0.042179 0.077243 0.546 0.585306
## WidthDifference 0.087253 0.083851 1.041 0.298637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3599 on 446 degrees of freedom
## Multiple R-squared: 0.3774, Adjusted R-squared: 0.362
## F-statistic: 24.57 on 11 and 446 DF, p-value: < 2.2e-16
# Put observed PriceRelative next to the model's fitted values.
# The column names are generated from the argument expressions as written
# (the parentheses around fitted(reg) are why the second column prints as
# X.fitted.reg..), so the call is left exactly as is.
regv <- data.frame(sixair$PriceRelative, (fitted(reg)))
# car is attached here for some(); attaching it masks psych's logit (see the
# message below).
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Print a handful of rows of regv to compare observed and fitted values.
# NOTE(review): some() appears to pick rows at random, so the rows shown will
# presumably differ between runs -- confirm against the car documentation.
some(regv)
## sixair.PriceRelative X.fitted.reg..
## 21 0.34 0.3919354
## 109 0.47 0.6440065
## 112 0.36 0.5727294
## 117 0.11 0.2790825
## 223 0.08 0.1141994
## 325 0.13 0.6780574
## 359 0.03 0.2615899
## 378 0.37 0.4969949
## 395 1.04 0.9907807
## 410 0.99 0.6457624
-> According to the chi-squared tests, the difference in price between an economy ticket and a premium-economy ticket is associated with the Airline brand (Air France, British, Delta, Jet, Singapore or Virgin), the flight type (International or Domestic) and the Aircraft, but not with TravelMonth. In the regression, however, only the Airline brand and FlightDuration are statistically significant; Aircraft, IsInternational, PitchPremium, PitchDifference and WidthDifference are not significant once the other variables are controlled for, even though the pitch and width measures show weak-to-moderate correlations with the Relative Price on their own.