A deep analysis on factors that have significant effect on Price of Premium-economy class ticket and price of economy class ticket.
library(psych)
airline <- read.csv(paste("SixAirlines.csv",sep=""))
describe(airline)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
library(Amelia)
missmap(airline)
str(airline)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
library(plotly)
table(airline$Airline)
##
## AirFrance British Delta Jet Singapore Virgin
## 74 175 46 61 40 62
noairline <- plot_ly(
x = c("AirFrance", "British", "Delta","Jet","Singapore","Virgin"),
y = c(74,175,46,61,40,62),
name = "No. of Airline that Dataset have !",
type = "bar"
)
noairline
table(airline$Aircraft)
##
## AirBus Boeing
## 151 307
noaircraft <- plot_ly(
x = c("AirBus", "Boeing"),
y = c(151,307),
name = "No. of Aircraft that Dataset have !",
type = "bar"
)
noaircraft
describe(airline$FlightDuration)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 7.58 3.54 7.79 7.57 4.81 1.25 14.66 13.41 -0.07 -1.12
## se
## X1 0.17
boxplot(airline$FlightDuration,horizontal = TRUE,ylab=" ",xlab="Flight Duration",
main="Boxplot of Fligh tDuration",col="violet")
aggregate(airline$FlightDuration, by=list(Airline = airline$Airline), mean)
## Airline x
## 1 AirFrance 8.988514
## 2 British 7.854971
## 3 Delta 4.028913
## 4 Jet 4.143934
## 5 Singapore 10.481000
## 6 Virgin 9.250484
library(ggplot2)
table(airline$TravelMonth)
##
## Aug Jul Oct Sep
## 127 75 127 129
ggplot(airline, aes(TravelMonth, fill = Airline)) + geom_bar()+
labs(title = "Airlines Travel Month", x = "Travel Month", y = "Count of Flights")
table(airline$IsInternational)
##
## Domestic International
## 40 418
ggplot(airline, aes(IsInternational, fill = Airline)) + geom_bar()+
labs(title = "Is International ?", x = "Is International", y = "Count of Flights")
aggregate(airline$SeatsEconomy, by=list(Airline = airline$Aircraft), mean)
## Airline x
## 1 AirBus 245.5762
## 2 Boeing 181.0326
aggregate(airline$PitchEconomy, by=list(Airline = airline$Aircraft), mean)
## Airline x
## 1 AirBus 31.43709
## 2 Boeing 31.11075
qplot(PitchEconomy, data = airline, facets = Aircraft~Airline)
aggregate(airline$WidthEconomy, by=list(Airline = airline$Aircraft), mean)
## Airline x
## 1 AirBus 18.06623
## 2 Boeing 17.72638
qplot(WidthEconomy, data = airline, facets = Aircraft~Airline)
describe(airline$PriceEconomy)
## vars n mean sd median trimmed mad min max range skew
## X1 1 458 1327.08 988.27 1242 1244.4 1159.39 65 3593 3528 0.51
## kurtosis se
## X1 -0.88 46.18
boxplot(airline$PricePremium,xlab="Premium Price",horizontal = TRUE,col="violet")
aggregate(airline$PriceRelative, by=list(Airline = airline$Airline), mean)
## Airline x
## 1 AirFrance 0.2047297
## 2 British 0.4375429
## 3 Delta 0.1250000
## 4 Jet 0.9396721
## 5 Singapore 0.5297500
## 6 Virgin 0.7606452
aggregate(airline$SeatsTotal, by=list(Airline = airline$Airline), mean)
## Airline x
## 1 AirFrance 241.1622
## 2 British 259.7714
## 3 Delta 159.7826
## 4 Jet 155.9672
## 5 Singapore 274.8000
## 6 Virgin 272.7097
aggregate(airline$SeatsTotal, by=list(Airline = airline$Aircraft), mean)
## Airline x
## 1 AirBus 284.7285
## 2 Boeing 211.9739
aggregate(airline$PitchDifference, by=list(Airline = airline$Airline), mean)
## Airline x
## 1 AirFrance 6.000000
## 2 British 7.000000
## 3 Delta 3.000000
## 4 Jet 9.540984
## 5 Singapore 6.000000
## 6 Virgin 7.000000
aggregate(airline$WidthDifference, by=list(Airline = airline$Airline), mean)
## Airline x
## 1 AirFrance 1.4324324
## 2 British 1.0000000
## 3 Delta 0.3913043
## 4 Jet 3.6557377
## 5 Singapore 1.0000000
## 6 Virgin 3.0000000
library(car)
scatterplotMatrix(formula = ~ FlightDuration + PriceEconomy + PricePremium, cex=0.6,spread=FALSE, smoother.args=list(lty=2),
data=airline, diagonal="histogram",main="Relationship b/w Duration and Price of Economy and Premium ")
scatterplotMatrix(formula = ~PitchPremium + log(PriceRelative) + PitchDifference, cex=0.6,spread=FALSE, smoother.args=list(lty=2),
data=airline, diagonal="histogram",main="Relationship among Pitch Premium, Price Relative and Pitch Differrence")
scatterplotMatrix(formula = ~ PriceEconomy + SeatsEconomy +PricePremium + SeatsPremium, spread=FALSE,cex=0.6, smoother.args=list(lty=2),data=airline,diagonal = "histogram",main="Price of Economy,Premium and Seats Economy, Seats Premium")
round(cor(airline[,c(3,6:18)]),2)
## FlightDuration SeatsEconomy SeatsPremium PitchEconomy
## FlightDuration 1.00 0.20 0.16 0.29
## SeatsEconomy 0.20 1.00 0.63 0.14
## SeatsPremium 0.16 0.63 1.00 -0.03
## PitchEconomy 0.29 0.14 -0.03 1.00
## PitchPremium 0.10 0.12 0.00 -0.55
## WidthEconomy 0.46 0.37 0.46 0.29
## WidthPremium 0.10 0.10 0.00 -0.54
## PriceEconomy 0.57 0.13 0.11 0.37
## PricePremium 0.65 0.18 0.22 0.23
## PriceRelative 0.12 0.00 -0.10 -0.42
## SeatsTotal 0.20 0.99 0.72 0.12
## PitchDifference -0.04 0.04 0.02 -0.78
## WidthDifference -0.12 -0.08 -0.22 -0.64
## PercentPremiumSeats 0.06 -0.33 0.49 -0.10
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## FlightDuration 0.10 0.46 0.10 0.57
## SeatsEconomy 0.12 0.37 0.10 0.13
## SeatsPremium 0.00 0.46 0.00 0.11
## PitchEconomy -0.55 0.29 -0.54 0.37
## PitchPremium 1.00 -0.02 0.75 0.05
## WidthEconomy -0.02 1.00 0.08 0.07
## WidthPremium 0.75 0.08 1.00 -0.06
## PriceEconomy 0.05 0.07 -0.06 1.00
## PricePremium 0.09 0.15 0.06 0.90
## PriceRelative 0.42 -0.04 0.50 -0.29
## SeatsTotal 0.11 0.41 0.09 0.13
## PitchDifference 0.95 -0.13 0.76 -0.10
## WidthDifference 0.70 -0.39 0.88 -0.08
## PercentPremiumSeats -0.18 0.23 -0.18 0.07
## PricePremium PriceRelative SeatsTotal PitchDifference
## FlightDuration 0.65 0.12 0.20 -0.04
## SeatsEconomy 0.18 0.00 0.99 0.04
## SeatsPremium 0.22 -0.10 0.72 0.02
## PitchEconomy 0.23 -0.42 0.12 -0.78
## PitchPremium 0.09 0.42 0.11 0.95
## WidthEconomy 0.15 -0.04 0.41 -0.13
## WidthPremium 0.06 0.50 0.09 0.76
## PriceEconomy 0.90 -0.29 0.13 -0.10
## PricePremium 1.00 0.03 0.19 -0.02
## PriceRelative 0.03 1.00 -0.01 0.47
## SeatsTotal 0.19 -0.01 1.00 0.03
## PitchDifference -0.02 0.47 0.03 1.00
## WidthDifference -0.01 0.49 -0.11 0.76
## PercentPremiumSeats 0.12 -0.16 -0.22 -0.09
## WidthDifference PercentPremiumSeats
## FlightDuration -0.12 0.06
## SeatsEconomy -0.08 -0.33
## SeatsPremium -0.22 0.49
## PitchEconomy -0.64 -0.10
## PitchPremium 0.70 -0.18
## WidthEconomy -0.39 0.23
## WidthPremium 0.88 -0.18
## PriceEconomy -0.08 0.07
## PricePremium -0.01 0.12
## PriceRelative 0.49 -0.16
## SeatsTotal -0.11 -0.22
## PitchDifference 0.76 -0.09
## WidthDifference 1.00 -0.28
## PercentPremiumSeats -0.28 1.00
round(cor(airline$FlightDuration,airline$PitchPremium),2)
## [1] 0.1
round(cor(airline$FlightDuration,airline$PitchEconomy),2)
## [1] 0.29
round(cor(airline$PitchDifference,airline$PriceRelative),2)
## [1] 0.47
library(corrgram)
corrgram(airline[,c(3,6:18)], main="Corrgram of Airline Variables", lower.panel=panel.shade,
upper.panel=panel.pie, text.panel=panel.txt,order=TRUE)
Null Hypothesis : There is no significant relation in factors affecting on Prices of Premium economy and economy tickets.
attach(airline)
cor.test(PricePremium,PriceEconomy)
##
## Pearson's product-moment correlation
##
## data: PricePremium and PriceEconomy
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8826622 0.9172579
## sample estimates:
## cor
## 0.9013887
We can reject the null hypothesis as its p value is less than 0.01.
Null Hypothesis : There is no significant relation between Price relative and PitchDifference
attach(airline)
cor.test(PriceRelative,PitchDifference)
##
## Pearson's product-moment correlation
##
## data: PriceRelative and PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
We can reject the null hypothesis as its p value is less than 0.01.
fit <- lm(PriceRelative~.-Airline-Aircraft-TravelMonth-IsInternational,data=airline)
summary(fit)
##
## Call:
## lm(formula = PriceRelative ~ . - Airline - Aircraft - TravelMonth -
## IsInternational, data = airline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.77611 -0.10464 0.00752 0.07428 0.84432
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.091e+00 1.108e+00 -4.594 5.66e-06 ***
## FlightDuration 2.163e-02 4.490e-03 4.817 2.00e-06 ***
## SeatsEconomy 1.768e-03 5.264e-04 3.358 0.000851 ***
## SeatsPremium -1.501e-02 3.131e-03 -4.793 2.25e-06 ***
## PitchEconomy 1.085e-01 2.824e-02 3.840 0.000141 ***
## PitchPremium 1.151e-01 1.349e-02 8.530 2.28e-16 ***
## WidthEconomy -1.998e-01 2.878e-02 -6.940 1.39e-11 ***
## WidthPremium 5.969e-02 1.602e-02 3.725 0.000220 ***
## PriceEconomy -8.462e-04 2.939e-05 -28.788 < 2e-16 ***
## PricePremium 5.500e-04 2.253e-05 24.410 < 2e-16 ***
## SeatsTotal NA NA NA NA
## PitchDifference NA NA NA NA
## WidthDifference NA NA NA NA
## PercentPremiumSeats 2.211e-02 7.480e-03 2.956 0.003285 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2187 on 447 degrees of freedom
## Multiple R-squared: 0.7695, Adjusted R-squared: 0.7643
## F-statistic: 149.2 on 10 and 447 DF, p-value: < 2.2e-16
Some of the variables are not defined because of singularity means that the variables are not linearly independent. If we remove the variables that are giving NA in the above summary, we will obtain the same result for the rest of the variables. This is because the information given by those variables is already contained in the other variables and thus redundant.
fit1 <- lm(PriceRelative~.-Airline-Aircraft-TravelMonth-IsInternational-SeatsTotal-PitchDifference-WidthDifference,data=airline)
summary(fit1)
##
## Call:
## lm(formula = PriceRelative ~ . - Airline - Aircraft - TravelMonth -
## IsInternational - SeatsTotal - PitchDifference - WidthDifference,
## data = airline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.77611 -0.10464 0.00752 0.07428 0.84432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.091e+00 1.108e+00 -4.594 5.66e-06 ***
## FlightDuration 2.163e-02 4.490e-03 4.817 2.00e-06 ***
## SeatsEconomy 1.768e-03 5.264e-04 3.358 0.000851 ***
## SeatsPremium -1.501e-02 3.131e-03 -4.793 2.25e-06 ***
## PitchEconomy 1.085e-01 2.824e-02 3.840 0.000141 ***
## PitchPremium 1.151e-01 1.349e-02 8.530 2.28e-16 ***
## WidthEconomy -1.998e-01 2.878e-02 -6.940 1.39e-11 ***
## WidthPremium 5.969e-02 1.602e-02 3.725 0.000220 ***
## PriceEconomy -8.462e-04 2.939e-05 -28.788 < 2e-16 ***
## PricePremium 5.500e-04 2.253e-05 24.410 < 2e-16 ***
## PercentPremiumSeats 2.211e-02 7.480e-03 2.956 0.003285 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2187 on 447 degrees of freedom
## Multiple R-squared: 0.7695, Adjusted R-squared: 0.7643
## F-statistic: 149.2 on 10 and 447 DF, p-value: < 2.2e-16
fit1$coefficients
## (Intercept) FlightDuration SeatsEconomy
## -5.0913244706 0.0216285953 0.0017679358
## SeatsPremium PitchEconomy PitchPremium
## -0.0150054100 0.1084521113 0.1150557296
## WidthEconomy WidthPremium PriceEconomy
## -0.1997511456 0.0596927891 -0.0008462092
## PricePremium PercentPremiumSeats
## 0.0005500437 0.0221092157
# confidence intervals
confint(fit1)
## 2.5 % 97.5 %
## (Intercept) -7.2692846686 -2.9133642725
## FlightDuration 0.0128041066 0.0304530840
## SeatsEconomy 0.0007333483 0.0028025232
## SeatsPremium -0.0211586907 -0.0088521293
## PitchEconomy 0.0529453387 0.1639588838
## PitchPremium 0.0885484004 0.1415630589
## WidthEconomy -0.2563207040 -0.1431815873
## WidthPremium 0.0282018718 0.0911837063
## PriceEconomy -0.0009039769 -0.0007884415
## PricePremium 0.0005057590 0.0005943283
## PercentPremiumSeats 0.0074084342 0.0368099972
Factors affecting the difference in price between an economy ticket and a premium-economy airline ticket :-