# Load the six-airline fare data set and print a per-column summary.
# The data directory is held in one variable and joined with file.path(), so
# the script no longer changes the session's working directory via setwd().
data_dir <- "C:/Users/Kalyan/Downloads"
# stringsAsFactors = TRUE keeps Airline, Aircraft, TravelMonth and
# IsInternational as factors on R >= 4.0 (where the default became FALSE),
# matching the summary()/str() output recorded in this file.
airlines <- read.csv(
  file.path(data_dir, "SixAirlinesDataV2.csv"),
  stringsAsFactors = TRUE
)
# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(airlines)
}
summary(airlines)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
library(psych)
# Extended descriptive statistics (mean, sd, median, skew, kurtosis, se, ...)
# for every column of the airlines data frame.
psych::describe(airlines)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Compact structure listing: column types and the first few values of each.
utils::str(airlines)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Distribution of flight duration for each airline (one box per airline).
# Title typo fixed: "differnt" -> "different".
boxplot(
  FlightDuration ~ Airline,
  data = airlines,
  xlab = "Airlines",
  ylab = "Flight Duration",
  main = "Flight duration for different airlines"
)

# Relative price against the premium-vs-economy seat pitch difference.
plot(
  airlines$PitchDifference, airlines$PriceRelative,
  main = "Pitch difference vs Relative Price",
  xlab = "Pitch difference",
  ylab = "Relative Price"
)

# Relative price against the premium-vs-economy seat width difference.
plot(
  airlines$WidthDifference, airlines$PriceRelative,
  main = "Width difference vs Relative Price",
  xlab = "Width difference",
  ylab = "Relative Price"
)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter-plot matrix of relative price against the two seat-size differences
# and the share of premium seats.
# The spread (variability) band of the smoother is switched off through the
# `smooth` list. NOTE(review): this assumes car >= 3.0, where the former
# top-level `spread` argument was folded into `smooth`; with those versions a
# bare spread = FALSE is passed on through `...` instead of being honoured.
# Confirm the installed car version.
scatterplotMatrix(
  formula = ~ PriceRelative + PitchDifference + WidthDifference +
    PercentPremiumSeats,
  data = airlines,
  cex = 0.8,
  smooth = list(spread = FALSE),
  main = "Scatter plot Matrix"
)
The scatter plots suggest that the relative price increases with both the pitch difference and the width difference.
Let's check the correlations among the numeric seat and price variables (columns 6 to 18) using a correlation matrix.
# Pearson correlation matrix of the seat and price columns. The name range
# SeatsEconomy:PercentPremiumSeats covers columns 6 to 18 of the data frame.
cor(subset(airlines, select = SeatsEconomy:PercentPremiumSeats))
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium
## SeatsEconomy 1.000000000 0.625056587 0.14412692 0.119221250
## SeatsPremium 0.625056587 1.000000000 -0.03421296 0.004883123
## PitchEconomy 0.144126924 -0.034212963 1.00000000 -0.550606241
## PitchPremium 0.119221250 0.004883123 -0.55060624 1.000000000
## WidthEconomy 0.373670252 0.455782883 0.29448586 -0.023740873
## WidthPremium 0.102431959 -0.002717527 -0.53929285 0.750259029
## PriceEconomy 0.128167220 0.113642176 0.36866123 0.050384550
## PricePremium 0.177000928 0.217612376 0.22614179 0.088539147
## PriceRelative 0.003956939 -0.097196009 -0.42302204 0.417539056
## SeatsTotal 0.992607966 0.715171053 0.12373524 0.107512784
## PitchDifference 0.035318044 0.016365566 -0.78254993 0.950591466
## WidthDifference -0.080670148 -0.216168666 -0.63557430 0.703281797
## PercentPremiumSeats -0.330935223 0.485029771 -0.10280880 -0.175487414
## WidthEconomy WidthPremium PriceEconomy PricePremium
## SeatsEconomy 0.37367025 0.102431959 0.12816722 0.17700093
## SeatsPremium 0.45578288 -0.002717527 0.11364218 0.21761238
## PitchEconomy 0.29448586 -0.539292852 0.36866123 0.22614179
## PitchPremium -0.02374087 0.750259029 0.05038455 0.08853915
## WidthEconomy 1.00000000 0.081918728 0.06799061 0.15054837
## WidthPremium 0.08191873 1.000000000 -0.05704522 0.06402004
## PriceEconomy 0.06799061 -0.057045224 1.00000000 0.90138870
## PricePremium 0.15054837 0.064020043 0.90138870 1.00000000
## PriceRelative -0.04396116 0.504247591 -0.28856711 0.03184654
## SeatsTotal 0.40545860 0.091297500 0.13243313 0.19232533
## PitchDifference -0.12722421 0.760121272 -0.09952511 -0.01806629
## WidthDifference -0.39320512 0.884149655 -0.08449975 -0.01151218
## PercentPremiumSeats 0.22714172 -0.183312058 0.06532232 0.11639097
## PriceRelative SeatsTotal PitchDifference
## SeatsEconomy 0.003956939 0.99260797 0.03531804
## SeatsPremium -0.097196009 0.71517105 0.01636557
## PitchEconomy -0.423022038 0.12373524 -0.78254993
## PitchPremium 0.417539056 0.10751278 0.95059147
## WidthEconomy -0.043961160 0.40545860 -0.12722421
## WidthPremium 0.504247591 0.09129750 0.76012127
## PriceEconomy -0.288567110 0.13243313 -0.09952511
## PricePremium 0.031846537 0.19232533 -0.01806629
## PriceRelative 1.000000000 -0.01156894 0.46873025
## SeatsTotal -0.011568942 1.00000000 0.03416915
## PitchDifference 0.468730249 0.03416915 1.00000000
## WidthDifference 0.485802437 -0.10584398 0.76089108
## PercentPremiumSeats -0.161565556 -0.22091465 -0.09264869
## WidthDifference PercentPremiumSeats
## SeatsEconomy -0.08067015 -0.33093522
## SeatsPremium -0.21616867 0.48502977
## PitchEconomy -0.63557430 -0.10280880
## PitchPremium 0.70328180 -0.17548741
## WidthEconomy -0.39320512 0.22714172
## WidthPremium 0.88414965 -0.18331206
## PriceEconomy -0.08449975 0.06532232
## PricePremium -0.01151218 0.11639097
## PriceRelative 0.48580244 -0.16156556
## SeatsTotal -0.10584398 -0.22091465
## PitchDifference 0.76089108 -0.09264869
## WidthDifference 1.00000000 -0.27559416
## PercentPremiumSeats -0.27559416 1.00000000
The economy and premium-economy prices are strongly correlated with each other (r = 0.90). The relative price is also positively correlated with the pitch and width of the premium class (r = 0.42 and 0.50), and with the pitch and width differences between the economy and premium-economy classes (r = 0.47 and 0.49).
library(corrgram)
# Corrgram of the airlines data: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal, and variables kept in their
# data-frame order (order = FALSE).
corrgram(
  airlines,
  main = "Corrgram of airlines correlations",
  order = FALSE,
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)
Let's now look at the correlations involving the prices and the relative price. There is a strong positive correlation between the economy and premium-economy prices. Both the premium-economy price and the economy price are strongly and positively correlated with the flight duration. Returning to our question, we need to find the reason for the difference between the economy and premium-economy prices. The relative price is positively correlated with the pitch and width differences. It is only weakly (and negatively, r = -0.16) correlated with the percentage of premium seats.
# Test the association between relative price and pitch difference.
# A two-sample t-test is deliberately not used here: it would only compare the
# means of the two columns, which are measured on different scales, and so
# says nothing about whether the variables move together. Pearson's
# correlation test is the relevant check.
cor.test(airlines$PriceRelative, airlines$PitchDifference)
##
## Pearson's product-moment correlation
##
## data: airlines$PriceRelative and airlines$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
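The correlation test gives a p-value below 0.05, so we reject the null hypothesis of zero correlation: relative price and pitch difference are significantly and positively associated (r = 0.47). (A two-sample t-test between these two columns only compares their means, which are on different scales, so it is not evidence of a relationship.)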
# Test the association between relative price and width difference.
# A two-sample t-test is deliberately not used here: it would only compare the
# means of the two columns, which are measured on different scales, and so
# says nothing about whether the variables move together. Pearson's
# correlation test is the relevant check.
cor.test(airlines$PriceRelative, airlines$WidthDifference)
##
## Pearson's product-moment correlation
##
## data: airlines$PriceRelative and airlines$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
The correlation test gives a p-value below 0.05, so we reject the null hypothesis of zero correlation: relative price and width difference are significantly and positively associated (r = 0.49). (A two-sample t-test between these two columns only compares their means, which are on different scales, so it is not evidence of a relationship.)
# Test the association between relative price and flight duration.
# A two-sample t-test is deliberately not used here: it would only compare the
# means of the two columns, which are measured on different scales, and so
# says nothing about whether the variables move together. Pearson's
# correlation test is the relevant check.
cor.test(airlines$PriceRelative, airlines$FlightDuration)
##
## Pearson's product-moment correlation
##
## data: airlines$PriceRelative and airlines$FlightDuration
## t = 2.6046, df = 456, p-value = 0.009498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02977856 0.21036806
## sample estimates:
## cor
## 0.121075
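The correlation test gives a p-value below 0.05 (p = 0.0095), so we reject the null hypothesis of zero correlation: relative price and flight duration are significantly associated, although the correlation is weak (r = 0.12). (A two-sample t-test between these two columns only compares their means, which are on different scales, so it is not evidence of a relationship.)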
# Test the association between relative price and economy price.
# A two-sample t-test is deliberately not used here: it would only compare the
# means of the two columns, which are measured on different scales, and so
# says nothing about whether the variables move together. Pearson's
# correlation test is the relevant check.
cor.test(airlines$PriceRelative, airlines$PriceEconomy)
##
## Pearson's product-moment correlation
##
## data: airlines$PriceRelative and airlines$PriceEconomy
## t = -6.4359, df = 456, p-value = 3.112e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3704004 -0.2022889
## sample estimates:
## cor
## -0.2885671
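The correlation test gives a p-value below 0.05, so we reject the null hypothesis of zero correlation: relative price and economy price are significantly and negatively associated (r = -0.29), i.e. the relative premium tends to be smaller on routes where the economy fare is higher. (A two-sample t-test between these two columns only compares their means, which are on different scales, so it is not evidence of a relationship.)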
# Multiple linear regression of relative price on the seat-comfort
# differences, the share of premium seats, flight duration and both fares,
# followed by the coefficient table and fit statistics.
# NOTE(review): in the rows shown by str() above, PriceRelative equals
# (PricePremium - PriceEconomy) / PriceEconomy, so the two price predictors
# partly reconstruct the response; read the R-squared with that in mind.
fit <- lm(
  formula = PriceRelative ~ PitchDifference + WidthDifference +
    PercentPremiumSeats + FlightDuration + PriceEconomy + PricePremium,
  data = airlines
)
summary(fit)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference +
## PercentPremiumSeats + FlightDuration + PriceEconomy + PricePremium,
## data = airlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71347 -0.14837 -0.00985 0.10434 0.93289
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.868e-01 6.563e-02 2.846 0.004623 **
## PitchDifference 3.967e-02 1.013e-02 3.914 0.000105 ***
## WidthDifference 8.796e-02 1.578e-02 5.573 4.32e-08 ***
## PercentPremiumSeats -1.379e-02 2.513e-03 -5.488 6.79e-08 ***
## FlightDuration 2.196e-02 4.280e-03 5.130 4.32e-07 ***
## PriceEconomy -7.049e-04 2.713e-05 -25.981 < 2e-16 ***
## PricePremium 4.674e-04 2.276e-05 20.536 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2404 on 451 degrees of freedom
## Multiple R-squared: 0.719, Adjusted R-squared: 0.7153
## F-statistic: 192.4 on 6 and 451 DF, p-value: < 2.2e-16
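The overall F-test is significant (p < 0.05), and every coefficient is individually significant. The model fits well, with a multiple R-squared of 0.719 and an adjusted R-squared of 0.7153. Note, however, that the relative price appears to be computed from the economy and premium-economy prices themselves, so part of this fit comes from including both prices as predictors rather than from the seat-comfort variables.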