# Load the six-airlines fare data set from the current working directory.
# stringsAsFactors = TRUE keeps the text columns (e.g. Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0 as well, so that
# summary() reports per-level counts instead of just "character".
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a data viewer window; only call it in an interactive session so
# the script can also be run in batch mode (e.g. via Rscript).
if (interactive()) {
  View(airlines.df)
}

# Per-column descriptive statistics (level counts for factors, five-number
# summary plus mean for numeric columns).
summary(airlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Box plot: distribution of flight duration, one box per airline.
boxplot(
  FlightDuration ~ Airline,
  data = airlines.df,
  main = "Flight duration for different airlines",
  xlab = "Airlines",
  ylab = "Flight Duration"
)

# Scatter plot: relative price against the pitch difference.
with(
  airlines.df,
  plot(
    x = PitchDifference,
    y = PriceRelative,
    main = "Pitch difference vs Relative Price",
    xlab = "Pitch difference",
    ylab = "Relative Price"
  )
)

# Scatter plot: relative price against the width difference.
with(
  airlines.df,
  plot(
    x = WidthDifference,
    y = PriceRelative,
    main = "Width difference vs Relative Price",
    xlab = "Width difference",
    ylab = "Relative Price"
  )
)
# The car package supplies scatterplotMatrix().
library(car)

# Pairwise scatter plots of relative price and the three candidate predictors
# used in the regression further down.
scatterplotMatrix(
  formula = ~ PriceRelative + PitchDifference + WidthDifference +
    PercentPremiumSeats,
  data = airlines.df,
  cex = 0.5,
  main = "Scatter plot Matrix"
)
# Pearson correlation matrix of the seat, pitch, width and price variables.
# The columns are selected by name rather than by position (previously 6:18)
# so the result does not silently change if the CSV column order changes.
corr_vars <- c(
  "SeatsEconomy", "SeatsPremium", "PitchEconomy", "PitchPremium",
  "WidthEconomy", "WidthPremium", "PriceEconomy", "PricePremium",
  "PriceRelative", "SeatsTotal", "PitchDifference", "WidthDifference",
  "PercentPremiumSeats"
)
cor(airlines.df[, corr_vars])
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium
## SeatsEconomy 1.000000000 0.625056587 0.14412692 0.119221250
## SeatsPremium 0.625056587 1.000000000 -0.03421296 0.004883123
## PitchEconomy 0.144126924 -0.034212963 1.00000000 -0.550606241
## PitchPremium 0.119221250 0.004883123 -0.55060624 1.000000000
## WidthEconomy 0.373670252 0.455782883 0.29448586 -0.023740873
## WidthPremium 0.102431959 -0.002717527 -0.53929285 0.750259029
## PriceEconomy 0.128167220 0.113642176 0.36866123 0.050384550
## PricePremium 0.177000928 0.217612376 0.22614179 0.088539147
## PriceRelative 0.003956939 -0.097196009 -0.42302204 0.417539056
## SeatsTotal 0.992607966 0.715171053 0.12373524 0.107512784
## PitchDifference 0.035318044 0.016365566 -0.78254993 0.950591466
## WidthDifference -0.080670148 -0.216168666 -0.63557430 0.703281797
## PercentPremiumSeats -0.330935223 0.485029771 -0.10280880 -0.175487414
## WidthEconomy WidthPremium PriceEconomy PricePremium
## SeatsEconomy 0.37367025 0.102431959 0.12816722 0.17700093
## SeatsPremium 0.45578288 -0.002717527 0.11364218 0.21761238
## PitchEconomy 0.29448586 -0.539292852 0.36866123 0.22614179
## PitchPremium -0.02374087 0.750259029 0.05038455 0.08853915
## WidthEconomy 1.00000000 0.081918728 0.06799061 0.15054837
## WidthPremium 0.08191873 1.000000000 -0.05704522 0.06402004
## PriceEconomy 0.06799061 -0.057045224 1.00000000 0.90138870
## PricePremium 0.15054837 0.064020043 0.90138870 1.00000000
## PriceRelative -0.04396116 0.504247591 -0.28856711 0.03184654
## SeatsTotal 0.40545860 0.091297500 0.13243313 0.19232533
## PitchDifference -0.12722421 0.760121272 -0.09952511 -0.01806629
## WidthDifference -0.39320512 0.884149655 -0.08449975 -0.01151218
## PercentPremiumSeats 0.22714172 -0.183312058 0.06532232 0.11639097
## PriceRelative SeatsTotal PitchDifference
## SeatsEconomy 0.003956939 0.99260797 0.03531804
## SeatsPremium -0.097196009 0.71517105 0.01636557
## PitchEconomy -0.423022038 0.12373524 -0.78254993
## PitchPremium 0.417539056 0.10751278 0.95059147
## WidthEconomy -0.043961160 0.40545860 -0.12722421
## WidthPremium 0.504247591 0.09129750 0.76012127
## PriceEconomy -0.288567110 0.13243313 -0.09952511
## PricePremium 0.031846537 0.19232533 -0.01806629
## PriceRelative 1.000000000 -0.01156894 0.46873025
## SeatsTotal -0.011568942 1.00000000 0.03416915
## PitchDifference 0.468730249 0.03416915 1.00000000
## WidthDifference 0.485802437 -0.10584398 0.76089108
## PercentPremiumSeats -0.161565556 -0.22091465 -0.09264869
## WidthDifference PercentPremiumSeats
## SeatsEconomy -0.08067015 -0.33093522
## SeatsPremium -0.21616867 0.48502977
## PitchEconomy -0.63557430 -0.10280880
## PitchPremium 0.70328180 -0.17548741
## WidthEconomy -0.39320512 0.22714172
## WidthPremium 0.88414965 -0.18331206
## PriceEconomy -0.08449975 0.06532232
## PricePremium -0.01151218 0.11639097
## PriceRelative 0.48580244 -0.16156556
## SeatsTotal -0.10584398 -0.22091465
## PitchDifference 0.76089108 -0.09264869
## WidthDifference 1.00000000 -0.27559416
## PercentPremiumSeats -0.27559416 1.00000000
# The corrgram package supplies corrgram() and the panel.* helpers used below.
library(corrgram)

# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal; variables keep their column order.
corrgram(
  airlines.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airlines correlations"
)
# Relative price is positively correlated with both the pitch difference and the width difference (r of roughly 0.47 and 0.49).
# Its correlation with the percentage of premium seats is weaker and negative (r of roughly -0.16).
# Welch two-sample t-test on the means of relative price and pitch difference.
# NOTE(review): the two variables are on different scales, so this compares
# their means only; it does not by itself test whether they are associated.
t.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$PitchDifference
)
##
## Welch Two Sample t-test
##
## data: airlines.df$PriceRelative and airlines.df$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.367495 -6.033640
## sample estimates:
## mean of x mean of y
## 0.4872052 6.6877729
# Pearson correlation test between relative price and pitch difference
# (null hypothesis: the true correlation is zero).
cor.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$PitchDifference
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PriceRelative and airlines.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
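# The p-value is below 0.05 in both tests, so the null hypothesis is rejected in each case.
# The correlation test shows that relative price and pitch difference are significantly positively correlated (r = 0.47), i.e. they are not independent.
# The t-test only shows that the means of the two variables differ, which is expected because they are measured on different scales.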
# Welch two-sample t-test on the means of relative price and width difference.
# NOTE(review): the two variables are on different scales, so this compares
# their means only; it does not by itself test whether they are associated.
t.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$WidthDifference
)
##
## Welch Two Sample t-test
##
## data: airlines.df$PriceRelative and airlines.df$WidthDifference
## t = -19.284, df = 585.55, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.262697 -1.029268
## sample estimates:
## mean of x mean of y
## 0.4872052 1.6331878
# Pearson correlation test between relative price and width difference
# (null hypothesis: the true correlation is zero).
cor.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$WidthDifference
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PriceRelative and airlines.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
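# The p-value is below 0.05 in both tests, so the null hypothesis is rejected in each case.
# The correlation test shows that relative price and width difference are significantly positively correlated (r = 0.49), i.e. they are not independent.
# The t-test only shows that the means of the two variables differ, which is expected because they are measured on different scales.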
# Multiple linear regression of relative price on the pitch difference, the
# width difference and the percentage of premium seats.
fit <- lm(
  formula = PriceRelative ~ PitchDifference + WidthDifference +
    PercentPremiumSeats,
  data = airlines.df
)

# Coefficient table, residual summary, R-squared and overall F-test.
summary(fit)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference +
## PercentPremiumSeats, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.88643 -0.29471 -0.05005 0.19013 1.17157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.031508 0.097220 -0.324 0.746
## PitchDifference 0.064596 0.016171 3.994 7.56e-05 ***
## WidthDifference 0.104782 0.024813 4.223 2.92e-05 ***
## PercentPremiumSeats -0.005764 0.003971 -1.451 0.147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3882 on 454 degrees of freedom
## Multiple R-squared: 0.2627, Adjusted R-squared: 0.2579
## F-statistic: 53.93 on 3 and 454 DF, p-value: < 2.2e-16
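# The overall F-test is significant (p < 0.05), so the model as a whole is statistically significant.
# PitchDifference and WidthDifference are significant predictors of relative price, whereas PercentPremiumSeats is not (p = 0.147).
# Note that the adjusted R-squared is about 0.26, so the model explains roughly a quarter of the variance in relative price: a statistically significant but only moderate fit.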