# Load the six-airlines fare data set and take a first look at it.
# NOTE(review): the working directory is a hard-coded, machine-specific path;
# the script only runs unchanged where this folder exists.
setwd("C:/Users/Dell/Downloads/Sameer Mathur")
# Read text columns as factors explicitly. R >= 4.0 defaults to
# stringsAsFactors = FALSE, in which case summary() no longer prints the
# per-level counts for the categorical columns (Airline, Aircraft, ...).
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
View(airlines.df)     # open the raw table in the data viewer
summary(airlines.df)  # per-column summary statistics
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Descriptive statistics from psych::describe(), limited to its first five
# columns (variable index, count, mean, standard deviation, median).
library(psych)
describe(airlines.df)[, c("vars", "n", "mean", "sd", "median")]
## vars n mean sd median
## Airline* 1 458 3.01 1.65 2.00
## Aircraft* 2 458 1.67 0.47 2.00
## FlightDuration 3 458 7.58 3.54 7.79
## TravelMonth* 4 458 2.56 1.17 3.00
## IsInternational* 5 458 1.91 0.28 2.00
## SeatsEconomy 6 458 202.31 76.37 185.00
## SeatsPremium 7 458 33.65 13.26 36.00
## PitchEconomy 8 458 31.22 0.66 31.00
## PitchPremium 9 458 37.91 1.31 38.00
## WidthEconomy 10 458 17.84 0.56 18.00
## WidthPremium 11 458 19.47 1.10 19.00
## PriceEconomy 12 458 1327.08 988.27 1242.00
## PricePremium 13 458 1845.26 1288.14 1737.00
## PriceRelative 14 458 0.49 0.45 0.36
## SeatsTotal 15 458 235.96 85.29 227.00
## PitchDifference 16 458 6.69 1.76 7.00
## WidthDifference 17 458 1.63 1.19 1.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21
# Box plots of the seat, price and comfort variables.
# Four panels per page (2 x 2); further plots continue on new pages.
par(mfrow = c(2, 2))

# Distribution of each numeric variable on its own, drawn horizontally.
boxplot(airlines.df$SeatsEconomy, xlab = "Seats in Economy",
        horizontal = TRUE)
boxplot(airlines.df$SeatsPremium, xlab = "Seats in Premium",
        horizontal = TRUE)
boxplot(airlines.df$PriceEconomy, xlab = "Price of Economy",
        horizontal = TRUE)
boxplot(airlines.df$PricePremium, xlab = "Price of Premium",
        horizontal = TRUE)
boxplot(airlines.df$PriceRelative, xlab = "Relative Price difference",
        horizontal = TRUE)
boxplot(airlines.df$PitchDifference, xlab = "Difference in Pitch",
        horizontal = TRUE)
boxplot(airlines.df$WidthDifference, xlab = "Difference in Width",
        horizontal = TRUE)

# Prices broken down by airline. Boxes are vertical by default
# (horizontal = FALSE); the former `verticalal` / `vertical` arguments are
# neither boxplot() arguments nor graphical parameters and only produced
# warnings, so they are no longer passed.
boxplot(airlines.df$PriceEconomy ~ airlines.df$Airline,
        ylab = "Price in economy", las = 1)
boxplot(airlines.df$PricePremium ~ airlines.df$Airline,
        ylab = "Price in premium", las = 1)
boxplot(airlines.df$PriceRelative ~ airlines.df$Airline,
        ylab = "Relative price", las = 1)

# Two panels side by side for the price-versus-comfort comparisons.
par(mfrow = c(1, 2))

# Price against seat width within each cabin class (vertical boxes).
boxplot(airlines.df$PriceEconomy ~ airlines.df$WidthEconomy,
        xlab = "Width in economy", ylab = "Price in economy", las = 1)
boxplot(airlines.df$PricePremium ~ airlines.df$WidthPremium,
        xlab = "Width in premium", ylab = "Price in premium", las = 1)

# Relative price against the premium-economy comfort gap. The boxes are
# horizontal, so the x axis carries the relative price and the y axis the
# grouping variable.
boxplot(airlines.df$PriceRelative ~ airlines.df$WidthDifference,
        xlab = "Relative price", ylab = "Difference in width",
        horizontal = TRUE, las = 1)
boxplot(airlines.df$PriceRelative ~ airlines.df$PitchDifference,
        xlab = "Relative price", ylab = "Difference in pitch",
        horizontal = TRUE, las = 1)
# Keep only the numeric columns (all rows) so that a covariance matrix can
# be computed; the categorical columns are left out.
airline <- airlines.df[c(
  "FlightDuration",
  "SeatsEconomy", "SeatsPremium",
  "PitchEconomy", "PitchPremium",
  "WidthEconomy", "WidthPremium",
  "PriceEconomy", "PricePremium", "PriceRelative",
  "SeatsTotal",
  "PitchDifference", "WidthDifference",
  "PercentPremiumSeats"
)]
View(airline)  # inspect the numeric-only table
cov(airline)   # pairwise covariance matrix of the numeric columns
## FlightDuration SeatsEconomy SeatsPremium
## FlightDuration 12.5462183 52.9194291 7.57372426
## SeatsEconomy 52.9194291 5832.9154300 633.07060954
## SeatsPremium 7.5737243 633.0706095 175.86521648
## PitchEconomy 0.6817421 7.2117665 -0.29725856
## PitchPremium 0.4477835 11.9637325 0.08508595
## WidthEconomy 0.9014224 15.9105138 3.36977440
## WidthPremium 0.4019845 8.5832800 -0.03954019
## PriceEconomy 1983.5401655 9673.7944684 1489.38359627
## PricePremium 2959.9783043 17413.2541733 3717.36428960
## PriceRelative 0.1932368 0.1361699 -0.58078765
## SeatsTotal 60.4931534 6465.9860396 808.93582602
## PitchDifference -0.2339587 4.7519660 0.38234451
## WidthDifference -0.4994380 -7.3272338 -3.40931459
## PercentPremiumSeats 1.0379912 -122.3914537 31.14753127
## PitchEconomy PitchPremium WidthEconomy WidthPremium
## FlightDuration 0.6817421 0.44778348 0.90142242 0.40198446
## SeatsEconomy 7.2117665 11.96373253 15.91051379 8.58327998
## SeatsPremium -0.2972586 0.08508595 3.36977440 -0.03954019
## PitchEconomy 0.4292471 -0.47398546 0.10756500 -0.38766208
## PitchPremium -0.4739855 1.72639580 -0.01739081 1.08157435
## WidthEconomy 0.1075650 -0.01739081 0.31081765 0.05010845
## WidthPremium -0.3876621 1.08157435 0.05010845 1.20378776
## PriceEconomy 238.7031905 65.42513354 37.46095191 -61.85450011
## PricePremium 190.8517195 149.85356368 108.11611707 90.47997668
## PriceRelative -0.1248808 0.24719874 -0.01104335 0.24928593
## SeatsTotal 6.9145079 12.04881848 19.28028819 8.54373979
## PitchDifference -0.9032326 2.20038126 -0.12495581 1.46923643
## WidthDifference -0.4952271 1.09896515 -0.26070920 1.15367930
## PercentPremiumSeats -0.3261739 -1.11655834 0.61321816 -0.97393787
## PriceEconomy PricePremium PriceRelative
## FlightDuration 1983.54017 2959.97830 0.19323683
## SeatsEconomy 9673.79447 17413.25417 0.13616991
## SeatsPremium 1489.38360 3717.36429 -0.58078765
## PitchEconomy 238.70319 190.85172 -0.12488080
## PitchPremium 65.42513 149.85356 0.24719874
## WidthEconomy 37.46095 108.11612 -0.01104335
## WidthPremium -61.85450 90.47998 0.24928593
## PriceEconomy 976684.06198 1147494.76801 -128.49991725
## PricePremium 1147494.76801 1659293.11947 18.48428836
## PriceRelative -128.49992 18.48429 0.20302893
## SeatsTotal 11163.17806 21130.61846 -0.44461774
## PitchDifference -173.27806 -40.99816 0.37207954
## WidthDifference -99.31545 -17.63614 0.26032928
## PercentPremiumSeats 312.61077 726.01582 -0.35252750
## SeatsTotal PitchDifference WidthDifference
## FlightDuration 60.4931534 -0.2339587 -0.4994380
## SeatsEconomy 6465.9860396 4.7519660 -7.3272338
## SeatsPremium 808.9358260 0.3823445 -3.4093146
## PitchEconomy 6.9145079 -0.9032326 -0.4952271
## PitchPremium 12.0488185 2.2003813 1.0989652
## WidthEconomy 19.2802882 -0.1249558 -0.2607092
## WidthPremium 8.5437398 1.4692364 1.1536793
## PriceEconomy 11163.1780647 -173.2780570 -99.3154520
## PricePremium 21130.6184629 -40.9981558 -17.6361404
## PriceRelative -0.4446177 0.3720795 0.2603293
## SeatsTotal 7274.9218656 5.1343105 -10.7365484
## PitchDifference 5.1343105 3.1036138 1.5941922
## WidthDifference -10.7365484 1.5941922 1.4143885
## PercentPremiumSeats -91.2439224 -0.7903844 -1.5871560
## PercentPremiumSeats
## FlightDuration 1.0379912
## SeatsEconomy -122.3914537
## SeatsPremium 31.1475313
## PitchEconomy -0.3261739
## PitchPremium -1.1165583
## WidthEconomy 0.6132182
## WidthPremium -0.9739379
## PriceEconomy 312.6107669
## PricePremium 726.0158229
## PriceRelative -0.3525275
## SeatsTotal -91.2439224
## PitchDifference -0.7903844
## WidthDifference -1.5871560
## PercentPremiumSeats 23.4493343
# Correlogram of the airline variables: panel.shade below the diagonal,
# panel.pie above it, and the variable names (panel.txt) on the diagonal.
library(corrgram)
corrgram(
  airlines.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airlines intercorrelations"
)
# Hypothesis 1: The price of premium seats is strongly affected by the
# duration of the flight, the total number of seats, the number of premium
# seats, and the differences in seat width and seat pitch between premium
# and economy.
# Hypothesis 1: premium price paired against flight duration.
# NOTE(review): a paired t-test only checks whether the mean of the
# row-wise differences (PricePremium - FlightDuration) is zero. The two
# columns are in different units, so this does not measure how strongly
# price depends on duration -- confirm whether cor.test() or the regression
# further down is the intended evidence.
library(psych)
t.test(
  x = airlines.df$PricePremium,
  y = airlines.df$FlightDuration,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PricePremium and airlines.df$FlightDuration
## t = 30.585, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1719.606 1955.754
## sample estimates:
## mean of the differences
## 1837.68
# Hypothesis 1: premium price paired against the number of premium seats
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PricePremium,
  y = airlines.df$SeatsPremium,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PricePremium and airlines.df$SeatsPremium
## t = 30.164, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1693.583 1929.635
## sample estimates:
## mean of the differences
## 1811.609
# Hypothesis 1: premium price paired against the total number of seats
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PricePremium,
  y = airlines.df$SeatsTotal,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PricePremium and airlines.df$SeatsTotal
## t = 27.023, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1492.266 1726.328
## sample estimates:
## mean of the differences
## 1609.297
# Hypothesis 1: premium price paired against the seat-width difference
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PricePremium,
  y = airlines.df$WidthDifference,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PricePremium and airlines.df$WidthDifference
## t = 30.629, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1725.338 1961.910
## sample estimates:
## mean of the differences
## 1843.624
# Hypothesis 1: premium price paired against the seat-pitch difference
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PricePremium,
  y = airlines.df$PitchDifference,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PricePremium and airlines.df$PitchDifference
## t = 30.545, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1720.282 1956.858
## sample estimates:
## mean of the differences
## 1838.57
# Hypothesis 2: The relative price difference is strongly affected by the
# duration of the flight, the total number of seats, and the differences in
# seat pitch and seat width between economy and premium.
# Hypothesis 2: relative price paired against flight duration.
# NOTE(review): as a paired t-test this only checks whether the mean of
# (PriceRelative - FlightDuration) is zero; it does not quantify the
# dependence of relative price on duration -- confirm this is the intended
# test.
t.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$FlightDuration,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PriceRelative and airlines.df$FlightDuration
## t = -43.158, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.413501 -6.767765
## sample estimates:
## mean of the differences
## -7.090633
# Hypothesis 2: relative price paired against the total number of seats
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$SeatsTotal,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PriceRelative and airlines.df$SeatsTotal
## t = -59.078, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -243.3062 -227.6408
## sample estimates:
## mean of the differences
## -235.4735
# Hypothesis 2: relative price paired against the seat-pitch difference
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$PitchDifference,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PriceRelative and airlines.df$PitchDifference
## t = -82.896, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.347561 -6.053574
## sample estimates:
## mean of the differences
## -6.200568
# Hypothesis 2: relative price paired against the seat-width difference
# (tests whether the mean row-wise difference is zero).
t.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$WidthDifference,
  paired = TRUE
)
##
## Paired t-test
##
## data: airlines.df$PriceRelative and airlines.df$WidthDifference
## t = -23.418, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.242149 -1.049816
## sample estimates:
## mean of the differences
## -1.145983
# From the tests of Hypothesis 1, the null hypothesis is rejected for every
# factor (each p-value is below 2.2e-16), so we conclude that all of the
# factors affect the price of premium seats.
# Linear model for Hypothesis 1: premium price regressed on flight duration,
# seat counts and the width/pitch differences.
final <- lm(
  formula = PricePremium ~ FlightDuration + SeatsTotal + SeatsPremium +
    WidthDifference + PitchDifference,
  data = airlines.df
)
summary(final)  # coefficient table, R-squared and overall F-test
##
## Call:
## lm(formula = PricePremium ~ FlightDuration + SeatsTotal + SeatsPremium +
## WidthDifference + PitchDifference, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2273.0 -605.0 -19.5 701.9 4485.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.3415 238.4197 0.085 0.93205
## FlightDuration 235.6225 12.9664 18.172 < 2e-16 ***
## SeatsTotal -0.8036 0.7592 -1.058 0.29041
## SeatsPremium 19.9846 5.0617 3.948 9.13e-05 ***
## WidthDifference 258.7604 62.3947 4.147 4.02e-05 ***
## PitchDifference -129.4945 40.8960 -3.166 0.00165 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 955.9 on 452 degrees of freedom
## Multiple R-squared: 0.4553, Adjusted R-squared: 0.4493
## F-statistic: 75.57 on 5 and 452 DF, p-value: < 2.2e-16
# We can therefore write an approximate formula for the price of premium
# seats (SeatsTotal is left out because its coefficient is not significant,
# p = 0.29):
# PricePremium = 20.3415 + 235.6225 * FlightDuration + 19.9846 * SeatsPremium
#                + 258.7604 * WidthDifference - 129.4945 * PitchDifference.
# From the tests of Hypothesis 2, the null hypothesis is rejected for every
# factor (each p-value is below 2.2e-16), so we conclude that all of the
# factors affect the relative pricing of the seats.
# Linear model for Hypothesis 2: relative price regressed on flight duration,
# total seats and the pitch/width differences.
final1 <- lm(
  formula = PriceRelative ~ FlightDuration + SeatsTotal + PitchDifference +
    WidthDifference,
  data = airlines.df
)
summary(final1)  # coefficient table, R-squared and overall F-test
##
## Call:
## lm(formula = PriceRelative ~ FlightDuration + SeatsTotal + PitchDifference +
## WidthDifference, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7518 -0.2876 -0.0572 0.1593 1.1593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.403e-01 9.510e-02 -2.527 0.011854 *
## FlightDuration 2.200e-02 5.180e-03 4.247 2.63e-05 ***
## SeatsTotal -9.444e-05 2.176e-04 -0.434 0.664447
## PitchDifference 5.590e-02 1.590e-02 3.515 0.000483 ***
## WidthDifference 1.281e-01 2.377e-02 5.390 1.14e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3819 on 453 degrees of freedom
## Multiple R-squared: 0.2878, Adjusted R-squared: 0.2816
## F-statistic: 45.77 on 4 and 453 DF, p-value: < 2.2e-16
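# We can similarly write an approximate formula for the relative price
# (SeatsTotal is left out because its coefficient is not significant,
# p = 0.66):
# PriceRelative = -0.2403 + 0.0220 * FlightDuration
#                 + 0.0559 * PitchDifference + 0.1281 * WidthDifference.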