## R Markdown
# Load the six-airline fare data set from the working directory.
# stringsAsFactors = TRUE keeps text columns (Airline, Aircraft, TravelMonth,
# IsInternational) as factors regardless of the R version's default, which is
# what the summaries and models below were produced with.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# The spreadsheet-style viewer is only useful in an interactive session, so
# skip it when the script is run in batch mode.
if (interactive()) {
  View(airlines.df)
}
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Per-column summary statistics (n, mean, sd, median, skew, ...).
describe(airlines.df)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Distribution of each seat and price variable, one histogram per column,
# shown for the Economy class and then the Premium Economy class.

# Number of seats.
hist(airlines.df$SeatsEconomy,
     main = "Histogram of Seats Economy",
     xlab = "Seats Of Economy Class")

hist(airlines.df$SeatsPremium,
     main = "Histogram of Seats Premium",
     xlab = "Seats Of Premium Economy Class")

# Seat pitch.
hist(airlines.df$PitchEconomy,
     main = "Histogram of Pitch Economy",
     xlab = "Pitch Of Economy Class")

hist(airlines.df$PitchPremium,
     main = "Histogram of Pitch Premium",
     xlab = "Pitch Of PremiumEconomy Class")

# Seat width.
hist(airlines.df$WidthEconomy,
     main = "Histogram of Seats Width Economy",
     xlab = "Seats Width Of Economy Class")

hist(airlines.df$WidthPremium,
     main = "Histogram of Seats Width Premium",
     xlab = "Seats Width Of Premium Economy Class")

# Ticket price.
hist(airlines.df$PriceEconomy,
     main = "Histogram of Price of Tickets In Economy Class",
     xlab = "Price of Tickets In Economy Class")

# The x-axis label names the Premium Economy class, matching the plotted
# column and the title.
hist(airlines.df$PricePremium,
     main = "Histogram of Price of Tickets In PremiumEconomy Class",
     xlab = "Price of Tickets In Premium Economy Class")

# Economy-vs-Premium scatter plots. With a 1 x 2 layout the four plots are
# drawn two per page: seats and pitch first, then width and price.
par(mfrow = c(1, 2))
plot(airlines.df$SeatsEconomy, airlines.df$SeatsPremium)
plot(airlines.df$PitchEconomy, airlines.df$PitchPremium)

plot(airlines.df$WidthEconomy, airlines.df$WidthPremium)
plot(airlines.df$PriceEconomy, airlines.df$PricePremium)

# Back to a single plot per page for the figures that follow.
par(mfrow = c(1, 1))
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, and min/max values on the diagonal; variables keep their
# original column order.
corrgram(
  airlines.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of airlines.df intercorrelations"
)

## Null hypotheses:
## 1. The number of seats, the seat width and the seat pitch have no effect on the ticket price in the Economy and Premium Economy classes of the airlines.
## 2. The number of seats in Economy class has no effect on the number of seats in Premium Economy class.
## 3. There is no relation between the price of tickets in Economy class and the price of tickets in Premium Economy class.
## There is no relation between the price of Economy tickets and the type of flight (Domestic or International).
# Two-sample t-test (pooled variance) comparing the mean economy fare
# between the two IsInternational groups.
t.test(
  PriceEconomy ~ IsInternational,
  data = airlines.df,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: PriceEconomy by IsInternational
## t = -6.8166, df = 456, p-value = 2.965e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1369.8658 -756.7693
## sample estimates:
## mean in group Domestic mean in group International
## 356.625 1419.943
# Two-sample t-test (pooled variance) comparing the mean premium-economy
# fare between the two IsInternational groups.
t.test(
  PricePremium ~ IsInternational,
  data = airlines.df,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: PricePremium by IsInternational
## t = -8.0006, df = 456, p-value = 1.033e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1991.772 -1206.246
## sample estimates:
## mean in group Domestic mean in group International
## 385.900 1984.909
## Both p-values are < 0.05. Therefore, the ticket prices in the Economy and Premium Economy classes depend on the type of flight (Domestic or International).
## Therefore, the null hypothesis is rejected.
## Hypothesis:
## 1. The number of seats, the seat width and the seat pitch have no effect on the ticket price in the Economy and Premium Economy classes of the airlines.
# Economy fare model: PriceEconomy regressed on the economy seat attributes
# plus route type, travel month, flight duration, aircraft and airline.
Model1 <- PriceEconomy ~
  SeatsEconomy + WidthEconomy + PitchEconomy +
  IsInternational + TravelMonth + FlightDuration +
  Aircraft + Airline
effect <- lm(formula = Model1, data = airlines.df)
summary(effect)
##
## Call:
## lm(formula = Model1, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2076.00 -232.27 71.06 319.54 1195.57
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1046.4681 3343.4663 0.313 0.75444
## SeatsEconomy -0.7980 0.3958 -2.016 0.04440 *
## WidthEconomy -42.2485 101.4948 -0.416 0.67742
## PitchEconomy 9.8126 88.2097 0.111 0.91148
## IsInternationalInternational 1329.1036 269.5574 4.931 1.16e-06 ***
## TravelMonthJul 100.5417 77.5219 1.297 0.19533
## TravelMonthOct -45.8224 65.8492 -0.696 0.48688
## TravelMonthSep 0.8848 65.6567 0.013 0.98925
## FlightDuration 98.9510 9.5863 10.322 < 2e-16 ***
## AircraftBoeing 194.6460 68.8440 2.827 0.00491 **
## AirlineBritish -1368.1788 124.1997 -11.016 < 2e-16 ***
## AirlineDelta -677.5162 247.7959 -2.734 0.00650 **
## AirlineJet -2157.0207 176.3881 -12.229 < 2e-16 ***
## AirlineSingapore -1994.9912 179.3738 -11.122 < 2e-16 ***
## AirlineVirgin -1149.5624 135.4383 -8.488 3.20e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 524.6 on 443 degrees of freedom
## Multiple R-squared: 0.7269, Adjusted R-squared: 0.7182
## F-statistic: 84.21 on 14 and 443 DF, p-value: < 2.2e-16
## Thus, the regression model is:
## PriceEconomy = b0 + b1*SeatsEconomy + b2*WidthEconomy + b3*PitchEconomy + b4*IsInternational + b5*TravelMonth + b6*FlightDuration + b7*Aircraft + b8*Airline + e
## PriceEconomy = 1046.4681 + (-0.7980)*SeatsEconomy + (-42.2485)*WidthEconomy + 9.8126*PitchEconomy + 1329.1036*IsInternationalInternational + 100.5417*TravelMonthJul + (-45.8224)*TravelMonthOct + 0.8848*TravelMonthSep + 98.9510*FlightDuration + 194.6460*AircraftBoeing + (-1368.1788)*AirlineBritish + (-677.5162)*AirlineDelta + (-2157.0207)*AirlineJet + (-1994.9912)*AirlineSingapore + (-1149.5624)*AirlineVirgin
## The overall F-test p-value (< 2.2e-16) is below 0.05, therefore the null hypothesis is rejected.
## Multiple R-squared = 0.7269, i.e. the model explains 72.69 % of the variance in PriceEconomy, so the variables are strongly related.
## Pr(>|t|)
#SeatsEconomy 0.04440 *
#WidthEconomy 0.67742
#PitchEconomy 0.91148
#IsInternational 1.16e-06 ***
#TravelMonthJul 0.19533
#TravelMonthOct 0.48688
#TravelMonthSep 0.98925
#FlightDuration 2e-16 ***
#AircraftBoeing 0.00491 **
#AirlineBritish 2e-16 ***
#AirlineDelta 0.00650 **
#AirlineJet 2e-16 ***
#AirlineSingapore 2e-16 ***
#AirlineVirgin 3.20e-16 ***
## From the individual p-values, the factors significantly related to the ticket price in Economy class are: SeatsEconomy, IsInternational, FlightDuration, Aircraft (Boeing) and Airline.
# Premium-economy fare model: PricePremium regressed on the premium seat
# attributes plus route type, travel month, flight duration, aircraft and
# airline. Model1 is reused, replacing the earlier formula.
Model1 <- PricePremium ~
  SeatsPremium + WidthPremium + PitchPremium +
  IsInternational + TravelMonth + FlightDuration +
  Aircraft + Airline
effect1 <- lm(formula = Model1, data = airlines.df)
summary(effect1)
##
## Call:
## lm(formula = Model1, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2094.8 -364.8 52.3 360.9 4387.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3930.8320 5562.5878 -0.707 0.48015
## SeatsPremium 0.2668 4.8987 0.054 0.95658
## WidthPremium 57.8484 224.6487 0.258 0.79691
## PitchPremium 82.9102 226.5945 0.366 0.71462
## IsInternationalInternational 1017.7044 722.8915 1.408 0.15988
## TravelMonthJul 89.6143 111.7754 0.802 0.42314
## TravelMonthOct -26.7792 94.9716 -0.282 0.77810
## TravelMonthSep -2.1725 94.7113 -0.023 0.98171
## FlightDuration 175.8219 14.1080 12.463 < 2e-16 ***
## AircraftBoeing 261.8917 91.7820 2.853 0.00453 **
## AirlineBritish -983.1179 136.8293 -7.185 2.87e-12 ***
## AirlineDelta -343.7183 562.5606 -0.611 0.54152
## AirlineJet -2081.9591 287.7882 -7.234 2.07e-12 ***
## AirlineSingapore -2173.3836 272.6682 -7.971 1.35e-14 ***
## AirlineVirgin -503.6319 475.1096 -1.060 0.28971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 756.3 on 443 degrees of freedom
## Multiple R-squared: 0.6659, Adjusted R-squared: 0.6553
## F-statistic: 63.06 on 14 and 443 DF, p-value: < 2.2e-16
## The overall p-value is less than 0.05, hence the null hypothesis is rejected.
## Multiple R-squared: 0.6659, i.e. 66.59 percent. Hence the variables are strongly related.
## 2. The number of seats in Economy class has no effect on the number of seats in Premium Economy class.
# Simple linear regression of premium-economy seat count on economy seat
# count.
effect2 <- lm(
  formula = SeatsPremium ~ SeatsEconomy,
  data = airlines.df
)
summary(effect2)
##
## Call:
## lm(formula = SeatsPremium ~ SeatsEconomy, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.273 -6.645 -2.064 10.424 17.936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.690683 1.372381 8.519 2.37e-16 ***
## SeatsEconomy 0.108534 0.006347 17.100 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.36 on 456 degrees of freedom
## Multiple R-squared: 0.3907, Adjusted R-squared: 0.3894
## F-statistic: 292.4 on 1 and 456 DF, p-value: < 2.2e-16
## Thus, the regression model is:
## SeatsPremium = b0 + b1*SeatsEconomy + e
## SeatsPremium = 11.690683 + 0.108534*SeatsEconomy
## P-value < 0.05, therefore the null hypothesis is rejected.
## Multiple R-squared = 39.07 %, so the variables are moderately related.
# Simple linear regression of the premium-economy fare on the economy fare.
effect3 <- lm(
  formula = PricePremium ~ PriceEconomy,
  data = airlines.df
)
summary(effect3)
##
## Call:
## lm(formula = PricePremium ~ PriceEconomy, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -805.5 -315.6 -111.1 157.5 3483.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 286.09096 43.71533 6.544 1.61e-10 ***
## PriceEconomy 1.17489 0.02643 44.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 558.4 on 456 degrees of freedom
## Multiple R-squared: 0.8125, Adjusted R-squared: 0.8121
## F-statistic: 1976 on 1 and 456 DF, p-value: < 2.2e-16
## Thus, the regression model is:
## PricePremium = b0 + b1*PriceEconomy + e
## PricePremium = 286.09096 + 1.17489*PriceEconomy
## P-value < 0.05, therefore the null hypothesis is rejected.
## Multiple R-squared = 81.25 %, so the variables are strongly related.
## Factors on which the difference between PriceEconomy and PricePremium depends.
# Relative price model: PriceRelative regressed on the economy seat
# attributes plus route type, travel month, flight duration, aircraft and
# airline. Both Model1 and effect are overwritten here.
Model1 <- PriceRelative ~
  SeatsEconomy + WidthEconomy + PitchEconomy +
  IsInternational + TravelMonth + FlightDuration +
  Aircraft + Airline
effect <- lm(formula = Model1, data = airlines.df)
summary(effect)
##
## Call:
## lm(formula = Model1, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79257 -0.21656 -0.04377 0.11681 1.47095
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5367478 2.2920593 2.416 0.01611 *
## SeatsEconomy 0.0004907 0.0002714 1.808 0.07127 .
## WidthEconomy -0.0874560 0.0695781 -1.257 0.20944
## PitchEconomy -0.1329101 0.0604707 -2.198 0.02847 *
## IsInternationalInternational 0.0133091 0.1847907 0.072 0.94262
## TravelMonthJul -0.0191317 0.0531439 -0.360 0.71902
## TravelMonthOct 0.0528023 0.0451419 1.170 0.24275
## TravelMonthSep -0.0128973 0.0450099 -0.287 0.77460
## FlightDuration 0.0358711 0.0065717 5.458 8.01e-08 ***
## AircraftBoeing 0.0179156 0.0471949 0.380 0.70442
## AirlineBritish 0.1705060 0.0851431 2.003 0.04583 *
## AirlineDelta 0.0902847 0.1698725 0.531 0.59535
## AirlineJet 0.6657522 0.1209200 5.506 6.24e-08 ***
## AirlineSingapore 0.3824173 0.1229668 3.110 0.00199 **
## AirlineVirgin 0.4458910 0.0928475 4.802 2.15e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3596 on 443 degrees of freedom
## Multiple R-squared: 0.3825, Adjusted R-squared: 0.363
## F-statistic: 19.6 on 14 and 443 DF, p-value: < 2.2e-16
## According to the individual p-values, the significant factors are:
## PitchEconomy, FlightDuration, AirlineBritish, AirlineJet, AirlineSingapore, AirlineVirgin.
## Mean price of the Economy and Premium Economy classes.
# Average economy fare over all rows.
mean(airlines.df[["PriceEconomy"]])
## [1] 1327.076
# Average premium-economy fare over all rows.
mean(airlines.df[["PricePremium"]])
## [1] 1845.258