# Load the six-airline fare data set from the working directory.
# Text columns are read as factors explicitly: the plot() calls on Airline,
# TravelMonth and IsInternational and the per-level counts printed by
# summary() rely on factors, and read.csv() no longer creates them by
# default from R 4.0.0 onwards.
airline_df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# Preview the first ten rows.
head(airline_df, n = 10)
## Airline Aircraft FlightDuration TravelMonth IsInternational
## 1 British Boeing 12.25 Jul International
## 2 British Boeing 12.25 Aug International
## 3 British Boeing 12.25 Sep International
## 4 British Boeing 12.25 Oct International
## 5 British Boeing 8.16 Aug International
## 6 British Boeing 8.16 Sep International
## 7 British Boeing 8.16 Oct International
## 8 British Boeing 6.50 Aug International
## 9 British Boeing 6.50 Sep International
## 10 British Boeing 11.50 Sep International
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium WidthEconomy
## 1 122 40 31 38 18
## 2 122 40 31 38 18
## 3 122 40 31 38 18
## 4 122 40 31 38 18
## 5 122 40 31 38 18
## 6 122 40 31 38 18
## 7 122 40 31 38 18
## 8 122 40 31 38 18
## 9 122 40 31 38 18
## 10 122 40 31 38 18
## WidthPremium PriceEconomy PricePremium PriceRelative SeatsTotal
## 1 19 2707 3725 0.38 162
## 2 19 2707 3725 0.38 162
## 3 19 2707 3725 0.38 162
## 4 19 2707 3725 0.38 162
## 5 19 1793 2999 0.67 162
## 6 19 1793 2999 0.67 162
## 7 19 1793 2999 0.67 162
## 8 19 1476 2997 1.03 162
## 9 19 1476 2997 1.03 162
## 10 19 1705 2989 0.75 162
## PitchDifference WidthDifference PercentPremiumSeats
## 1 7 1 24.69
## 2 7 1 24.69
## 3 7 1 24.69
## 4 7 1 24.69
## 5 7 1 24.69
## 6 7 1 24.69
## 7 7 1 24.69
## 8 7 1 24.69
## 9 7 1 24.69
## 10 7 1 24.69
# Per-column overview: level counts for factors, quartiles and mean for
# numeric columns.
summary(object = airline_df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Economy seat counts per airline, one colour per airline.
plot(
  airline_df$Airline, airline_df$SeatsEconomy,
  main = "Airline vs No. of seats in Economy Class",
  col = c("orange", "blue", "light green", "yellow", "purple", "black")
)

# Number of observations per travel month.
plot(
  airline_df$TravelMonth,
  main = "Monthwise Travel",
  col = "cyan"
)

# Number of domestic versus international observations.
plot(
  airline_df$IsInternational,
  main = "Graph showing number of domestic and international flights",
  col = "purple"
)

# Flight duration by airline.
boxplot(
  FlightDuration ~ Airline,
  data = airline_df,
  xlab = "Airline",
  ylab = "Flight duration",
  col = c("purple", "navy", "dark green", "yellow", "red", "grey")
)
library(car)
# Relative price against the premium-minus-economy pitch difference.
scatterplot(
  PriceRelative ~ PitchDifference,
  data = airline_df,
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter Plot of Relative Price vs Pitch Difference",
  xlab = "pitch difference",
  ylab = "price relative"
)

# Relative price against the premium-minus-economy width difference.
scatterplot(
  PriceRelative ~ WidthDifference,
  data = airline_df,
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter plot of Relative price vs Width Difference",
  xlab = "Width difference",
  ylab = "Price relative"
)
library(corrgram)
# Correlogram of the data set: pie glyphs in the upper triangle, shaded
# cells in the lower triangle, variable names on the diagonal.
corrgram(
  airline_df,
  order = TRUE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt,
  main = "Corrgram"
)
-> From the above corrgram, we can see that Price Relative is very weakly correlated with SeatsEconomy, SeatsPremium, WidthEconomy, PricePremium, SeatsTotal and PercentPremiumSeats. It is also only weakly correlated with PitchEconomy, PitchPremium, WidthPremium, PriceEconomy, PitchDifference and WidthDifference.
H1: There is no relation between relative price and width difference. H2: There is no relation between relative price and pitch difference.
# Two-sample t-test on PriceRelative and WidthDifference.
# NOTE(review): this compares the means of the two columns (the recorded
# output is a Welch two-sample test of a difference in means), whereas H1 is
# stated as "no relation"; a correlation test may be what was intended.
# Confirm before relying on the conclusion.
t.test(
  x = airline_df$PriceRelative,
  y = airline_df$WidthDifference
)
##
## Welch Two Sample t-test
##
## data: airline_df$PriceRelative and airline_df$WidthDifference
## t = -19.284, df = 585.55, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.262697 -1.029268
## sample estimates:
## mean of x mean of y
## 0.4872052 1.6331878
Here, the p-value is less than 0.05, so H1 is rejected.
# Two-sample t-test on PriceRelative and PitchDifference.
# NOTE(review): this compares the means of the two columns (the recorded
# output is a Welch two-sample test of a difference in means), whereas H2 is
# stated as "no relation"; a correlation test may be what was intended.
# Confirm before relying on the conclusion.
t.test(
  x = airline_df$PriceRelative,
  y = airline_df$PitchDifference
)
##
## Welch Two Sample t-test
##
## data: airline_df$PriceRelative and airline_df$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.367495 -6.033640
## sample estimates:
## mean of x mean of y
## 0.4872052 6.6877729
Here again, the p-value is less than 0.05; therefore, H2 is rejected as well.
# Make sure the categorical predictors are stored as factors before modelling.
# No relabelling step is needed: the columns already hold the label text, and
# airline_df has no `Res` code column to map labels from, so index masks built
# on it would be zero-length and select no rows.
categorical_cols <- c("Airline", "Aircraft", "IsInternational")
airline_df[categorical_cols] <- lapply(airline_df[categorical_cols], factor)
# Check that the categorical columns are reported as factors.
str(airline_df)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Linear model of the relative price on airline, aircraft, flight duration,
# domestic/international flag, premium pitch and the pitch/width differences.
reg <- lm(
  PriceRelative ~ Airline + Aircraft + FlightDuration + IsInternational +
    PitchPremium + PitchDifference + WidthDifference,
  data = airline_df
)
# Coefficient estimates, significance levels and overall fit.
summary(reg)
##
## Call:
## lm(formula = PriceRelative ~ Airline + Aircraft + FlightDuration +
## IsInternational + PitchPremium + PitchDifference + WidthDifference,
## data = airline_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81510 -0.19268 -0.05124 0.09981 1.47122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.029133 4.238674 -0.243 0.808275
## AirlineBritish 0.274168 0.115020 2.384 0.017559 *
## AirlineDelta 0.003609 0.198262 0.018 0.985486
## AirlineJet 0.531175 0.140365 3.784 0.000175 ***
## AirlineSingapore 0.308822 0.079777 3.871 0.000125 ***
## AirlineVirgin 0.366455 0.131493 2.787 0.005549 **
## AircraftBoeing -0.017079 0.046104 -0.370 0.711220
## FlightDuration 0.037123 0.006685 5.554 4.82e-08 ***
## IsInternationalInternational -0.469714 0.331689 -1.416 0.157436
## PitchPremium 0.026332 0.124471 0.212 0.832556
## PitchDifference 0.042179 0.077243 0.546 0.585306
## WidthDifference 0.087253 0.083851 1.041 0.298637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3599 on 446 degrees of freedom
## Multiple R-squared: 0.3774, Adjusted R-squared: 0.362
## F-statistic: 24.57 on 11 and 446 DF, p-value: < 2.2e-16
# Pair each observed PriceRelative with the value fitted by `reg`.
# The arguments are unnamed, so the column names are generated from the
# argument expressions themselves (see the header of the printed output).
regv <- data.frame(airline_df$PriceRelative, (fitted(reg)))
library(car)
# Show a handful of rows of observed vs fitted values; some() is presumably
# the helper from car, loaded on the line above.
some(regv)
## airline_df.PriceRelative X.fitted.reg..
## 88 0.40 0.4227479
## 93 0.48 0.9536572
## 116 0.04 0.2478988
## 133 0.16 0.3098950
## 148 0.29 0.1907261
## 154 0.09 0.1662881
## 158 1.82 0.8257836
## 238 0.03 0.1854765
## 354 0.03 0.1999649
## 454 0.58 0.9350954
According to the statistical tests and the correlation and regression analyses, the factors that explain the difference in price between an economy ticket and a premium-economy ticket are the airline brand (Air France, British, Delta, Jet, Singapore or Virgin), the airline type (International or Domestic) and the aircraft. However, some parameters, such as FlightDuration, PitchDifference, PitchPremium and WidthDifference, had only a minimal impact on the difference in ticket price.