# Load the six-airline fare data set. The path is relative, so the CSV must be
# in the current working directory.
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 (where
# the default became FALSE); the per-level counts shown in the summary output
# below are only produced for factor columns.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a spreadsheet-style viewer; only call it when a person is
# actually sitting at the session.
if (interactive()) {
  View(airlines.df)
}

# Per-column summary: quartiles and mean for numeric columns, level counts for
# factor columns.
summary(airlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# psych provides describe(), which tabulates n, mean, sd, median, trimmed
# mean, mad, min, max, range, skew, kurtosis and se for every column.
# Columns marked with * in its output are non-numeric columns that were
# converted to numeric codes.
library(psych)
describe(x = airlines.df)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# One fill colour per box, recycled in the order the airlines are drawn.
airline_colours <- c("red", "blue", "peachpuff", "yellow", "green", "pink")

# Draw a horizontal boxplot of one airlines.df column grouped by airline.
#   fml  : formula of the form <column> ~ Airline, resolved inside airlines.df
#   xlab : label for the value axis
#   main : plot title
boxplot_by_airline <- function(fml, xlab, main) {
  boxplot(
    fml,
    data = airlines.df,
    horizontal = TRUE,
    las = 1, # keep the airline names horizontal so they stay readable
    xlab = xlab,
    main = main,
    col = airline_colours
  )
}

# Seat counts per cabin class.
boxplot_by_airline(
  SeatsEconomy ~ Airline,
  xlab = "No. of Economy Seats",
  main = "Visualizing No. of Economy Seats in each Airline"
)
boxplot_by_airline(
  SeatsPremium ~ Airline,
  xlab = "No. of Premium Economy Seats",
  main = "Visualizing No. of Premium Economy Seats in each Airline"
)

# Economy seat dimensions.
boxplot_by_airline(
  PitchEconomy ~ Airline,
  xlab = "Economy Pitch",
  main = "Visualizing Economy Pitch in each Airline"
)
boxplot_by_airline(
  WidthEconomy ~ Airline,
  xlab = "Economy Width",
  main = "Visualizing Economy Width in each Airline"
)

# Premium-versus-economy differences in seat dimensions.
boxplot_by_airline(
  PitchDifference ~ Airline,
  xlab = "Pitch Difference",
  main = "Visualizing Pitch difference in each Airline"
)
boxplot_by_airline(
  WidthDifference ~ Airline,
  xlab = "Width Difference",
  main = "Visualizing width difference in each Airline"
)

# Economy fare.
boxplot_by_airline(
  PriceEconomy ~ Airline,
  xlab = "Price of Economy",
  main = "Visualizing Price of Economy in each Airline"
)
# Scatter plots of relative price against each of the two seat-dimension
# differences, written with the formula interface (response ~ predictor).
plot(
  PriceRelative ~ PitchDifference,
  data = airlines.df,
  col = "blue",
  main = "Effect of Pitch Difference on relative price",
  xlab = "Pitch difference",
  ylab = "Relative price"
)
plot(
  PriceRelative ~ WidthDifference,
  data = airlines.df,
  col = "blue",
  main = "Effect of Width Difference on relative price",
  xlab = "Width difference",
  ylab = "Relative price"
)
library(corrgram)

# Correlogram over the whole data frame with the columns kept in their
# original order (order = FALSE). The lower triangle uses the shade panel,
# the upper triangle the pie panel, and the diagonal shows variable names.
corrgram(
  airlines.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airline variables"
)
H1 = The effect of the number of economy seats on relative price is greater than that of the number of premium economy seats.
library(MASS)
library(psych)
# Paired t-test on the economy and premium economy seat counts.
# with() exposes the columns for this single call; attach() would instead
# leave another copy of the data frame on the search path each time it runs.
with(airlines.df, t.test(SeatsEconomy, SeatsPremium, paired = TRUE))
##
## Paired t-test
##
## data: SeatsEconomy and SeatsPremium
## t = 52.414, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 162.3400 174.9875
## sample estimates:
## mean of the differences
## 168.6638
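We can now reject the null hypothesis and accept the alternate hypothesis, i.e. the effect of the number of economy seats is greater than that of the number of premium economy seats. (Strictly, the paired t-test only shows that the mean seat counts differ, with about 169 more economy than premium economy seats per flight; it does not by itself measure their effect on relative price.)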
H1 = The effect of the pitch of economy seats on relative price is greater than that of the pitch of premium economy seats.
library(MASS)
library(psych)
# Paired t-test on the economy and premium economy seat pitch.
# with() exposes the columns for this single call; attach() would instead
# leave another copy of the data frame on the search path each time it runs.
with(airlines.df, t.test(PitchEconomy, PitchPremium, paired = TRUE))
##
## Paired t-test
##
## data: PitchEconomy and PitchPremium
## t = -81.242, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.849544 -6.526002
## sample estimates:
## mean of the differences
## -6.687773
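We can now reject the null hypothesis and accept the alternate hypothesis, i.e. the effect of the pitch of economy seats is greater than that of the pitch of premium economy seats. (Strictly, the paired t-test only shows that the mean pitches differ, with economy pitch about 6.69 lower than premium economy pitch on average; it does not by itself measure their effect on relative price.)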
H1 = The effect of the width of economy seats on relative price is greater than that of the width of premium economy seats.
library(MASS)
library(psych)
# Paired t-test on the economy and premium economy seat width.
# with() exposes the columns for this single call; attach() would instead
# leave another copy of the data frame on the search path each time it runs.
with(airlines.df, t.test(WidthEconomy, WidthPremium, paired = TRUE))
##
## Paired t-test
##
## data: WidthEconomy and WidthPremium
## t = -29.389, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.742395 -1.523981
## sample estimates:
## mean of the differences
## -1.633188
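We can now reject the null hypothesis and accept the alternate hypothesis, i.e. the effect of the width of economy seats is greater than that of the width of premium economy seats. (Strictly, the paired t-test only shows that the mean widths differ, with economy width about 1.63 lower than premium economy width on average; it does not by itself measure their effect on relative price.)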
Run a regression of Relative Price on {PercentPremiumSeats, PitchDifference, WidthDifference, SeatsEconomy, SeatsPremium, PitchEconomy, PitchPremium, WidthEconomy, WidthPremium, PriceEconomy, PricePremium}.
# Ordinary least squares fit of relative price on the seat-count, pitch,
# width and price columns; summary() then prints the coefficient table and
# the overall fit statistics.
fit <- lm(
  formula = PriceRelative ~ PercentPremiumSeats + PitchDifference +
    WidthDifference + SeatsEconomy + SeatsPremium + PitchEconomy +
    PitchPremium + WidthEconomy + WidthPremium + PriceEconomy +
    PricePremium,
  data = airlines.df
)
summary(fit)
##
## Call:
## lm(formula = PriceRelative ~ PercentPremiumSeats + PitchDifference +
## WidthDifference + SeatsEconomy + SeatsPremium + PitchEconomy +
## PitchPremium + WidthEconomy + WidthPremium + PriceEconomy +
## PricePremium, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86630 -0.10024 -0.00421 0.07693 0.83219
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.658e+00 1.085e+00 -6.135 1.88e-09 ***
## PercentPremiumSeats 2.389e-02 7.654e-03 3.121 0.001920 **
## PitchDifference 1.223e-01 1.373e-02 8.904 < 2e-16 ***
## WidthDifference 5.990e-02 1.642e-02 3.649 0.000294 ***
## SeatsEconomy 1.892e-03 5.387e-04 3.513 0.000488 ***
## SeatsPremium -1.660e-02 3.190e-03 -5.206 2.95e-07 ***
## PitchEconomy 2.454e-01 3.660e-02 6.706 6.05e-11 ***
## PitchPremium NA NA NA NA
## WidthEconomy -8.750e-02 2.716e-02 -3.221 0.001369 **
## WidthPremium NA NA NA NA
## PriceEconomy -8.559e-04 3.004e-05 -28.492 < 2e-16 ***
## PricePremium 5.911e-04 2.137e-05 27.660 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2241 on 448 degrees of freedom
## Multiple R-squared: 0.7575, Adjusted R-squared: 0.7527
## F-statistic: 155.5 on 9 and 448 DF, p-value: < 2.2e-16
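The explanatory variable(s) whose beta-coefficients are statistically significant (p < 0.05) - PercentPremiumSeats, PitchDifference, WidthDifference, SeatsEconomy, SeatsPremium, PitchEconomy, WidthEconomy, PriceEconomy and PricePremium.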
The explanatory variable(s) whose beta-coefficients are not statistically significant (p > 0.05), or which are dropped from the regression and shown as NA - PitchPremium and WidthPremium (both NA; no estimated coefficient has p > 0.05).
The p-value of the whole model is less than 2.2e-16, which is much less than 0.05; therefore the model as a whole is statistically significant and is a good model for the prediction of relative price.
The model has passed the overall F-test (F = 155.5 on 9 and 448 degrees of freedom).
According to the adjusted R-squared, the predictor variables taken together explain approximately 75.27% of the variance in relative price. Since this is around 75%, we can say that the number of variables used to model relative price is not too small and is about appropriate.
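Since the model gives NA for the PitchPremium and WidthPremium variables, we cannot come to a proper conclusion regarding their effect on relative price, i.e. the price difference between economy and premium economy tickets. The NA values arise because each of these variables is an exact linear combination of other predictors (e.g. PitchPremium = PitchEconomy + PitchDifference), so its coefficient cannot be estimated separately.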
There is a positive relationship between the (PercentPremiumSeats, PitchDifference, WidthDifference, SeatsEconomy, PitchEconomy, PricePremium) variables and the price difference or relative price, while the coefficients of SeatsPremium, WidthEconomy and PriceEconomy are negative.
The influence of SeatsEconomy is greater than that of SeatsPremium.
The influence of PitchDifference is greater than that of WidthDifference.
Width between armrests in economy has more influence on the price difference than width between armrests in premium economy.
Distance between economy seats has more influence on the price difference than distance between premium economy seats.