Airline Project
# Load the six-airlines fare data set from the working directory.
# stringsAsFactors = TRUE is set explicitly: the summaries recorded in this
# report show Airline, Aircraft, TravelMonth and IsInternational as factors
# (level counts), which is only the read.csv() default before R 4.0.0.
airline <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a spreadsheet-style viewer; it is only useful (and only
# reliably available) in an interactive session.
if (interactive()) {
  View(airline)
}
library(psych)
Summary of data
# Per-column descriptive statistics (n, mean, sd, median, skew, ...) via psych.
psych::describe(x = airline)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Base R summary: level counts for factor columns, quartiles for numeric ones.
summary(object = airline)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
Economy Class Seats
# Economy seat counts: histogram, one bar per data row, and boxplot.
hist(
  airline[["SeatsEconomy"]],
  main = "Economy class",
  xlab = "Seats Economy",
  col = "blue"
)

barplot(height = airline[["SeatsEconomy"]])

boxplot(airline[["SeatsEconomy"]], col = "green")

Premium Class Seats
# Premium seat counts: histogram, one bar per data row, and boxplot.
hist(
  airline[["SeatsPremium"]],
  main = "Premium class",
  xlab = "Seats Premium",
  col = "blue"
)

barplot(height = airline[["SeatsPremium"]])

boxplot(airline[["SeatsPremium"]], col = "green")

Economy Class Pitch
# Economy seat pitch: histogram, one bar per data row, and boxplot.
hist(
  airline[["PitchEconomy"]],
  main = "Economy class Pitch",
  xlab = "Pitch Economy",
  col = "blue"
)

barplot(height = airline[["PitchEconomy"]])

boxplot(airline[["PitchEconomy"]], col = "green")

Premium Class Pitch
# Premium seat pitch: histogram, one bar per data row, and boxplot.
# Note: this histogram is drawn in green, unlike the blue used elsewhere.
hist(
  airline[["PitchPremium"]],
  main = "Premium class Pitch",
  xlab = "Pitch Premium",
  col = "green"
)

barplot(height = airline[["PitchPremium"]])

boxplot(airline[["PitchPremium"]], col = "green")

Economy Class Width
# Economy seat width: histogram, one bar per data row, and boxplot.
hist(
  airline[["WidthEconomy"]],
  main = "Economy class Width",
  xlab = "Width Economy",
  col = "blue"
)

barplot(height = airline[["WidthEconomy"]])

boxplot(airline[["WidthEconomy"]], col = "green")

Premium Class Width
# Premium seat width: histogram, one bar per data row, and boxplot.
hist(
  airline[["WidthPremium"]],
  main = "Premium class Width",
  xlab = "Width Premium",
  col = "blue"
)

barplot(height = airline[["WidthPremium"]])

boxplot(airline[["WidthPremium"]], col = "green")

Economy Class Price
# Economy ticket price: histogram, one bar per data row, and boxplot.
hist(
  airline[["PriceEconomy"]],
  main = "Economy class Price",
  xlab = "Price Economy",
  col = "blue"
)

barplot(height = airline[["PriceEconomy"]])

boxplot(airline[["PriceEconomy"]], col = "green")

Premium Class Price
# Premium ticket price: histogram, one bar per data row, and boxplot.
# The histogram title previously read "Premiuim"; the spelling is corrected so
# the rendered figure is labelled properly.
hist(
  airline$PricePremium,
  xlab = "Price premium",
  col = "blue",
  main = "Premium class Price"
)

barplot(airline$PricePremium)

boxplot(airline$PricePremium, col = "green")

Relative Price
# Relative price (PriceRelative column): histogram, one bar per data row,
# and boxplot.
hist(
  airline[["PriceRelative"]],
  main = "Relative Price",
  xlab = "Relative Price",
  col = "blue"
)

barplot(height = airline[["PriceRelative"]])

boxplot(airline[["PriceRelative"]], col = "green")

Scatter plots
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
Price vs Width
# Relative price (x) against the WidthDifference column (y).
scatterplot(
  x = airline[["PriceRelative"]],
  y = airline[["WidthDifference"]],
  main = "Price vs Width",
  xlab = "Price",
  ylab = "Width"
)

Price vs Duration
# Relative price (x) against flight duration (y).
# The y-axis label was "Width" (copied from the previous plot) even though the
# plotted variable is FlightDuration; it now matches the data and the title.
scatterplot(
  x = airline$PriceRelative,
  y = airline$FlightDuration,
  main = "Price vs Duration",
  xlab = "Price",
  ylab = "Duration"
)

Price vs Pitch
# Relative price (x) against the PitchDifference column (y).
scatterplot(
  x = airline[["PriceRelative"]],
  y = airline[["PitchDifference"]],
  main = "Price vs Pitch",
  xlab = "Price",
  ylab = "Pitch"
)

Correlation among Variables
# Pairwise correlations, rounded to two decimals, for columns 6 to 18.
# Per the describe() listing these are SeatsEconomy through
# PercentPremiumSeats; the selection is positional, so it depends on the CSV
# column order.
round(
  x = cor(x = airline[, 6:18]),
  digits = 2
)
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium
## SeatsEconomy 1.00 0.63 0.14 0.12
## SeatsPremium 0.63 1.00 -0.03 0.00
## PitchEconomy 0.14 -0.03 1.00 -0.55
## PitchPremium 0.12 0.00 -0.55 1.00
## WidthEconomy 0.37 0.46 0.29 -0.02
## WidthPremium 0.10 0.00 -0.54 0.75
## PriceEconomy 0.13 0.11 0.37 0.05
## PricePremium 0.18 0.22 0.23 0.09
## PriceRelative 0.00 -0.10 -0.42 0.42
## SeatsTotal 0.99 0.72 0.12 0.11
## PitchDifference 0.04 0.02 -0.78 0.95
## WidthDifference -0.08 -0.22 -0.64 0.70
## PercentPremiumSeats -0.33 0.49 -0.10 -0.18
## WidthEconomy WidthPremium PriceEconomy PricePremium
## SeatsEconomy 0.37 0.10 0.13 0.18
## SeatsPremium 0.46 0.00 0.11 0.22
## PitchEconomy 0.29 -0.54 0.37 0.23
## PitchPremium -0.02 0.75 0.05 0.09
## WidthEconomy 1.00 0.08 0.07 0.15
## WidthPremium 0.08 1.00 -0.06 0.06
## PriceEconomy 0.07 -0.06 1.00 0.90
## PricePremium 0.15 0.06 0.90 1.00
## PriceRelative -0.04 0.50 -0.29 0.03
## SeatsTotal 0.41 0.09 0.13 0.19
## PitchDifference -0.13 0.76 -0.10 -0.02
## WidthDifference -0.39 0.88 -0.08 -0.01
## PercentPremiumSeats 0.23 -0.18 0.07 0.12
## PriceRelative SeatsTotal PitchDifference
## SeatsEconomy 0.00 0.99 0.04
## SeatsPremium -0.10 0.72 0.02
## PitchEconomy -0.42 0.12 -0.78
## PitchPremium 0.42 0.11 0.95
## WidthEconomy -0.04 0.41 -0.13
## WidthPremium 0.50 0.09 0.76
## PriceEconomy -0.29 0.13 -0.10
## PricePremium 0.03 0.19 -0.02
## PriceRelative 1.00 -0.01 0.47
## SeatsTotal -0.01 1.00 0.03
## PitchDifference 0.47 0.03 1.00
## WidthDifference 0.49 -0.11 0.76
## PercentPremiumSeats -0.16 -0.22 -0.09
## WidthDifference PercentPremiumSeats
## SeatsEconomy -0.08 -0.33
## SeatsPremium -0.22 0.49
## PitchEconomy -0.64 -0.10
## PitchPremium 0.70 -0.18
## WidthEconomy -0.39 0.23
## WidthPremium 0.88 -0.18
## PriceEconomy -0.08 0.07
## PricePremium -0.01 0.12
## PriceRelative 0.49 -0.16
## SeatsTotal -0.11 -0.22
## PitchDifference 0.76 -0.09
## WidthDifference 1.00 -0.28
## PercentPremiumSeats -0.28 1.00
Corrgram of variables
library(corrgram)
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal, columns kept in data order.
corrgram(
  airline,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airlines variables"
)

t-test
# Welch two-sample t-test on PriceRelative vs FlightDuration: tests whether the
# two columns have the same mean (see "difference in means" in the output).
# NOTE(review): this compares the averages of two different quantities
# (presumably in different units); it does not test whether price is
# associated with duration. If association is the goal, cor.test() or the
# regression further down is probably what was intended — confirm.
t.test(airline$PriceRelative,airline$FlightDuration)
##
## Welch Two Sample t-test
##
## data: airline$PriceRelative and airline$FlightDuration
## t = -42.499, df = 471.79, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.418482 -6.762785
## sample estimates:
## mean of x mean of y
## 0.4872052 7.5778384
# Welch two-sample t-test on PriceRelative vs PitchDifference: tests whether
# the two columns have the same mean (see "difference in means" in the output).
# NOTE(review): as a comparison of averages of two different quantities this
# says nothing about whether price varies with pitch; cor.test() or a
# regression is probably the intended tool — confirm.
t.test(airline$PriceRelative, airline$PitchDifference)
##
## Welch Two Sample t-test
##
## data: airline$PriceRelative and airline$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.367495 -6.033640
## sample estimates:
## mean of x mean of y
## 0.4872052 6.6877729
Linear regression to find the significance of variables on Relative Price.
# Ordinary least squares fit of relative price on pitch difference, width
# difference and flight duration, followed by the coefficient table.
airline.lm <- lm(
  PriceRelative ~ PitchDifference + WidthDifference + FlightDuration,
  data = airline
)
summary(object = airline.lm)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference +
## FlightDuration, data = airline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74824 -0.28372 -0.06032 0.16447 1.15720
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.254902 0.088846 -2.869 0.004309 **
## PitchDifference 0.054755 0.015667 3.495 0.000521 ***
## WidthDifference 0.129969 0.023356 5.565 4.5e-08 ***
## FlightDuration 0.021597 0.005092 4.241 2.7e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3816 on 454 degrees of freedom
## Multiple R-squared: 0.2876, Adjusted R-squared: 0.2828
## F-statistic: 61.08 on 3 and 454 DF, p-value: < 2.2e-16
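Based on the regression, all three predictors (pitch difference, width
difference and flight duration) have a statistically significant positive
relationship with relative price (p < 0.001 for each), although the model
explains only about 29% of its variance (R-squared = 0.29).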
Conclusion
From the analysis above, the difference in price between an economy ticket
and a premium-economy ticket is associated with the extra seat width and
pitch offered in premium economy; the regression also shows that the
difference grows with flight duration.