---
title: "Airlines Travel Class review Case"
author: "Puneet Rajput"
output: html_document
---
# Load the six-airline fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 as
# well, so that summary() reports per-level counts for them.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# View() opens a data viewer; only call it in an interactive session so that
# non-interactive rendering of the report is not interrupted.
if (interactive()) {
  View(airlines.df)
}
# Per-column summary of the loaded data.
summary(airlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# scatterplot() below is taken from the car package.
library(car)
# Premium fare plotted against economy fare for the rows of airlines.df.
scatterplot(
  PricePremium ~ PriceEconomy,
  data = airlines.df,
  main = "Premium Vs. Economy Price",
  xlab = "Economy Price",
  ylab = "Premium Price",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Most people prefer travelling in economy class because of its low price
Analysing the role of pitch difference.
# Frequency of each distinct PitchDifference value.
PD <- table(airlines.df$PitchDifference)
PD
##
## 2 3 6 7 10
## 24 16 121 243 54
# Average pitch difference over all rows.
mean(airlines.df$PitchDifference)
## [1] 6.687773
# Mean PriceRelative within each PitchDifference group.
CD <- aggregate(PriceRelative ~ PitchDifference, data = airlines.df, FUN = mean)
CD
## PitchDifference PriceRelative
## 1 2 0.08708333
## 2 3 0.08125000
## 3 6 0.34082645
## 4 7 0.51888889
## 5 10 0.97074074
# NOTE(review): hist() and boxplot() below are base graphics and scatterplot()
# is from car; lattice does not appear to be needed in this section.
library(lattice)
# Distribution of the pitch difference (plot title typo "Pich" corrected).
hist(
  airlines.df$PitchDifference,
  xlab = "pitch difference",
  main = "Pitch Difference"
)

# Relative price grouped by pitch difference, drawn with horizontal boxes.
boxplot(
  PriceRelative ~ PitchDifference,
  data = airlines.df,
  main = "Relative Price Difference vs. Pitch",
  ylab = "Pitch Difference",
  xlab = "Relative Price b/w Economy and Premium Economy",
  horizontal = TRUE
)

# Relative price against pitch difference.
scatterplot(
  PriceRelative ~ PitchDifference,
  data = airlines.df,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  xlab = "Pitch",
  main = "Relative PriceDifference Vs. Pitch"
)

Average pitch difference = 6.688; the most frequently occurring pitch difference is 7, with an average PriceRelative of 0.519
The boxplot and scatterplot reveal that an increased pitch difference increases the relative price of the premium class
Analysing the role of WidthDifference.
# Count how many rows fall on each WidthDifference value.
PD <- table(airlines.df[["WidthDifference"]])
PD
##
## 0 1 2 3 4
## 40 264 32 68 54
# Overall mean of the width difference.
mean(airlines.df[["WidthDifference"]])
## [1] 1.633188
# Average PriceRelative for every WidthDifference group.
CD <- aggregate(
  PriceRelative ~ WidthDifference,
  data = airlines.df,
  FUN = mean
)
CD
## WidthDifference PriceRelative
## 1 0 0.0847500
## 2 1 0.4184091
## 3 2 0.2296875
## 4 3 0.7282353
## 5 4 0.9707407
library(lattice)
# Histogram of the width difference.
hist(
  airlines.df[["WidthDifference"]],
  main = "width Difference",
  xlab = "width difference"
)

# Relative price per width-difference group, boxes laid out horizontally.
boxplot(
  PriceRelative ~ WidthDifference,
  data = airlines.df,
  horizontal = TRUE,
  main = "Relative Price Difference vs. Width",
  xlab = "Relative Price b/w Economy and Premium Economy",
  ylab = "Width Difference"
)

# Relative price plotted against the width difference.
scatterplot(
  PriceRelative ~ WidthDifference,
  data = airlines.df,
  main = "Relative Price Difference Vs. Width",
  xlab = "Width",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Average width difference = 1.633; the most frequently occurring width difference is 1, with an average PriceRelative of 0.418
The boxplot and scatterplot reveal that an increased WidthDifference increases the relative price of the premium class
Correlation Visualisation
# The data frame is passed explicitly through `data =` instead of being
# attach()-ed, so the formulas do not depend on the search path.
scatterplotMatrix(
  ~ PriceRelative + PricePremium + PriceEconomy + PercentPremiumSeats,
  data = airlines.df,
  main = "Premium Vs. Economy Travel Class Analysis"
)

scatterplotMatrix(
  ~ PriceEconomy + PricePremium + WidthDifference + PitchDifference,
  data = airlines.df,
  main = "Premium Vs. Economy Travel Class Analysis"
)

# Columns shared by the two correlation plots below.
corr_cols <- c(
  "PriceRelative", "PricePremium", "PriceEconomy",
  "PitchDifference", "WidthDifference", "PercentPremiumSeats"
)

library(corrplot)
## corrplot 0.84 loaded
# Correlation matrix (cor() default method) of the selected columns, drawn
# as ellipses.
corrplot(
  corr = cor(airlines.df[, corr_cols], use = "complete.obs"),
  method = "ellipse"
)

library(corrgram)
# Corrgram of the same columns: shaded lower panel, pie upper panel,
# min/max on the diagonal.
corrgram(
  airlines.df[, corr_cols],
  lower.panel = panel.shade,
  text.panel = panel.txt,
  upper.panel = panel.pie,
  diag.panel = panel.minmax
)

Correlation numerics
# From here on, print numbers with two significant digits.
options(digits = 2)
# Kendall rank-correlation matrix of the price, comfort-difference and
# premium-seat-share columns.
cor(
  airlines.df[, c(
    "PriceRelative", "PricePremium", "PriceEconomy",
    "PitchDifference", "WidthDifference", "PercentPremiumSeats"
  )],
  method = "kendall",
  use = "complete.obs"
)
## PriceRelative PricePremium PriceEconomy
## PriceRelative 1.0000 -0.0078 -0.2042
## PricePremium -0.0078 1.0000 0.8060
## PriceEconomy -0.2042 0.8060 1.0000
## PitchDifference 0.4224 -0.0887 -0.1863
## WidthDifference 0.3370 0.1034 0.0236
## PercentPremiumSeats 0.0050 0.0084 -0.0028
## PitchDifference WidthDifference PercentPremiumSeats
## PriceRelative 0.422 0.337 0.0050
## PricePremium -0.089 0.103 0.0084
## PriceEconomy -0.186 0.024 -0.0028
## PitchDifference 1.000 0.595 0.1613
## WidthDifference 0.595 1.000 -0.1698
## PercentPremiumSeats 0.161 -0.170 1.0000
# Each test evaluates its columns inside airlines.df via with(), so no
# attach() is needed and nothing on the search path gets masked.
# NOTE(review): both columns of every test come from the same rows of
# airlines.df; confirm whether a paired test (paired = TRUE) was intended.

# Premium fare vs. economy fare.
with(airlines.df, t.test(PricePremium, PriceEconomy))
##
## Welch Two Sample t-test
##
## data: PricePremium and PriceEconomy
## t = 7, df = 900, p-value = 2e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 369 667
## sample estimates:
## mean of x mean of y
## 1845 1327
# Premium seat pitch vs. economy seat pitch.
with(airlines.df, t.test(PitchPremium, PitchEconomy))
##
## Welch Two Sample t-test
##
## data: PitchPremium and PitchEconomy
## t = 100, df = 700, p-value <2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.6 6.8
## sample estimates:
## mean of x mean of y
## 38 31
# NOTE(review): this compares premium seat WIDTH with premium seat PITCH.
# The two tests above compare premium vs. economy, so WidthEconomy was
# presumably intended as the second argument -- confirm and re-run.
with(airlines.df, t.test(WidthPremium, PitchPremium))
##
## Welch Two Sample t-test
##
## data: WidthPremium and PitchPremium
## t = -200, df = 900, p-value <2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -19 -18
## sample estimates:
## mean of x mean of y
## 19 38
Prices are negatively correlated, and pitch/width differences positively correlated, with PriceRelative
Pitch and width differences are positively correlated with each other
Premium and Economy prices are strongly positively correlated
The pitches, widths and prices of the Premium and Economy classes differ from each other to a statistically significant degree
The prices are shown to be statistically correlated with each other
Pitch and Width differences aren’t statistically associated with each other