# MINI-PROJECT ON AIRLINE TICKET PRICING ANALYSIS UNDER THE GUIDANCE OF PROF. SAMEER MATHUR (PH.D.), IIM LUCKNOW.
# Premium Economy vs. Economy Ticket Pricing by Airlines ----
# Read the airline pricing data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on every R version; since R 4.0.0
# read.csv() leaves them as character by default, which would change the
# per-level counts shown by summary() below and break plot(<factor>, ...).
sixairlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# Expose the columns by bare name: the plot() formula further down refers to
# PriceEconomy and PricePremium without a `data` argument.
attach(sixairlines.df)
library(psych)
# Per-column overview: quartiles for numeric columns, level counts for factors.
summary(sixairlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) for every
# column, computed with the psych package.
psych::describe(sixairlines.df)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Calculating the means and medians ----
# Print the mean of each column of interest, one value per line, in the same
# order as the results listed underneath.
mean_columns <- c(
  "FlightDuration", "SeatsEconomy", "SeatsPremium", "PitchEconomy",
  "WidthEconomy", "WidthPremium", "PriceEconomy", "PricePremium",
  "PriceRelative", "SeatsTotal", "PitchDifference", "WidthDifference"
)
for (column_name in mean_columns) {
  print(mean(sixairlines.df[[column_name]]))
}
## [1] 7.577838
## [1] 202.3122
## [1] 33.64847
## [1] 31.21834
## [1] 17.83843
## [1] 19.47162
## [1] 1327.076
## [1] 1845.258
## [1] 0.4872052
## [1] 235.9607
## [1] 6.687773
## [1] 1.633188
# Medians of the variables used in the following analysis ----
# Print the median of each column of interest, one value per line, in the same
# order as the results listed underneath. invisible() suppresses the list that
# lapply() would otherwise echo after the individual values.
invisible(lapply(
  c(
    "FlightDuration", "SeatsEconomy", "SeatsPremium", "PitchEconomy",
    "WidthEconomy", "WidthPremium", "PriceEconomy", "PricePremium",
    "PriceRelative", "SeatsTotal", "PitchDifference", "WidthDifference"
  ),
  function(column_name) print(median(sixairlines.df[[column_name]]))
))
## [1] 7.79
## [1] 185
## [1] 36
## [1] 31
## [1] 18
## [1] 19
## [1] 1242
## [1] 1737
## [1] 0.365
## [1] 227
## [1] 7
## [1] 1
# Price analysis for Premium Economy as well as Economy: scatter of
# PricePremium against PriceEconomy. The columns are taken from `data`, so the
# plot does not depend on the data frame having been attach()ed.
plot(
  ~ PriceEconomy + PricePremium,
  data = sixairlines.df,
  main = "Premium Economy Price vs Economy Price"
)
# Reference line y = x (intercept 0, slope 1): points above it are flights
# whose premium fare exceeds the economy fare.
abline(a = 0, b = 1)
# Histograms (lattice) of the PitchDifference and WidthDifference columns.
library(lattice)

# Histogram of PitchDifference.
histogram(
  ~ PitchDifference,
  data = sixairlines.df,
  main = "Pitch Difference Analysis",
  xlab = "Difference in Pitch",
  ylab = "Total_percentage",
  col = "yellow"
)

# Histogram of WidthDifference.
histogram(
  ~ WidthDifference,
  data = sixairlines.df,
  main = "Distribution of Difference in Seat Width",
  xlab = "Difference in Seat Width",
  ylab = "Total_percentage",
  col = "orange"
)
# Frequency of each distinct PitchDifference value. Wrapping the assignment in
# parentheses stores the table and prints it in one step.
(pitchDifference <- table(sixairlines.df$PitchDifference))
##
## 2 3 6 7 10
## 24 16 121 243 54
# Scatterplots for the Economy and Premium Economy classes ----
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Price against flight duration for each fare class (car::scatterplot).
# The columns are supplied through `data` rather than by attaching the data
# frame a second time; a repeated attach() only stacks another copy on the
# search path and emits "objects are masked" messages.
scatterplot(
  PriceEconomy ~ FlightDuration,
  data = sixairlines.df,
  main = "Scatterplot for PriceEconomy versus FlightDuration",
  xlab = "Flight_Duration",
  ylab = "Price_Economy"
)
scatterplot(
  PricePremium ~ FlightDuration,
  data = sixairlines.df,
  main = "Scatterplot for Price Premium versus FlightDuration",
  xlab = "Flight_Duration",
  ylab = "Price_Premium",
  col = "red"
)
# Pairwise scatterplot matrices (car::scatterplotMatrix).
library(car)

# Seat counts and prices of both classes.
scatterplotMatrix(
  ~ SeatsEconomy + SeatsPremium + PriceEconomy + PricePremium,
  data = sixairlines.df,
  cex = 0.8
)

# Relative price against the pitch/width differences and the share of
# premium seats.
scatterplotMatrix(
  ~ PriceRelative + PitchDifference + WidthDifference + PercentPremiumSeats,
  data = sixairlines.df,
  cex = 0.8
)
# Number of observations per airline.
counts <- table(sixairlines.df$Airline)
barplot(counts, main = "Distribution of airlines", xlab = "Airline", col = "brown")

# Visualizing the data using a boxplot: flight duration per airline.
# The data set contains six airlines, so six fill colours are supplied; with
# only five, the sixth box would silently reuse the first colour.
boxplot(
  FlightDuration ~ Airline,
  data = sixairlines.df,
  xlab = "Airlines",
  ylab = "FlightDuration",
  col = c("skyblue", "purple", "orange", "green", "yellow", "red"),
  main = "Flight Duration for Different Airlines"
)
# Boxplots to analyse various categories ----
# Single-variable boxplots of the pitch and width differences.
boxplot(sixairlines.df$PitchDifference, main = "Pitch Difference", horizontal = TRUE, col = "blue")
boxplot(sixairlines.df$WidthDifference, main = "Width Difference", horizontal = TRUE, col = "blue")

# Economy price per airline. Six colours for six airlines (five would recycle
# the first colour onto the sixth box); `data` is used so the default axis
# labels are the plain column names.
boxplot(
  PriceEconomy ~ Airline,
  data = sixairlines.df,
  col = c("skyblue", "purple", "orange", "green", "yellow", "red"),
  main = "Airlines versus Price_Economy",
  horizontal = TRUE
)

# Relative price per airline. With horizontal = TRUE the numeric variable runs
# along the x axis and the groups along the y axis, so xlab must describe the
# relative price and ylab the airline; the grouping variable is Airline.
boxplot(
  PriceRelative ~ Airline,
  data = sixairlines.df,
  main = "Airline vs Relative Price",
  xlab = "Relative Price",
  ylab = "Airline",
  col = c("grey", "pink", "red", "white", "black", "skyblue"),
  horizontal = TRUE
)

# Distribution of the premium price on its own.
boxplot(
  sixairlines.df$PricePremium,
  xlab = "PricePremium ",
  main = "PricePremium distribution boxplot",
  col = "green",
  horizontal = TRUE
)
# Using corrgram ----
library(corrgram)
# Correlogram of the data frame: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal. `order = TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
corrgram(
  sixairlines.df,
  order = TRUE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  main = "Corrgram of all variables"
)
# Testing the correlation between PriceRelative and PitchDifference ----
# Pearson correlation test (two-sided) between the relative price and the
# pitch difference. The argument expressions are kept verbatim because they
# are echoed in the "data:" line of the printed result.
cor.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$PitchDifference,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: sixairlines.df$PriceRelative and sixairlines.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
# The p-value is less than 0.05, so we can reject the null hypothesis that the true correlation between PriceRelative and PitchDifference is zero.
# Testing the correlation between PriceRelative and WidthDifference ----
# Pearson correlation test (two-sided) between the relative price and the
# seat-width difference. The argument expressions are kept verbatim because
# they are echoed in the "data:" line of the printed result.
cor.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$WidthDifference,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: sixairlines.df$PriceRelative and sixairlines.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
# The p-value is less than 0.05, so we can reject the null hypothesis that the true correlation between PriceRelative and WidthDifference is zero.
# T-test analysis ----
# Welch two-sample t-test (unpaired, unequal variances) comparing the mean of
# PriceRelative with the mean of PitchDifference.
# NOTE(review): the two samples are on different scales (means of about 0.49
# and 6.69), so a difference in means is hard to interpret -- confirm that
# this is the intended hypothesis.
t.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$PitchDifference,
  alternative = "two.sided",
  paired = FALSE,
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: sixairlines.df$PriceRelative and sixairlines.df$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.367495 -6.033640
## sample estimates:
## mean of x mean of y
## 0.4872052 6.6877729
# Welch two-sample t-test (unpaired, unequal variances) comparing the mean of
# PriceRelative with the mean of PercentPremiumSeats.
# NOTE(review): the two samples are on different scales (means of about 0.49
# and 14.65), so a difference in means is hard to interpret -- confirm that
# this is the intended hypothesis.
t.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$PercentPremiumSeats,
  alternative = "two.sided",
  paired = FALSE,
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: sixairlines.df$PriceRelative and sixairlines.df$PercentPremiumSeats
## t = -62.302, df = 464.91, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.60477 -13.71164
## sample estimates:
## mean of x mean of y
## 0.4872052 14.6454148
# Linear model analysis / regression model ----
# Linear regression of the relative price on the pitch difference, the width
# difference and the percentage of premium seats. Despite its name, `mini.df`
# holds the fitted model object, not a data frame.
mini.df <- lm(
  PriceRelative ~ PitchDifference + WidthDifference + PercentPremiumSeats,
  data = sixairlines.df
)
# Coefficient table, residual summary and overall fit statistics.
print(summary(mini.df))
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference +
## PercentPremiumSeats, data = sixairlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.88643 -0.29471 -0.05005 0.19013 1.17157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.031508 0.097220 -0.324 0.746
## PitchDifference 0.064596 0.016171 3.994 7.56e-05 ***
## WidthDifference 0.104782 0.024813 4.223 2.92e-05 ***
## PercentPremiumSeats -0.005764 0.003971 -1.451 0.147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3882 on 454 degrees of freedom
## Multiple R-squared: 0.2627, Adjusted R-squared: 0.2579
## F-statistic: 53.93 on 3 and 454 DF, p-value: < 2.2e-16
# Linear regression of the relative price on the travel month. The variables
# are named in the formula and looked up through `data` (instead of writing
# sixairlines.df$... inside the formula), which gives clean coefficient names
# and lets predict() work with new data.
fit <- lm(PriceRelative ~ TravelMonth, data = sixairlines.df)
summary(fit)
##
## Call:
## lm(formula = PriceRelative ~ TravelMonth, data = sixairlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4908 -0.3779 -0.1179 0.2523 1.4321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.47661 0.04005 11.899 <2e-16 ***
## TravelMonthJul 0.02205 0.06574 0.335 0.737
## TravelMonthOct 0.04417 0.05665 0.780 0.436
## TravelMonthSep -0.01871 0.05643 -0.332 0.740
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4514 on 454 degrees of freedom
## Multiple R-squared: 0.002997, Adjusted R-squared: -0.003591
## F-statistic: 0.4549 on 3 and 454 DF, p-value: 0.714
# Insight / conclusion ----
# Therefore, we can reject the null hypotheses, because the p-values we
# obtained are below our significance level of 0.05. We can also state that
# prices are considerably higher when the travel month as well as the class
# changes, since the premium economy class offers many more facilities and can
# provide many other services to its passengers. This is the main reason many
# people choose it: for their comfort. Overall, a figure of almost 83% was
# observed from the tests conducted above.
# Note: the regression of PriceRelative on TravelMonth above has an overall
# p-value of 0.714, so the travel-month effect is not statistically supported
# by that model.