# Load the six-airline fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors, as shown in the str() output;
# since R 4.0.0 read.csv() would otherwise import them as character vectors.
air <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# Print the type and the first few values of every column.
str(air)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
Description of data
library(psych)
# Summary statistics (psych::describe) for the six variables analysed below.
# Columns are referenced explicitly through `air$` rather than attach(air):
# attach() places the columns on the search path, where they can be masked by
# (or mask) other objects and stay attached for the rest of the session.
describe(air$PriceRelative)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 0.49 0.45 0.36 0.42 0.41 0.02 1.89 1.87 1.17 0.72
## se
## X1 0.02
describe(air$PercentPremiumSeats)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 14.65 4.84 13.21 14.31 2.68 4.71 24.69 19.98 0.71 0.28
## se
## X1 0.23
describe(air$FlightDuration)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 7.58 3.54 7.79 7.57 4.81 1.25 14.66 13.41 -0.07 -1.12
## se
## X1 0.17
describe(air$PitchDifference)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 458 6.69 1.76 7 6.76 0 2 10 8 -0.54 1.78 0.08
describe(air$WidthDifference)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 458 1.63 1.19 1 1.53 0 0 4 4 0.84 -0.53 0.06
describe(air$SeatsTotal)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 235.96 85.29 227 228.73 90.44 98 441 343 0.7 -0.53
## se
## X1 3.99
Histograms
# Histograms of the six analysis variables, drawn in a 2 x 3 grid.
# Each entry maps a column of `air` to the fill colour of its histogram; the
# column name is used as both the x-axis label and the panel title.
hist_cols <- c(
  PriceRelative = "gray",
  PercentPremiumSeats = "green",
  FlightDuration = "red",
  PitchDifference = "blue",
  WidthDifference = "yellow",
  SeatsTotal = "black"
)
# par() returns the previous settings, so the layout can be restored afterwards
# and later plots are not squeezed into the 2 x 3 grid.
old_par <- par(mfrow = c(2, 3))
for (var_name in names(hist_cols)) {
  hist(
    air[[var_name]],
    col = hist_cols[[var_name]],
    xlab = var_name,
    main = var_name
  )
}
par(old_par)
Scatterplot Matrix
library(car)
# Scatterplot matrix of the response and the five numeric predictors.
scatter_vars <- c(
  "PriceRelative", "SeatsTotal", "PercentPremiumSeats",
  "PitchDifference", "WidthDifference", "FlightDuration"
)
# car 3.0 moved the smoother options into the `smooth` list; a top-level
# `spread` argument is no longer recognised there and falls through to the
# underlying plotting calls. Older car versions still expect `spread = FALSE`,
# so pick the form that matches the installed version.
if (utils::packageVersion("car") >= "3.0.0") {
  scatterplotMatrix(
    air[, scatter_vars],
    smooth = list(spread = FALSE),
    main = "Scatter Plot Matrix"
  )
} else {
  scatterplotMatrix(
    air[, scatter_vars],
    spread = FALSE,
    main = "Scatter Plot Matrix"
  )
}
Corrgram
library(corrgram)
# Correlogram of `air` with the variables kept in their original column order
# (order = FALSE). Panels: panel.shade below the diagonal, panel.pie above it,
# panel.minmax on the diagonal, with panel.txt drawing the variable labels.
corrgram(
  air,
  order = FALSE,
  main = "Corrgram of Air ticket price intercorrelations",
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie
)
Multiple Regression Model Analysis
# Fit a linear model of PriceRelative on the seat, cabin-difference, flight,
# airline, aircraft, route-type and travel-month variables in `air`.
regtest <- lm(
  PriceRelative ~ SeatsTotal + PercentPremiumSeats + PitchDifference +
    WidthDifference + FlightDuration + Airline + Aircraft +
    IsInternational + TravelMonth,
  data = air
)
# Print the coefficient table, residual summary and overall fit statistics.
summary(regtest)
##
## Call:
## lm(formula = PriceRelative ~ SeatsTotal + PercentPremiumSeats +
## PitchDifference + WidthDifference + FlightDuration + Airline +
## Aircraft + IsInternational + TravelMonth, data = air)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.86313 -0.20707 -0.05344 0.10901 1.46867
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.816e-02 2.938e-01 -0.198 0.843184
## SeatsTotal -7.584e-05 3.113e-04 -0.244 0.807611
## PercentPremiumSeats -1.347e-02 5.555e-03 -2.425 0.015702 *
## PitchDifference 6.354e-02 6.212e-02 1.023 0.306916
## WidthDifference 5.833e-02 8.204e-02 0.711 0.477513
## FlightDuration 3.479e-02 6.748e-03 5.156 3.82e-07 ***
## AirlineBritish 3.144e-01 1.123e-01 2.800 0.005332 **
## AirlineDelta 6.877e-02 1.852e-01 0.371 0.710612
## AirlineJet 5.230e-01 1.414e-01 3.699 0.000244 ***
## AirlineSingapore 3.051e-01 7.983e-02 3.822 0.000151 ***
## AirlineVirgin 4.521e-01 1.158e-01 3.904 0.000109 ***
## AircraftBoeing 6.997e-03 4.825e-02 0.145 0.884765
## IsInternationalInternational -3.523e-01 2.657e-01 -1.326 0.185520
## TravelMonthJul -1.853e-02 5.276e-02 -0.351 0.725672
## TravelMonthOct 5.427e-02 4.484e-02 1.210 0.226808
## TravelMonthSep -1.055e-02 4.469e-02 -0.236 0.813469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3572 on 442 degrees of freedom
## Multiple R-squared: 0.3923, Adjusted R-squared: 0.3717
## F-statistic: 19.03 on 15 and 442 DF, p-value: < 2.2e-16
This project analyzes the pricing of Premium Economy tickets relative to regular Economy airline tickets. From the summary statistics and histograms, PriceRelative (mean 0.49, SD 0.45) and WidthDifference (mean 1.63, SD 1.19) have standard deviations that are very large relative to their means, indicating wide dispersion in these two variables.
There is a positive correlation between SeatsTotal and SeatsEconomy, PitchPremium and PitchDifference, WidthPremium and WidthDifference, and lastly WidthDifference and PitchDifference. There is a negative correlation between PitchDifference and PitchEconomy, and between WidthDifference and PitchEconomy.
A multiple regression was run with PriceRelative as the dependent variable and SeatsTotal, PercentPremiumSeats, PitchDifference, WidthDifference, FlightDuration, Airline, Aircraft, IsInternational and TravelMonth as independent variables. PercentPremiumSeats, FlightDuration and Airline have statistically significant coefficients (p < 0.05), and the model accounts for roughly 39% of the variance in PriceRelative (adjusted R-squared 0.37). Hence, the relative difference in price between an economy ticket and a premium-economy airline ticket is explained, in part, by flight duration, percentage of premium seats and the airline.