# Load the six-airlines data set.
# stringsAsFactors is set explicitly: read.csv() stopped converting strings to
# factors by default in R 4.0.0, and the structure listed below shows Airline,
# Aircraft, TravelMonth and IsInternational as factors.
air <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
str(air)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Description of data

library(psych)
# Summary statistics (psych::describe) for the six numeric variables of
# interest. Columns are referenced explicitly through `air$...` instead of
# attach(air): attach() leaves the columns on the search path for the rest of
# the session, where they can silently mask or be masked by other objects.
describe(air$PriceRelative)
##    vars   n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 458 0.49 0.45   0.36    0.42 0.41 0.02 1.89  1.87 1.17     0.72
##      se
## X1 0.02
describe(air$PercentPremiumSeats)
##    vars   n  mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 458 14.65 4.84  13.21   14.31 2.68 4.71 24.69 19.98 0.71     0.28
##      se
## X1 0.23
describe(air$FlightDuration)
##    vars   n mean   sd median trimmed  mad  min   max range  skew kurtosis
## X1    1 458 7.58 3.54   7.79    7.57 4.81 1.25 14.66 13.41 -0.07    -1.12
##      se
## X1 0.17
describe(air$PitchDifference)
##    vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 458 6.69 1.76      7    6.76   0   2  10     8 -0.54     1.78 0.08
describe(air$WidthDifference)
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 458 1.63 1.19      1    1.53   0   0   4     4 0.84    -0.53 0.06
describe(air$SeatsTotal)
##    vars   n   mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 458 235.96 85.29    227  228.73 90.44  98 441   343  0.7    -0.53
##      se
## X1 3.99

Histograms

# Histograms of the six numeric variables, laid out on a 2 x 3 grid.
# Each entry maps a column of `air` to the fill colour of its histogram; the
# column name doubles as the x-axis label and the panel title.
hist_cols <- c(
  PriceRelative       = "gray",
  PercentPremiumSeats = "green",
  FlightDuration      = "red",
  PitchDifference     = "blue",
  WidthDifference     = "yellow",
  SeatsTotal          = "black"
)

# par() returns the previous settings, so the layout can be restored once the
# grid is drawn and later plots are not forced into the 2 x 3 layout.
old_par <- par(mfrow = c(2, 3))
for (var in names(hist_cols)) {
  hist(air[[var]], col = hist_cols[[var]], xlab = var, main = var)
}
par(old_par)

Scatterplot Matrix

library(car)
# Pairwise scatter plots of the relative price against the seat-layout and
# flight-duration variables.
# NOTE(review): `spread` as a top-level argument appears to come from the
# car 2.x interface; newer car versions may expect it inside
# `smooth = list(...)` -- confirm against the installed car version.
scatterplotMatrix(
  air[, c(
    "PriceRelative",
    "SeatsTotal",
    "PercentPremiumSeats",
    "PitchDifference",
    "WidthDifference",
    "FlightDuration"
  )],
  spread = FALSE,
  main = "Scatter Plot Matrix"
)

Corrgram

library(corrgram)
# Correlogram over the whole data frame, with variables kept in their
# original column order (order = FALSE). Shaded cells fill the lower
# triangle, pie glyphs the upper triangle, and the diagonal uses the
# min/max panel.
corrgram(
  air,
  order       = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel  = panel.minmax,
  text.panel  = panel.txt,
  main        = "Corrgram of Air ticket price intercorrelations"
)

Multiple Regression Model Analysis

# Ordinary least-squares fit of PriceRelative on the seat-layout, flight and
# categorical predictors. The formula is written inline so that the `Call:`
# section printed by summary() shows the full model specification.
regtest <- lm(
  PriceRelative ~ SeatsTotal + PercentPremiumSeats + PitchDifference +
    WidthDifference + FlightDuration + Airline + Aircraft +
    IsInternational + TravelMonth,
  data = air
)
summary(regtest)
## 
## Call:
## lm(formula = PriceRelative ~ SeatsTotal + PercentPremiumSeats + 
##     PitchDifference + WidthDifference + FlightDuration + Airline + 
##     Aircraft + IsInternational + TravelMonth, data = air)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86313 -0.20707 -0.05344  0.10901  1.46867 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -5.816e-02  2.938e-01  -0.198 0.843184    
## SeatsTotal                   -7.584e-05  3.113e-04  -0.244 0.807611    
## PercentPremiumSeats          -1.347e-02  5.555e-03  -2.425 0.015702 *  
## PitchDifference               6.354e-02  6.212e-02   1.023 0.306916    
## WidthDifference               5.833e-02  8.204e-02   0.711 0.477513    
## FlightDuration                3.479e-02  6.748e-03   5.156 3.82e-07 ***
## AirlineBritish                3.144e-01  1.123e-01   2.800 0.005332 ** 
## AirlineDelta                  6.877e-02  1.852e-01   0.371 0.710612    
## AirlineJet                    5.230e-01  1.414e-01   3.699 0.000244 ***
## AirlineSingapore              3.051e-01  7.983e-02   3.822 0.000151 ***
## AirlineVirgin                 4.521e-01  1.158e-01   3.904 0.000109 ***
## AircraftBoeing                6.997e-03  4.825e-02   0.145 0.884765    
## IsInternationalInternational -3.523e-01  2.657e-01  -1.326 0.185520    
## TravelMonthJul               -1.853e-02  5.276e-02  -0.351 0.725672    
## TravelMonthOct                5.427e-02  4.484e-02   1.210 0.226808    
## TravelMonthSep               -1.055e-02  4.469e-02  -0.236 0.813469    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3572 on 442 degrees of freedom
## Multiple R-squared:  0.3923, Adjusted R-squared:  0.3717 
## F-statistic: 19.03 on 15 and 442 DF,  p-value: < 2.2e-16

This project analyzes the pricing of Premium Economy tickets relative to regular Economy airline tickets. From the summary statistics and histograms, we find that PriceRelative (mean 0.49, SD 0.45) and WidthDifference (mean 1.63, SD 1.19) have standard deviations that are large relative to their means, indicating wide dispersion in both variables.

There is a positive correlation between SeatsTotal and SeatsEconomy, between PitchPremium and PitchDifference, between WidthPremium and WidthDifference, and lastly between WidthDifference and PremiumDifference. There is a negative correlation between PitchDifference and PitchEconomy, and between WidthDifference and PitchEconomy, and vice versa.

We ran a multiple regression with PriceRelative as the dependent variable and SeatsTotal, PercentPremiumSeats, PitchDifference, WidthDifference, FlightDuration, Airline, Aircraft, IsInternational and TravelMonth as the independent variables. PercentPremiumSeats, FlightDuration and Airline have statistically significant coefficients. Hence, the relative difference in price between an economy ticket and a premium-economy airline ticket is explained by flight duration, the percentage of premium seats and the airline.