1

Read the data into R

# Load the six-airline fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0, where
# read.csv() no longer converts strings by default, so that summary() reports
# per-level counts for them.
a.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

2

Summarize the data to understand the mean, median, standard deviation of each variable

# Print per-column summaries of a.df: min, quartiles, median and mean for
# numeric columns, and level counts for factor columns.
# NOTE(review): summary() does not report standard deviations; compute them
# separately (e.g. with sd()) if they are needed.
summary(a.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

3

Comparing Premium Economy Ticket Prices and Economy Ticket Prices.

# Scatter plot of Premium Economy fare (y-axis) against Economy fare (x-axis).
# Bare column names with data = a.df, plus explicit axis labels, avoid axis
# titles such as "a.df$PriceEconomy".
plot(
  PricePremium ~ PriceEconomy,
  data = a.df,
  main = "Premium Economy Price vs. Economy Price",
  xlab = "Economy Price",
  ylab = "Premium Economy Price"
)
# Reference line y = x (intercept 0, slope 1): points above it are rows whose
# Premium Economy fare exceeds the Economy fare.
abline(a = 0, b = 1)

Analysing Pitch Difference of Premium Economy seats and the pitch of Economy seats.

# lattice supplies histogram().
library(lattice)
# Distribution of PitchDifference over all rows; the column is looked up in
# a.df through the data argument.
histogram(
  ~ PitchDifference,
  data = a.df,
  main = "Distribution of Pitch Difference",
  xlab = "Difference in Pitch"
)

Analysing effect of Pitch Difference on the relative price of Economy and Premium Economy.

# Mean Economy price, Premium Economy price and relative price for each
# distinct value of PitchDifference.
rel_pr <- aggregate(
  cbind(PriceEconomy, PricePremium, PriceRelative) ~ PitchDifference,
  data = a.df,
  FUN = mean
)
# car supplies scatterplot().
library(car)
# Mean relative price against pitch difference.
# NOTE(review): rel_pr has only one row per distinct pitch difference, so the
# smoother has few points to work with and may warn that it cannot fit the
# spread.
scatterplot(
  rel_pr$PitchDifference,
  rel_pr$PriceRelative,
  main = "Relative Price Difference & Pitch",
  xlab = "Pitch Difference",
  ylab = "Relative Price b/w Economy and Premium Economy"
)
## Warning in smoother(.x, .y, col = col[2], log.x = logged("x"), log.y =
## logged("y"), : could not fit positive part of the spread

Analysing effect of Pitch Difference on the price of Economy and Premium Economy.

# Box plots of relative price, one box per pitch-difference value.
# In boxplot(y ~ x) the grouping variable (PitchDifference) goes on the x-axis
# and the response (PriceRelative) on the y-axis, so the axis labels are set
# to match.
boxplot(
  PriceRelative ~ PitchDifference,
  data = a.df,
  main = "Relative Price Difference vs.Pitch",
  xlab = "Pitch Difference",
  ylab = "Relative Price b/w Economy and Premium Economy"
)

Comparing distribution of the difference in the width of Premium Economy seats and the width of Economy seats.

 library(lattice)
# Distribution of WidthDifference over all rows (histogram() comes from
# lattice); the column is looked up in a.df through the data argument.
histogram(
  ~ WidthDifference,
  data = a.df,
  main = "Distribution of Difference in Seat Width",
  xlab = "Difference in Seat Width"
)

Analysing effect of plane capacity

# Relative price against total seat count, drawn as points ("p") over a
# background grid ("g"); xyplot() comes from lattice.
xyplot(
  PriceRelative ~ SeatsTotal,
  data = a.df,
  type = c("p", "g"),
  xlab = "Total Seats (Economy + Premium Economy Seats)",
  ylab = "Rel. Price Difference"
)

Analysing percentage of Premium Economy Seats

# Single box plot of the PercentPremiumSeats column across all rows.
boxplot(
  a.df[["PercentPremiumSeats"]],
  main = "Percentage of Premium Economy Seats",
  ylab = "Percentage of Premium Economy Seats in Plane"
)

4

Scatter plots to understand how the variables are correlated pairwise

# car supplies scatterplotMatrix().
library(car)
# Pairwise scatter plots of both fares, plane capacity, share of premium
# seats, and the two seat-comfort differences.
scatterplotMatrix(
  ~ PricePremium + PriceEconomy + SeatsTotal + PercentPremiumSeats +
    PitchDifference + WidthDifference,
  data = a.df,
  main = "Premium Economy vs. Economy Airfares"
)

5

Create a Correlation Matrix (with p-values)

library(Hmisc)
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Columns for the first correlation table: both fares and the two
# seat-comfort differences.
colairlines <- c(
  "PricePremium", "PriceEconomy",
  "PitchDifference", "WidthDifference"
)
# rcorr() comes from Hmisc and is given the selected columns as a matrix.
corMatrix <- rcorr(as.matrix(a.df[colairlines]))
# Show the correlation table.
corMatrix
##                 PricePremium PriceEconomy PitchDifference WidthDifference
## PricePremium            1.00         0.90           -0.02           -0.01
## PriceEconomy            0.90         1.00           -0.10           -0.08
## PitchDifference        -0.02        -0.10            1.00            0.76
## WidthDifference        -0.01        -0.08            0.76            1.00
## 
## n= 458 
## 
## 
## P
##                 PricePremium PriceEconomy PitchDifference WidthDifference
## PricePremium                 0.0000       0.6998          0.8059         
## PriceEconomy    0.0000                    0.0332          0.0708         
## PitchDifference 0.6998       0.0332                       0.0000         
## WidthDifference 0.8059       0.0708       0.0000
# Columns for the second correlation table: both fares, plane capacity and
# the share of premium seats.
colairlines2 <- c(
  "PricePremium", "PriceEconomy",
  "SeatsTotal", "PercentPremiumSeats"
)
# rcorr() comes from Hmisc and is given the selected columns as a matrix.
corMatrix2 <- rcorr(as.matrix(a.df[colairlines2]))
# Show the correlation table.
corMatrix2
##                     PricePremium PriceEconomy SeatsTotal
## PricePremium                1.00         0.90       0.19
## PriceEconomy                0.90         1.00       0.13
## SeatsTotal                  0.19         0.13       1.00
## PercentPremiumSeats         0.12         0.07      -0.22
##                     PercentPremiumSeats
## PricePremium                       0.12
## PriceEconomy                       0.07
## SeatsTotal                        -0.22
## PercentPremiumSeats                1.00
## 
## n= 458 
## 
## 
## P
##                     PricePremium PriceEconomy SeatsTotal
## PricePremium                     0.0000       0.0000    
## PriceEconomy        0.0000                    0.0045    
## SeatsTotal          0.0000       0.0045                 
## PercentPremiumSeats 0.0127       0.1628       0.0000    
##                     PercentPremiumSeats
## PricePremium        0.0127             
## PriceEconomy        0.1628             
## SeatsTotal          0.0000             
## PercentPremiumSeats

6

Draw a Corrgram

library(Hmisc)
library(car)
# corrgram supplies corrgram() and the panel.* functions used below.
library(corrgram)
# All six variables of interest; this overwrites the shorter colairlines
# vector defined earlier.
colairlines <- c(
  "PricePremium", "PriceEconomy",
  "PitchDifference", "WidthDifference",
  "SeatsTotal", "PercentPremiumSeats"
)
# Corrgram layout: scatter points below the diagonal, pie glyphs above it,
# and variable names with min/max values on the diagonal.
corrgram(
  a.df[colairlines],
  order = TRUE,
  main = "Premium Economy vs. Economy Airfares",
  lower.panel = panel.pts,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt
)

7

Run a t-test of the following null hypothesis: Premium Economy and Economy airfares are the same.

# Two-sample t-test of mean Premium Economy fare against mean Economy fare.
# The defaults are spelled out: two-sided alternative, unequal variances
# (Welch), 95% confidence interval.
# NOTE(review): both fares come from the same rows of a.df, so a paired test
# (paired = TRUE) may be more appropriate; confirm with the analysis owner.
t.test(
  x = a.df$PricePremium,
  y = a.df$PriceEconomy,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  a.df$PricePremium and a.df$PriceEconomy
## t = 6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  369.2793 667.0831
## sample estimates:
## mean of x mean of y 
##  1845.258  1327.076

Since the p-value is less than 0.05, we reject the null hypothesis that they are equal: Premium Economy airfares are significantly higher than Economy airfares on average.

Regression Analysis

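In this model we regress PricePremium on PriceEconomy, the pitch and width differences, the percentage of premium seats, total seats, and the flight characteristics (international/domestic, travel month, flight duration, and aircraft).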

# Model formula: Premium Economy fare explained by the Economy fare, the
# seat-comfort differences, capacity measures and flight characteristics.
m <- PricePremium ~
  PriceEconomy +
  PitchDifference + WidthDifference +
  PercentPremiumSeats + SeatsTotal +
  IsInternational + TravelMonth + FlightDuration + Aircraft
# Ordinary least-squares fit on the full data set.
fit <- lm(formula = m, data = a.df)
# Coefficient table, residual summary and overall fit statistics.
summary(fit)
## 
## Call:
## lm(formula = m, data = a.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -977.2 -246.3  -47.9  135.2 3419.7 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.211e+03  1.755e+02  -6.898 1.82e-11 ***
## PriceEconomy                  1.064e+00  3.114e-02  34.175  < 2e-16 ***
## PitchDifference               8.510e+01  3.913e+01   2.175 0.030163 *  
## WidthDifference               1.240e+02  3.438e+01   3.607 0.000345 ***
## PercentPremiumSeats           3.177e+01  5.250e+00   6.052 3.04e-09 ***
## SeatsTotal                    1.925e+00  3.360e-01   5.729 1.87e-08 ***
## IsInternationalInternational -7.537e+02  2.135e+02  -3.530 0.000458 ***
## TravelMonthJul               -3.441e+01  7.074e+01  -0.486 0.626904    
## TravelMonthOct                2.692e+01  6.036e+01   0.446 0.655795    
## TravelMonthSep               -2.097e+00  6.015e+01  -0.035 0.972203    
## FlightDuration                8.455e+01  8.809e+00   9.598  < 2e-16 ***
## AircraftBoeing               -2.082e+00  5.651e+01  -0.037 0.970625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 480.7 on 446 degrees of freedom
## Multiple R-squared:  0.8641, Adjusted R-squared:  0.8607 
## F-statistic: 257.7 on 11 and 446 DF,  p-value: < 2.2e-16