Reading the data

# Load the six-airlines data set from the working directory.
# stringsAsFactors = TRUE is set explicitly: the plots and summary below rely
# on Airline, Aircraft, TravelMonth and IsInternational being factors, and
# read.csv() only converts strings to factors by default before R 4.0.
airline_df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# Show the first ten rows as a quick sanity check of the import.
head(airline_df, n = 10)
##    Airline Aircraft FlightDuration TravelMonth IsInternational
## 1  British   Boeing          12.25         Jul   International
## 2  British   Boeing          12.25         Aug   International
## 3  British   Boeing          12.25         Sep   International
## 4  British   Boeing          12.25         Oct   International
## 5  British   Boeing           8.16         Aug   International
## 6  British   Boeing           8.16         Sep   International
## 7  British   Boeing           8.16         Oct   International
## 8  British   Boeing           6.50         Aug   International
## 9  British   Boeing           6.50         Sep   International
## 10 British   Boeing          11.50         Sep   International
##    SeatsEconomy SeatsPremium PitchEconomy PitchPremium WidthEconomy
## 1           122           40           31           38           18
## 2           122           40           31           38           18
## 3           122           40           31           38           18
## 4           122           40           31           38           18
## 5           122           40           31           38           18
## 6           122           40           31           38           18
## 7           122           40           31           38           18
## 8           122           40           31           38           18
## 9           122           40           31           38           18
## 10          122           40           31           38           18
##    WidthPremium PriceEconomy PricePremium PriceRelative SeatsTotal
## 1            19         2707         3725          0.38        162
## 2            19         2707         3725          0.38        162
## 3            19         2707         3725          0.38        162
## 4            19         2707         3725          0.38        162
## 5            19         1793         2999          0.67        162
## 6            19         1793         2999          0.67        162
## 7            19         1793         2999          0.67        162
## 8            19         1476         2997          1.03        162
## 9            19         1476         2997          1.03        162
## 10           19         1705         2989          0.75        162
##    PitchDifference WidthDifference PercentPremiumSeats
## 1                7               1               24.69
## 2                7               1               24.69
## 3                7               1               24.69
## 4                7               1               24.69
## 5                7               1               24.69
## 6                7               1               24.69
## 7                7               1               24.69
## 8                7               1               24.69
## 9                7               1               24.69
## 10               7               1               24.69

Summary

summary(airline_df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Airline vs Number of Seats in Economy Class

# Economy-class seat counts per airline (Airline is a factor, so plot()
# draws one box per airline).
plot(
  x = airline_df$Airline,
  y = airline_df$SeatsEconomy,
  col = c("orange", "blue", "light green", "yellow", "purple", "black"),
  main = "Airline vs No. of seats in Economy Class"
)

Airline vs Number of Seats in Premium Class

# Premium-class seat counts per airline (Airline is a factor, so plot()
# draws one box per airline).
plot(
  x = airline_df$Airline,
  y = airline_df$SeatsPremium,
  col = c("blue", "navy", "green", "red", "grey", "yellow"),
  main = "Airline vs No. of seats in Premium Class"
)

Monthwise Travel

# Number of observations per travel month (plot() on a single factor draws
# a bar chart of level counts).
plot(
  airline_df$TravelMonth,
  col = "cyan",
  main = "Monthwise Travel"
)

Domestic and International Flights

# Number of domestic versus international observations, as a bar chart.
plot(
  airline_df$IsInternational,
  col = "purple",
  main = "Graph showing number of domestic and international flights"
)

Histogram showing the pitch of seats in Economy and Premium class vs Frequency

# Seat pitch distributions drawn side by side: economy first, premium second.
par(mfrow = c(1, 2))
hist(
  airline_df$PitchEconomy,
  main = "Economy class ",
  xlab = "Economy Seats Pitch",
  col = "grey"
)
hist(
  airline_df$PitchPremium,
  main = "Premium class ",
  xlab = "Premium Seats Pitch",
  col = "dark green"
)

Histogram showing the width of seats in Economy and Premium class vs Frequency

# Seat width distributions drawn side by side: economy first, premium second.
par(mfrow = c(1, 2))
hist(
  airline_df$WidthEconomy,
  main = "Economy class",
  xlab = "Economy Seats Width",
  col = "grey"
)
hist(
  airline_df$WidthPremium,
  main = "Premium class",
  xlab = "Premium Seats Width",
  col = "dark green"
)

Histogram showing the price of seats in Economy and Premium class vs Frequency

# Ticket price distributions drawn side by side: economy first, premium second.
par(mfrow = c(1, 2))
hist(
  airline_df$PriceEconomy,
  main = "Economy class",
  xlab = "Economy Seats Price",
  col = "grey"
)
hist(
  airline_df$PricePremium,
  main = "Premium class",
  xlab = "Premium Seats Price",
  col = "dark green"
)

A boxplot of airline vs flight duration

# Distribution of flight duration for each airline, one coloured box per level.
boxplot(
  FlightDuration ~ Airline,
  data = airline_df,
  col = c("purple", "navy", "dark green", "yellow", "red", "grey"),
  xlab = "Airline",
  ylab = "Flight duration"
)

A Scatterplot showing relative price vs the difference in pitch

# Relative price against pitch difference, drawn with car::scatterplot().
# NOTE(review): `spread` and `smoother.args` look like the pre-3.0 car
# interface; newer car releases take these via `smooth = list(...)` -- confirm
# against the installed car version.
library(car)
scatterplot(
  PriceRelative ~ PitchDifference,
  data = airline_df,
  main = "Scatter Plot of Relative Price vs Pitch Difference",
  xlab = "pitch difference",
  ylab = "price relative",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

A Scatterplot showing relative price vs the difference in width

# Relative price against width difference, drawn with car::scatterplot().
# NOTE(review): `spread` and `smoother.args` look like the pre-3.0 car
# interface; newer car releases take these via `smooth = list(...)` -- confirm
# against the installed car version.
scatterplot(
  PriceRelative ~ WidthDifference,
  data = airline_df,
  main = "Scatter plot of Relative price vs Width Difference",
  xlab = "Width difference",
  ylab = "Price relative",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

A corrgram

# Correlation overview of the data frame: pie glyphs in the upper triangle,
# shaded cells in the lower triangle, variable names on the diagonal.
library(corrgram)
corrgram(
  airline_df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram"
)

-> From the above corrgram, we can see that there is a very weak correlation between Price Relative and SeatsEconomy, SeatsPremium, WidthEconomy, PricePremium, SeatsTotal and PercentPremiumSeats. There is also a weak correlation between Price Relative and PitchEconomy, PitchPremium, WidthPremium, PriceEconomy, PitchDifference and WidthDifference.

T-test Hypotheses

H1: There is no relation between relative price and width difference. H2: There is no relation between relative price and pitch difference.

# Two-sample t-test on the PriceRelative and WidthDifference columns.
# NOTE(review): called with two vectors, t.test() runs a Welch two-sample test
# of whether the two column means differ; it does not measure association
# between the columns. For a "no relation" hypothesis, consider
# cor.test(airline_df$PriceRelative, airline_df$WidthDifference) -- confirm
# the intended test.
t.test(airline_df$PriceRelative,airline_df$WidthDifference)
## 
##  Welch Two Sample t-test
## 
## data:  airline_df$PriceRelative and airline_df$WidthDifference
## t = -19.284, df = 585.55, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.262697 -1.029268
## sample estimates:
## mean of x mean of y 
## 0.4872052 1.6331878

Here, p value is less than 0.05 so H1 is rejected.

# Two-sample t-test on the PriceRelative and PitchDifference columns.
# NOTE(review): called with two vectors, t.test() runs a Welch two-sample test
# of whether the two column means differ; it does not measure association
# between the columns. For a "no relation" hypothesis, consider
# cor.test(airline_df$PriceRelative, airline_df$PitchDifference) -- confirm
# the intended test.
t.test(airline_df$PriceRelative,airline_df$PitchDifference)
## 
##  Welch Two Sample t-test
## 
## data:  airline_df$PriceRelative and airline_df$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.367495 -6.033640
## sample estimates:
## mean of x mean of y 
## 0.4872052 6.6877729

Here again, p value is less than 0.05, therefore H2 is rejected as well.

Conversion of the variables into factor variables and integers

# Make sure the categorical columns are stored as factors.
# The earlier per-level assignments indexed on `airline_df$Res`, a column that
# does not exist in this data set, so they selected no rows and changed
# nothing; only the factor() conversions had any effect and are kept here.
factor_cols <- c("Airline", "Aircraft", "IsInternational")
airline_df[factor_cols] <- lapply(airline_df[factor_cols], factor)

# Check that the columns now have the factor type.
str(airline_df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Regression Analysis

# Fit a linear model of relative price on airline, aircraft, flight duration,
# international/domestic flag and the seat pitch/width variables, then print
# the coefficient table and fit statistics.
reg <- lm(
  PriceRelative ~ Airline + Aircraft + FlightDuration + IsInternational +
    PitchPremium + PitchDifference + WidthDifference,
  data = airline_df
)
summary(reg)
## 
## Call:
## lm(formula = PriceRelative ~ Airline + Aircraft + FlightDuration + 
##     IsInternational + PitchPremium + PitchDifference + WidthDifference, 
##     data = airline_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81510 -0.19268 -0.05124  0.09981  1.47122 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.029133   4.238674  -0.243 0.808275    
## AirlineBritish                0.274168   0.115020   2.384 0.017559 *  
## AirlineDelta                  0.003609   0.198262   0.018 0.985486    
## AirlineJet                    0.531175   0.140365   3.784 0.000175 ***
## AirlineSingapore              0.308822   0.079777   3.871 0.000125 ***
## AirlineVirgin                 0.366455   0.131493   2.787 0.005549 ** 
## AircraftBoeing               -0.017079   0.046104  -0.370 0.711220    
## FlightDuration                0.037123   0.006685   5.554 4.82e-08 ***
## IsInternationalInternational -0.469714   0.331689  -1.416 0.157436    
## PitchPremium                  0.026332   0.124471   0.212 0.832556    
## PitchDifference               0.042179   0.077243   0.546 0.585306    
## WidthDifference               0.087253   0.083851   1.041 0.298637    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3599 on 446 degrees of freedom
## Multiple R-squared:  0.3774, Adjusted R-squared:  0.362 
## F-statistic: 24.57 on 11 and 446 DF,  p-value: < 2.2e-16

Regression Verification

# Put the observed PriceRelative values next to the model's fitted values.
# The columns are unnamed here, so data.frame() derives their names from the
# expressions themselves.
regv <- data.frame(airline_df$PriceRelative, (fitted(reg)))
library(car)
# Show a few rows of the comparison rather than the whole frame.
# NOTE(review): car::some() appears to pick rows at random, so the rows shown
# can differ between runs unless a seed is set -- confirm.
some(regv)
##     airline_df.PriceRelative X.fitted.reg..
## 88                      0.40      0.4227479
## 93                      0.48      0.9536572
## 116                     0.04      0.2478988
## 133                     0.16      0.3098950
## 148                     0.29      0.1907261
## 154                     0.09      0.1662881
## 158                     1.82      0.8257836
## 238                     0.03      0.1854765
## 354                     0.03      0.1999649
## 454                     0.58      0.9350954

According to the statistical tests and the correlation and regression analysis, the factors that explain the difference in price between an economy ticket and a premium-economy airline ticket are Airline Brand (Air France, British, Delta, Jet, Singapore or Virgin), Airline type (International or Domestic) and Aircraft. However, some parameters, such as FlightDuration, PitchDifference, PitchPremium and WidthDifference, had only a minimal impact on the difference in the price of the ticket.