Read the data into R

# Load the airline pricing data set; the relative path means the CSV is read
# from the current working directory.
# stringsAsFactors = TRUE keeps the text columns as factors, which is what the
# str() output in this report shows. Since R 4.0.0 read.csv() would otherwise
# return them as character vectors.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# Per-column overview: level counts for factors, quartiles and mean for numerics.
summary(airlines.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Compact structure listing: one line per column with its type and first values.
utils::str(object = airlines.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Summarize the data to understand the mean, median, standard deviation of each variable

library(psych)
# Descriptive statistics (n, mean, sd, median, skew, ...) for every column.
psych::describe(x = airlines.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23

Draw Box Plots / Bar Plots to visualize the distribution of each variable independently

Aircraft VS Airline

# Aircraft make by airline. Both columns are factors in the str() output, so
# plot() dispatches to a spine plot (see ?plot.factor). Explicit axis labels
# replace the default generic "x"/"y".
plot(
  x = airlines.df$Airline,
  y = airlines.df$Aircraft,
  xlab = "Airline",
  ylab = "Aircraft"
)

Flight Duration VS Airline

# Box plot of flight duration for each airline, with explicit axis labels.
boxplot(
  FlightDuration ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Flight Duration"
)

Travel Month VS Airline

# Travel month by airline. Both columns are factors in the str() output, so
# plot() dispatches to a spine plot (see ?plot.factor). Explicit axis labels
# replace the default generic "x"/"y".
plot(
  x = airlines.df$Airline,
  y = airlines.df$TravelMonth,
  xlab = "Airline",
  ylab = "Travel Month"
)

Seats Economy VS Airline

# Box plot of the number of economy seats for each airline, with explicit axis labels.
boxplot(
  SeatsEconomy ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Seats Economy"
)

Seats Premium VS Airline

# Box plot of the number of premium seats for each airline, with explicit axis labels.
boxplot(
  SeatsPremium ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Seats Premium"
)

Pitch Economy VS Airline

# Box plot of economy seat pitch for each airline, with explicit axis labels.
boxplot(
  PitchEconomy ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Pitch Economy"
)

Pitch Premium VS Airline

# Box plot of premium seat pitch for each airline, with explicit axis labels.
boxplot(
  PitchPremium ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Pitch Premium"
)

Width Economy VS Airline

# Box plot of economy seat width for each airline, with explicit axis labels.
boxplot(
  WidthEconomy ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Width Economy"
)

Width Premium VS Airline

# Box plot of premium seat width for each airline, with explicit axis labels.
boxplot(
  WidthPremium ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Width Premium"
)

Price Economy VS Airline

# Box plot of economy ticket price for each airline, with explicit axis labels.
boxplot(
  PriceEconomy ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Price Economy"
)

Price Premium VS Airline

# Box plot of premium ticket price for each airline, with explicit axis labels.
boxplot(
  PricePremium ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Price Premium"
)

Price Relative VS Airline

# Box plot of the PriceRelative column for each airline, with explicit axis labels.
boxplot(
  PriceRelative ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Price Relative"
)

Seats Total VS Airline

# Box plot of the total seat count for each airline, with explicit axis labels.
boxplot(
  SeatsTotal ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Seats Total"
)

Pitch Difference VS Airline

# Box plot of the PitchDifference column for each airline, with explicit axis labels.
boxplot(
  PitchDifference ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Pitch Difference"
)

Width Difference VS Airline

# Box plot of the WidthDifference column for each airline, with explicit axis labels.
boxplot(
  WidthDifference ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Width Difference"
)

Percent Premium Seats VS Airline

# Box plot of the PercentPremiumSeats column for each airline, with explicit axis labels.
boxplot(
  PercentPremiumSeats ~ Airline,
  data = airlines.df,
  xlab = "Airline",
  ylab = "Percent Premium Seats"
)

Draw Scatter Plots to understand how the variables are correlated pair-wise

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Draw the two plots side by side. par() returns the previous settings, which
# are restored afterwards so the 1 x 2 layout does not leak into later plots.
old_par <- par(mfrow = c(1, 2))
# NOTE(review): Aircraft is a factor in the str() output, and the case labels
# printed after these calls suggest car draws grouped boxplots with outlier
# identification here rather than true scatter plots - confirm against
# ?car::scatterplot.
scatterplot(airlines.df$Aircraft, airlines.df$PriceEconomy)
scatterplot(airlines.df$Aircraft, airlines.df$PitchPremium)
par(old_par)

##  [1] "296" "297" "298" "302" "303" "306" "74"  "75"  "76"  "77"  "78" 
## [12] "79"  "80"  "81"  "98"  "281" "90"  "91"  "92"  "93"  "94"  "95" 
## [23] "96"  "97"  "379" "380"

Price Economy for various airlines

# Mean economy price per airline; the result has columns `airline` and `x`.
with(
  airlines.df,
  aggregate(PriceEconomy, by = list(airline = Airline), FUN = mean)
)
##     airline         x
## 1 AirFrance 2769.7838
## 2   British 1293.4800
## 3     Delta  560.9348
## 4       Jet  276.1639
## 5 Singapore  860.2500
## 6    Virgin 1603.5323

Draw a Corrgram; Create a Variance-Covariance Matrix

library(corrgram)
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs above.
corrgram(
  x = airlines.df,
  main = "Corrgram Showing all the variable",
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

Pearson Test

# Two-sided Pearson correlation test between economy and premium prices.
# The defaults are spelled out; the `$` expressions are kept so the printed
# "data:" line is unchanged.
cor.test(
  x = airlines.df$PriceEconomy,
  y = airlines.df$PricePremium,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
## 
##  Pearson's product-moment correlation
## 
## data:  airlines.df$PriceEconomy and airlines.df$PricePremium
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8826622 0.9172579
## sample estimates:
##       cor 
## 0.9013887

The p-value is < 2.2e-16, which is far below 0.05, so Price Economy and Price Premium are significantly correlated (r ≈ 0.90).

Run t-tests, as appropriate, to test your hypotheses

# Welch two-sample t-test: does mean premium price differ between aircraft makes?
# The defaults are spelled out; the formula is kept as-is so the printed
# "data:" line is unchanged.
t.test(
  airlines.df$PricePremium ~ airlines.df$Aircraft,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  airlines.df$PricePremium by airlines.df$Aircraft
## t = 0.28645, df = 310.38, p-value = 0.7747
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -212.2929  284.6350
## sample estimates:
## mean in group AirBus mean in group Boeing 
##             1869.503             1833.332
# Welch two-sample t-test: does mean economy price differ between aircraft makes?
# The defaults are spelled out; the formula is kept as-is so the printed
# "data:" line is unchanged.
t.test(
  airlines.df$PriceEconomy ~ airlines.df$Aircraft,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  airlines.df$PriceEconomy by airlines.df$Aircraft
## t = 0.64317, df = 289.45, p-value = 0.5206
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -131.7801  259.7135
## sample estimates:
## mean in group AirBus mean in group Boeing 
##             1369.954             1305.987

Regression model

for Price Economy

# Linear model of economy price on flight duration, seat attributes and the
# international/domestic flag; the coefficient table is printed afterwards.
fitEconomy <- lm(
  formula = PriceEconomy ~ FlightDuration + SeatsEconomy + PitchEconomy +
    WidthEconomy + WidthPremium + IsInternational,
  data = airlines.df
)
summary(fitEconomy)
## 
## Call:
## lm(formula = PriceEconomy ~ FlightDuration + SeatsEconomy + PitchEconomy + 
##     WidthEconomy + WidthPremium + IsInternational, data = airlines.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1817.89  -442.52    17.48   556.63  1406.81 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -5.559e+03  2.560e+03  -2.172   0.0304 *  
## FlightDuration                1.329e+02  1.131e+01  11.752   <2e-16 ***
## SeatsEconomy                 -7.739e-02  4.719e-01  -0.164   0.8698    
## PitchEconomy                  6.038e+02  6.781e+01   8.904   <2e-16 ***
## WidthEconomy                 -6.817e+02  7.041e+01  -9.682   <2e-16 ***
## WidthPremium                 -1.118e+02  4.216e+01  -2.652   0.0083 ** 
## IsInternationalInternational  1.516e+03  1.664e+02   9.111   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 682.4 on 451 degrees of freedom
## Multiple R-squared:  0.5295, Adjusted R-squared:  0.5232 
## F-statistic: 84.58 on 6 and 451 DF,  p-value: < 2.2e-16

for Price Premium

# Linear model of premium price on flight duration, seat attributes, economy
# price and the international/domestic flag; the coefficient table is printed
# afterwards.
fitPremium <- lm(
  formula = PricePremium ~ FlightDuration + SeatsPremium + PitchEconomy +
    WidthEconomy + WidthPremium + PriceEconomy + IsInternational,
  data = airlines.df
)
summary(fitPremium)
## 
## Call:
## lm(formula = PricePremium ~ FlightDuration + SeatsPremium + PitchEconomy + 
##     WidthEconomy + WidthPremium + PriceEconomy + IsInternational, 
##     data = airlines.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -841.3 -239.8  -64.3  117.5 3224.6 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  5991.4201  1715.7925   3.492 0.000527 ***
## FlightDuration                 78.5442     8.8416   8.883  < 2e-16 ***
## SeatsPremium                   11.0824     2.0349   5.446 8.48e-08 ***
## PitchEconomy                 -281.7893    51.4259  -5.480 7.11e-08 ***
## WidthEconomy                   34.4761    58.2259   0.592 0.554076    
## WidthPremium                  108.7646    30.3149   3.588 0.000370 ***
## PriceEconomy                    1.1246     0.0324  34.715  < 2e-16 ***
## IsInternationalInternational -594.4440   121.9935  -4.873 1.53e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 463.2 on 450 degrees of freedom
## Multiple R-squared:  0.8727, Adjusted R-squared:  0.8707 
## F-statistic: 440.6 on 7 and 450 DF,  p-value: < 2.2e-16

for relative price

# Linear model of the PriceRelative column on flight duration, seat counts,
# seat dimensions and the international/domestic flag; the coefficient table
# is printed afterwards.
fitrelative <- lm(
  formula = PriceRelative ~ FlightDuration + SeatsEconomy + SeatsPremium +
    PitchEconomy + PitchPremium + WidthEconomy + WidthPremium +
    IsInternational,
  data = airlines.df
)
summary(fitrelative)
## 
## Call:
## lm(formula = PriceRelative ~ FlightDuration + SeatsEconomy + 
##     SeatsPremium + PitchEconomy + PitchPremium + WidthEconomy + 
##     WidthPremium + IsInternational, data = airlines.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86537 -0.24973 -0.07381  0.13157  1.41493 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   0.5109115  3.0988778   0.165   0.8691    
## FlightDuration                0.0352775  0.0065328   5.400 1.08e-07 ***
## SeatsEconomy                  0.0006600  0.0003196   2.065   0.0395 *  
## SeatsPremium                 -0.0040377  0.0024263  -1.664   0.0968 .  
## PitchEconomy                 -0.2144599  0.0449962  -4.766 2.54e-06 ***
## PitchPremium                  0.1119607  0.0572868   1.954   0.0513 .  
## WidthEconomy                  0.0260857  0.0432187   0.604   0.5464    
## WidthPremium                  0.1156244  0.0261084   4.429 1.19e-05 ***
## IsInternationalInternational -0.6073656  0.2459364  -2.470   0.0139 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3668 on 449 degrees of freedom
## Multiple R-squared:  0.349,  Adjusted R-squared:  0.3374 
## F-statistic: 30.08 on 8 and 449 DF,  p-value: < 2.2e-16

Executive Summary

Across all airlines combined, Boeing aircraft (307 records) outnumber Airbus aircraft (151 records).

Singapore Airlines has longer flight durations than the other airlines.

British and Virgin airlines have more premium seats than the other airlines.

AirFrance has the highest mean economy price (about 2,770).

British airlines have a higher number of total seats than the other airlines.

Price Economy and Price Premium are highly correlated.