Read the data into R

# Load the six-airlines data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors. R >= 4.0 reads them as character
# by default, which would break the per-level counts in summary() and the
# factor-based plot() calls further down.
AirlinesData.df <- read.csv("SixAirlinesData.csv", stringsAsFactors = TRUE)
# Per-column overview: level counts for factors, quartiles/mean for numerics.
summary(AirlinesData.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Summarize the data to understand the mean, median, standard deviation of each variable

# psych supplies describe(), which extends summary() with sd, trimmed mean,
# mad, skew, kurtosis and standard error for every column.
library(psych)
# Columns flagged with "*" in the printed table are categorical; their
# statistics are computed on the underlying integer level codes, so they are
# not meaningful as averages.
describe(AirlinesData.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23

Draw Box Plots / Bar Plots to visualize the distribution of each variable independently

# Spine plot of Aircraft (categorical) by Airline; the formula interface
# labels both axes with the variable names.
plot(Aircraft ~ Airline, data = AirlinesData.df)

Aircraft VS Airline

# Box plot of FlightDuration for each Airline; the formula interface labels
# both axes with the variable names.
plot(FlightDuration ~ Airline, data = AirlinesData.df)

Flight Duration VS Airline

# Spine plot of TravelMonth (categorical) by Airline; the formula interface
# labels both axes with the variable names.
plot(TravelMonth ~ Airline, data = AirlinesData.df)

Travel Month VS Airline

# Box plot of SeatsEconomy for each Airline; the formula interface labels
# both axes with the variable names.
plot(SeatsEconomy ~ Airline, data = AirlinesData.df)

Seats Economy VS Airline

# Box plot of SeatsPremium for each Airline; the formula interface labels
# both axes with the variable names.
plot(SeatsPremium ~ Airline, data = AirlinesData.df)

Seats Premium VS Airline

# Box plot of PitchEconomy for each Airline; the formula interface labels
# both axes with the variable names.
plot(PitchEconomy ~ Airline, data = AirlinesData.df)

Pitch Economy VS Airline

# Box plot of PitchPremium for each Airline; the formula interface labels
# both axes with the variable names.
plot(PitchPremium ~ Airline, data = AirlinesData.df)

Pitch Premium VS Airline

# Box plot of PriceEconomy for each Airline; the formula interface labels
# both axes with the variable names.
plot(PriceEconomy ~ Airline, data = AirlinesData.df)

Price Economy VS Airline

# Box plot of WidthEconomy for each Airline; the formula interface labels
# both axes with the variable names.
plot(WidthEconomy ~ Airline, data = AirlinesData.df)

Width Economy VS Airline

# Box plot of WidthPremium for each Airline; the formula interface labels
# both axes with the variable names.
plot(WidthPremium ~ Airline, data = AirlinesData.df)

Width Premium VS Airline

# Box plot of PricePremium for each Airline; the formula interface labels
# both axes with the variable names.
plot(PricePremium ~ Airline, data = AirlinesData.df)

Price Premium VS Airline

# Box plot of PriceRelative for each Airline; the formula interface labels
# both axes with the variable names.
plot(PriceRelative ~ Airline, data = AirlinesData.df)

Price Relative VS Airline

# Box plot of PitchDifference for each Airline; the formula interface labels
# both axes with the variable names.
plot(PitchDifference ~ Airline, data = AirlinesData.df)

Pitch Difference VS Airline

# Box plot of SeatsTotal for each Airline; the formula interface labels
# both axes with the variable names.
plot(SeatsTotal ~ Airline, data = AirlinesData.df)

Seats Total VS Airline

# Box plot of PercentPremiumSeats for each Airline; the formula interface
# labels both axes with the variable names.
plot(PercentPremiumSeats ~ Airline, data = AirlinesData.df)

Percent Premium Seats VS Airline

# Box plot of WidthDifference for each Airline; the formula interface labels
# both axes with the variable names.
plot(WidthDifference ~ Airline, data = AirlinesData.df)

Width Difference VS Airline

Draw Scatter Plots to understand how the variables are correlated pair-wise

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Lay out subsequent base-graphics plots in a 1-row, 2-column grid.
# NOTE(review): no plotting call follows in this section before the corrgram;
# the scatter plots appear further down under a repeated heading - confirm
# whether they were meant to go here.
par(mfrow=c(1,2))

Draw a Corrgram; Create a Variance-Covariance Matrix

library(corrgram)
# Correlation overview of the data set: pie glyphs in the upper triangle,
# shaded cells in the lower triangle.
corrgram(
  AirlinesData.df,
  main = "Corrgram Showing all the variable",
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

#### Price Economy for various airlines

# Mean PriceEconomy per airline; the result has columns `airline` (group) and
# `x` (the mean).
with(
  AirlinesData.df,
  aggregate(x = PriceEconomy, by = list(airline = Airline), FUN = mean)
)
##     airline         x
## 1 AirFrance 2769.7838
## 2   British 1293.4800
## 3     Delta  560.9348
## 4       Jet  276.1639
## 5 Singapore  860.2500
## 6    Virgin 1603.5323

Draw Scatter Plots to understand how the variables are correlated pair-wise

library(car)
# Lay out the next plots in a 1-row, 2-column grid.
par(mfrow=c(1,2))
# Compare PriceEconomy, then PitchPremium, across the two Aircraft makers.
# Aircraft is categorical, so each call compares the two groups rather than
# drawing a point cloud.
# NOTE(review): the quoted numbers printed after these calls look like row
# labels of points flagged by the plots - confirm against the car docs.
scatterplot(AirlinesData.df$Aircraft,AirlinesData.df$PriceEconomy)
scatterplot(AirlinesData.df$Aircraft,AirlinesData.df$PitchPremium)

##  [1] "296" "297" "298" "302" "303" "306" "74"  "75"  "76"  "77"  "78" 
## [12] "79"  "80"  "81"  "98"  "281" "90"  "91"  "92"  "93"  "94"  "95" 
## [23] "96"  "97"  "379" "380"

Regression model

Relative Price
# Linear model of PriceRelative on flight duration and the seat count, pitch
# and width of both cabins; summary() prints coefficients and fit statistics.
fitrelative <- lm(
  PriceRelative ~ FlightDuration + SeatsEconomy + SeatsPremium +
    PitchEconomy + PitchPremium + WidthEconomy + WidthPremium,
  data = AirlinesData.df
)
summary(fitrelative)
## 
## Call:
## lm(formula = PriceRelative ~ FlightDuration + SeatsEconomy + 
##     SeatsPremium + PitchEconomy + PitchPremium + WidthEconomy + 
##     WidthPremium, data = AirlinesData.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86549 -0.25418 -0.07713  0.14522  1.34717 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6.8912366  1.7209126   4.004 7.27e-05 ***
## FlightDuration  0.0278757  0.0058374   4.775 2.43e-06 ***
## SeatsEconomy    0.0007934  0.0003168   2.505   0.0126 *  
## SeatsPremium   -0.0076652  0.0019421  -3.947 9.19e-05 ***
## PitchEconomy   -0.2606111  0.0411633  -6.331 5.90e-10 ***
## PitchPremium   -0.0194938  0.0212963  -0.915   0.3605    
## WidthEconomy   -0.0050452  0.0415730  -0.121   0.9035    
## WidthPremium    0.1256658  0.0259356   4.845 1.74e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3689 on 450 degrees of freedom
## Multiple R-squared:  0.3401, Adjusted R-squared:  0.3299 
## F-statistic: 33.14 on 7 and 450 DF,  p-value: < 2.2e-16

Price Economy

# Linear model of PriceEconomy on flight duration, economy seat count/pitch/
# width and premium seat width; summary() prints coefficients and fit stats.
fitEconomy <- lm(
  PriceEconomy ~ FlightDuration + SeatsEconomy + PitchEconomy +
    WidthEconomy + WidthPremium,
  data = AirlinesData.df
)
summary(fitEconomy)
## 
## Call:
## lm(formula = PriceEconomy ~ FlightDuration + SeatsEconomy + PitchEconomy + 
##     WidthEconomy + WidthPremium, data = AirlinesData.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1643.77  -552.78     0.23   579.02  1472.19 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -5619.513   2782.342  -2.020   0.0440 *  
## FlightDuration   168.623     11.529  14.627  < 2e-16 ***
## SeatsEconomy       1.080      0.494   2.185   0.0294 *  
## PitchEconomy     477.498     72.148   6.618 1.03e-10 ***
## WidthEconomy    -599.233     75.896  -7.895 2.21e-14 ***
## WidthPremium      63.325     40.786   1.553   0.1212    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 741.7 on 452 degrees of freedom
## Multiple R-squared:  0.4429, Adjusted R-squared:  0.4367 
## F-statistic: 71.86 on 5 and 452 DF,  p-value: < 2.2e-16

Price Premium

# Linear model of PricePremium on flight duration, premium seat count, seat
# pitch/width terms and the economy fare; summary() prints the fit.
fitPremium <- lm(
  PricePremium ~ FlightDuration + SeatsPremium + PitchEconomy +
    WidthEconomy + WidthPremium + PriceEconomy,
  data = AirlinesData.df
)
summary(fitPremium)
## 
## Call:
## lm(formula = PricePremium ~ FlightDuration + SeatsPremium + PitchEconomy + 
##     WidthEconomy + WidthPremium + PriceEconomy, data = AirlinesData.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -824.7 -270.0  -64.8  117.2 3263.3 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6324.12877 1757.13078   3.599 0.000355 ***
## FlightDuration   74.22807    9.01621   8.233 1.99e-15 ***
## SeatsPremium      8.89547    2.03423   4.373 1.52e-05 ***
## PitchEconomy   -226.12015   51.38957  -4.400 1.35e-05 ***
## WidthEconomy    -19.83956   58.57228  -0.339 0.734979    
## WidthPremium     33.58951   26.74551   1.256 0.209805    
## PriceEconomy      1.06873    0.03105  34.417  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 474.7 on 451 degrees of freedom
## Multiple R-squared:  0.866,  Adjusted R-squared:  0.8642 
## F-statistic: 485.6 on 6 and 451 DF,  p-value: < 2.2e-16

Pearson Test

# Pearson product-moment correlation test between the economy and premium
# fares (two-sided, 95% confidence interval).
cor.test(
  x = AirlinesData.df$PriceEconomy,
  y = AirlinesData.df$PricePremium,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  AirlinesData.df$PriceEconomy and AirlinesData.df$PricePremium
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8826622 0.9172579
## sample estimates:
##       cor 
## 0.9013887

The p-value is < 2.2e-16, far below 0.05, so Price Economy and Price Premium are significantly correlated.

Run T-Tests, as appropriate, to test your Hypotheses

# Welch two-sample t-test: does the mean PricePremium differ between the two
# Aircraft makers? Unequal variances are assumed (the default).
t.test(
  AirlinesData.df$PricePremium ~ AirlinesData.df$Aircraft,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  AirlinesData.df$PricePremium by AirlinesData.df$Aircraft
## t = 0.28645, df = 310.38, p-value = 0.7747
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -212.2929  284.6350
## sample estimates:
## mean in group AirBus mean in group Boeing 
##             1869.503             1833.332
# Welch two-sample t-test: does the mean PriceEconomy differ between the two
# Aircraft makers? Unequal variances are assumed (the default).
t.test(
  AirlinesData.df$PriceEconomy ~ AirlinesData.df$Aircraft,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  AirlinesData.df$PriceEconomy by AirlinesData.df$Aircraft
## t = 0.64317, df = 289.45, p-value = 0.5206
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -131.7801  259.7135
## sample estimates:
## mean in group AirBus mean in group Boeing 
##             1369.954             1305.987

SUMMARY

  1. Boeing aircraft outnumber Airbus aircraft across the airlines as a whole.
  2. Singapore Airlines has longer flight durations than the other airlines.
  3. British and Virgin airlines have a larger number of premium seats.
  4. British airlines has a larger number of total seats.
  5. Price Economy and Price Premium are highly correlated (r ≈ 0.90).
  6. AirFrance has the highest average economy price.