# Load the six-airline fare data set into `airline.df`.
#
# The file is addressed by its full path instead of calling setwd(), so the
# script no longer changes the session's working directory as a side effect.
data_dir <- "C:/Users/lenovo/Desktop/se"

# stringsAsFactors = TRUE: since R 4.0.0 read.csv() keeps text columns as
# character by default, while the factor-based output that follows (level
# counts in summary(), starred columns in describe()) requires factors.
airline.df <- read.csv(
  file.path(data_dir, "SixAirlinesDataV2.csv"),
  stringsAsFactors = TRUE
)

# View() opens a data viewer window; only attempt it in an interactive session.
if (interactive()) {
  View(airline.df)
}
library(psych)
## Warning: package 'psych' was built under R version 3.3.3
# Descriptive statistics for every column; the namespace-qualified call makes
# explicit that this is the psych implementation.
psych::describe(airline.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
# Column-wise overview of the data frame: level counts for the categorical
# columns and min / quartiles / mean / max for the numeric ones.
summary(airline.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Mean economy fare per airline (one row per airline).
a1 <- aggregate(PriceEconomy ~ Airline, data = airline.df, FUN = mean)
print(a1)
##     Airline PriceEconomy
## 1 AirFrance    2769.7838
## 2   British    1293.4800
## 3     Delta     560.9348
## 4       Jet     276.1639
## 5 Singapore     860.2500
## 6    Virgin    1603.5323
library(lattice)
# Mean economy fare per airline, drawn as vertical bars.
# origin = 0 anchors every bar at zero. By default lattice starts bars at the
# bottom edge of the panel, so bar heights would not be proportional to the
# means and the cheapest airline's bar would almost vanish.
barchart(
  PriceEconomy ~ Airline,
  data = a1,
  origin = 0,
  col = "pink",
  xlab = "Airlines",
  ylab = "Economy price($)"
)

The above bar chart shows the average economy prices charged by the 6 airlines. The highest price for economy is charged by Air France, while the cheapest is Jet.

# Mean premium economy fare per airline (one row per airline).
a2 <- aggregate(PricePremium ~ Airline, data = airline.df, FUN = mean)
print(a2)
##     Airline PricePremium
## 1 AirFrance    3065.2162
## 2   British    1937.0286
## 3     Delta     684.6739
## 4       Jet     483.3607
## 5 Singapore    1239.9250
## 6    Virgin    2721.6935
library(lattice)
# Mean premium economy fare per airline, drawn as vertical bars.
# origin = 0 anchors every bar at zero; lattice's default starts bars at the
# bottom edge of the panel, which distorts the relative bar heights.
barchart(
  PricePremium ~ Airline,
  data = a2,
  origin = 0,
  col = "pink",
  xlab = "Airlines",
  ylab = "Premium Economy price($)"
)

The above bar chart shows the average premium economy prices charged by the 6 airlines. The highest premium economy price is charged by Air France, while the cheapest is Jet.

# Mean flight duration per airline (one row per airline).
a3 <- aggregate(FlightDuration ~ Airline, data = airline.df, FUN = mean)
print(a3)
##     Airline FlightDuration
## 1 AirFrance       8.988514
## 2   British       7.854971
## 3     Delta       4.028913
## 4       Jet       4.143934
## 5 Singapore      10.481000
## 6    Virgin       9.250484
library(lattice)
# Mean flight duration per airline, drawn as vertical bars.
# origin = 0 anchors every bar at zero; lattice's default starts bars at the
# bottom edge of the panel, which distorts the relative bar heights.
barchart(
  FlightDuration ~ Airline,
  data = a3,
  origin = 0,
  col = "pink",
  xlab = "Airlines",
  ylab = "Flight duration(hrs)"
)

The bar chart shows the average flight duration (in hours) of each airline. Singapore has the highest average flight duration, whereas Delta has the lowest.

# Scatter plot: flight duration (x) against economy fare (y).
with(
  airline.df,
  plot(
    FlightDuration, PriceEconomy,
    xlab = "Flight duration(hrs)",
    ylab = "Economy price($)",
    col = "blue"
  )
)

The above plot illustrates a direct (positive) relation between flight duration and economy price, i.e. as the duration increases, the price tends to increase.

# Scatter plot: flight duration (x) against premium economy fare (y).
with(
  airline.df,
  plot(
    FlightDuration, PricePremium,
    xlab = "Flight duration(hrs)",
    ylab = "Premium Economy price($)",
    col = "Blue"
  )
)

The above plot illustrates a direct (positive) relation between flight duration and premium economy price. However, the rate of increase decreases gradually as the duration grows.

# Correlation of flight duration with the fare of each cabin class.
with(airline.df, cor(FlightDuration, PriceEconomy))
## [1] 0.5666404
with(airline.df, cor(FlightDuration, PricePremium))
## [1] 0.6487398

Hence it can be concluded that premium economy prices show a stronger correlation with flight duration (r ≈ 0.65) than economy prices do (r ≈ 0.57).

# Mean economy fare per travel month (one row per month).
b1 <- aggregate(PriceEconomy ~ TravelMonth, data = airline.df, FUN = mean)
print(b1)
##   TravelMonth PriceEconomy
## 1         Aug     1344.661
## 2         Jul     1280.493
## 3         Oct     1295.370
## 4         Sep     1368.062
# Mean economy fare per travel month, drawn as vertical bars.
# origin = 0 anchors every bar at zero. The monthly means differ by only a few
# percent, so lattice's default (bars starting at the bottom edge of the
# panel) would grossly exaggerate the differences between months.
barchart(
  PriceEconomy ~ TravelMonth,
  data = b1,
  origin = 0,
  xlab = "Month",
  ylab = "Average economy price",
  col = "green"
)

The above bar graph shows that the average economy price is highest in September and lowest in July.

# Mean premium economy fare per travel month (one row per month).
b2 <- aggregate(PricePremium ~ TravelMonth, data = airline.df, FUN = mean)
print(b2)
##   TravelMonth PricePremium
## 1         Aug     1871.126
## 2         Jul     1743.427
## 3         Oct     1836.055
## 4         Sep     1888.054
# Mean premium economy fare per travel month, drawn as vertical bars.
# origin = 0 anchors every bar at zero. The monthly means differ by only a few
# percent, so lattice's default (bars starting at the bottom edge of the
# panel) would grossly exaggerate the differences between months.
barchart(
  PricePremium ~ TravelMonth,
  data = b2,
  origin = 0,
  xlab = "Month",
  ylab = "Average premium economy price",
  col = "green"
)

## The above bar graph shows that the average premium economy price is highest in September and lowest in July.

# Three-way contingency table (aircraft maker x economy pitch x premium
# pitch), printed in flattened form.
m1 <- xtabs(
  formula = ~ Aircraft + PitchEconomy + PitchPremium,
  data = airline.df
)
ftable(m1)
##                       PitchPremium  34  35  38  40
## Aircraft PitchEconomy                             
## AirBus   30                          0   0   0   0
##          31                          0   0  86   0
##          32                          5   0  59   0
##          33                          0   1   0   0
## Boeing   30                          0   0   0  54
##          31                         12   0 157   0
##          32                         14   4  62   0
##          33                          0   4   0   0

The effect of pitch of the flights

# Three-way contingency table (aircraft maker x economy width x premium
# width), printed in flattened form.
m2 <- xtabs(
  formula = ~ Aircraft + WidthEconomy + WidthPremium,
  data = airline.df
)
ftable(m2)
##                       WidthPremium  17  18  19  20  21
## Aircraft WidthEconomy                                 
## AirBus   17                          6   0   0   0   0
##          18                          0   0  90   0  39
##          19                          0   0   0  16   0
## Boeing   17                         22   0  32   0  54
##          18                          0  12 134   0  29
##          19                          0   0   0  24   0

The effect of the width of the flights

# Scatter plots of seat pitch / width against fare, for both cabin classes.
#
# The y-axis range is derived from the data. The previous fixed limit of
# c(0, 3500) silently dropped every fare above 3500 from the plots, although
# economy fares reach 3593 and premium economy fares reach 7414.
economy_ylim <- c(0, max(airline.df$PriceEconomy, na.rm = TRUE))
premium_ylim <- c(0, max(airline.df$PricePremium, na.rm = TRUE))

plot(
  airline.df$PitchEconomy, airline.df$PriceEconomy,
  xlab = "Pitch", ylab = "Price", ylim = economy_ylim,
  main = "Economy :pitch vs price", col = "red"
)

plot(
  airline.df$WidthEconomy, airline.df$PriceEconomy,
  xlab = "Width", ylab = "Price", ylim = economy_ylim,
  main = "Economy : width vs price", col = "red"
)

# Title corrected: the colon used to split "Premium Economy" in two, making
# the plot read as if it showed economy pitch.
plot(
  airline.df$PitchPremium, airline.df$PricePremium,
  xlab = "Pitch", ylab = "Price", ylim = premium_ylim,
  main = "Premium Economy pitch vs price", col = "red"
)

plot(
  airline.df$WidthPremium, airline.df$PricePremium,
  xlab = "Width", ylab = "Price", ylim = premium_ylim,
  main = "Premium Economy width vs price", col = "red"
)

# Horizontal boxplots of fares, split into domestic and international flights.
#
# A boxplot shows the distribution of the individual fares (median, quartiles,
# outliers), not averages, so the axis label and titles no longer say
# "average". The group tick labels are taken from the grouping variable
# instead of being hard-coded, so they cannot drift from the actual box order.
flight_types <- levels(as.factor(airline.df$IsInternational))

boxplot(
  PriceEconomy ~ IsInternational,
  data = airline.df,
  horizontal = TRUE,
  yaxt = "n", # default group axis suppressed; drawn explicitly below
  ylab = "International/Domestic flight",
  xlab = "Price",
  main = "Economy prices of domestic and international flights"
)
axis(side = 2, at = seq_along(flight_types), labels = flight_types, col = "blue")

boxplot(
  PricePremium ~ IsInternational,
  data = airline.df,
  horizontal = TRUE,
  yaxt = "n", # default group axis suppressed; drawn explicitly below
  ylab = "International/Domestic flight",
  xlab = "Price",
  main = "Premium economy prices of domestic and international flights"
)
axis(side = 2, at = seq_along(flight_types), labels = flight_types)

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.3.3
# Correlogram of the data set with variables reordered (order = TRUE):
# shaded cells below the diagonal, pie glyphs above it, names on the diagonal.
corrgram(
  airline.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airline intercorrelations"
)

Applying regression

# Linear model of the relative price premium on every other column.
# NOTE(review): PitchDifference, WidthDifference and SeatsTotal come back with
# NA coefficients ("not defined because of singularities"), so they appear to
# be exact linear combinations of earlier predictors. Confirm, then consider
# dropping them. PriceRelative also looks derived from PriceEconomy and
# PricePremium; verify before interpreting those two coefficients.
frame <- lm(
  PriceRelative ~ Airline + Aircraft + FlightDuration + TravelMonth +
    IsInternational + SeatsEconomy + SeatsPremium +
    PitchEconomy + PitchPremium + WidthEconomy + WidthPremium +
    PriceEconomy + PricePremium + PercentPremiumSeats +
    PitchDifference + WidthDifference + SeatsTotal,
  data = airline.df
)
summary(frame)
## 
## Call:
## lm(formula = PriceRelative ~ Airline + Aircraft + FlightDuration + 
##     TravelMonth + IsInternational + SeatsEconomy + SeatsPremium + 
##     PitchEconomy + PitchPremium + WidthEconomy + WidthPremium + 
##     PriceEconomy + PricePremium + PercentPremiumSeats + PitchDifference + 
##     WidthDifference + SeatsTotal, data = airline.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76373 -0.08269  0.00438  0.08002  0.84672 
## 
## Coefficients: (3 not defined because of singularities)
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -3.993e-01  2.948e+00  -0.135 0.892302    
## AirlineBritish               -3.971e-01  1.107e-01  -3.586 0.000373 ***
## AirlineDelta                 -3.865e-01  2.203e-01  -1.755 0.080020 .  
## AirlineJet                   -2.584e-01  9.594e-02  -2.693 0.007354 ** 
## AirlineSingapore             -3.535e-01  1.297e-01  -2.725 0.006685 ** 
## AirlineVirgin                -3.575e-01  2.031e-01  -1.761 0.078997 .  
## AircraftBoeing                4.003e-02  2.968e-02   1.349 0.178089    
## FlightDuration                2.613e-02  4.727e-03   5.526 5.63e-08 ***
## TravelMonthJul                2.111e-02  3.145e-02   0.671 0.502475    
## TravelMonthOct                2.778e-02  2.670e-02   1.041 0.298619    
## TravelMonthSep               -6.617e-03  2.664e-02  -0.248 0.803924    
## IsInternationalInternational  2.785e-02  2.502e-01   0.111 0.911400    
## SeatsEconomy                  8.090e-04  5.462e-04   1.481 0.139313    
## SeatsPremium                 -7.374e-03  3.615e-03  -2.040 0.041967 *  
## PitchEconomy                 -1.756e-02  7.994e-02  -0.220 0.826207    
## PitchPremium                  5.960e-02  9.165e-02   0.650 0.515823    
## WidthEconomy                 -9.207e-02  5.266e-02  -1.748 0.081085 .  
## WidthPremium                  4.904e-02  1.365e-01   0.359 0.719527    
## PriceEconomy                 -9.325e-04  3.318e-05 -28.105  < 2e-16 ***
## PricePremium                  5.781e-04  2.294e-05  25.197  < 2e-16 ***
## PercentPremiumSeats           1.114e-02  7.653e-03   1.456 0.146197    
## PitchDifference                      NA         NA      NA       NA    
## WidthDifference                      NA         NA      NA       NA    
## SeatsTotal                           NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2123 on 437 degrees of freedom
## Multiple R-squared:  0.7878, Adjusted R-squared:  0.7781 
## F-statistic: 81.12 on 20 and 437 DF,  p-value: < 2.2e-16
# Named vector of the fitted coefficients (NA for terms that were not estimated).
coef(frame)
##                  (Intercept)               AirlineBritish 
##                -0.3993377527                -0.3971066460 
##                 AirlineDelta                   AirlineJet 
##                -0.3865140546                -0.2583533203 
##             AirlineSingapore                AirlineVirgin 
##                -0.3534833629                -0.3575114105 
##               AircraftBoeing               FlightDuration 
##                 0.0400301547                 0.0261250335 
##               TravelMonthJul               TravelMonthOct 
##                 0.0211075134                 0.0277808779 
##               TravelMonthSep IsInternationalInternational 
##                -0.0066168064                 0.0278544004 
##                 SeatsEconomy                 SeatsPremium 
##                 0.0008089538                -0.0073744940 
##                 PitchEconomy                 PitchPremium 
##                -0.0175636803                 0.0596015823 
##                 WidthEconomy                 WidthPremium 
##                -0.0920739338                 0.0490357851 
##                 PriceEconomy                 PricePremium 
##                -0.0009324897                 0.0005781041 
##          PercentPremiumSeats              PitchDifference 
##                 0.0111406012                           NA 
##              WidthDifference                   SeatsTotal 
##                           NA                           NA

Hence the variables AirlineBritish, AirlineJet, AirlineSingapore, FlightDuration, SeatsPremium, PriceEconomy and PricePremium are statistically significant, as their p-values are below 0.05.

Hence we can conclude that :

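We can conclude from the above analysis that premium economy seats, which offer more pitch and width, are priced higher than economy seats. The model as a whole is a good fit: the p-value of the F-test is less than 0.05 and the adjusted R-squared is 0.78. Flight duration is statistically significant (p-value < 0.05). Width difference and pitch difference, however, could not be estimated: their coefficients are NA because of singularities, i.e. they are linear combinations of other predictors in the model. Their significance therefore cannot be judged from this regression, and the individual pitch and width terms are not significant at the 0.05 level.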