# Load the six-airlines fare data set into `air.df`.
#
# The file is addressed through `data_dir` rather than by calling setwd(), so
# the session's working directory is left untouched; point `data_dir` at the
# folder that holds the CSV.
data_dir <- "C:\\Users\\Adithya Nataraj\\Downloads"

# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer converts
# text columns to factors by default, and the summary and the
# plot(<factor>, ...) calls further down rely on Airline, Aircraft,
# TravelMonth and IsInternational being factors.
air.df <- read.csv(
  file.path(data_dir, "SixAirlinesDataV2.csv"),
  stringsAsFactors = TRUE
)

# The spreadsheet-style viewer is only opened in an interactive session.
if (interactive()) {
  View(air.df)
}

summary(air.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Boxplots of the Variables

# One box plot per variable: each element name is a column of air.df and each
# element value is the title drawn above that column's box plot.
boxplot_titles <- c(
  FlightDuration = "Duration of Flight",
  SeatsEconomy = "Number of Economy Seats in the Aircraft",
  SeatsPremium = "Number of Premium Economy Seats in the Aircraft",
  PitchEconomy = "Distance between two consecutive Economy Seats",
  PitchPremium = "Distance between two consecutive Premium Economy Seats",
  WidthEconomy = "Width between armrests of an Economy Seat",
  WidthPremium = "Width between armrests of a Premium Economy Seat",
  PriceEconomy = "Price of Economy Seat",
  PricePremium = "Price of Premium Economy Seat"
)

# Plots are drawn in the order the columns are listed above.
for (column in names(boxplot_titles)) {
  boxplot(air.df[[column]], main = boxplot_titles[[column]])
}

Various plots of the data by Airline and by Aircraft (box plots for the numeric variables, spine plots for the categorical variables)

# Each entry is c(<x column>, <y column>, <plot title>); the plots are drawn
# in the order listed.
plot_specs <- list(
  c("Airline", "SeatsEconomy", "Airline vs No. of seats in Economy Class"),
  c("Airline", "SeatsPremium", "Airline vs No. of seats in PremiumClass"),
  c("Airline", "TravelMonth", "Airline vs Month of Travel"),
  c("Airline", "Aircraft", "Airline vs Aircraft"),
  c("Airline", "SeatsTotal", "Airline vs Total no. of seats"),
  c("Airline", "IsInternational", "Airline vs Type of flight(Int. or Dom.)"),
  c("Aircraft", "IsInternational", "Aircraft vs Type of flight(Int. or Dom.)"),
  c("Airline", "FlightDuration", "Airline vs Duration of flight"),
  c("Aircraft", "FlightDuration", "Aircraft vs Duration of flight"),
  c("Airline", "PriceEconomy", "Airline vs Price of Economy Class ticket"),
  c("Aircraft", "PriceEconomy", "Aircraft vs Price of Economy Class ticket"),
  c("Airline", "PricePremium", "Airline vs Price of Premium Class ticket"),
  c("Aircraft", "PricePremium", "Aircraft vs Price of Premium Class ticket")
)

for (spec in plot_specs) {
  plot(air.df[[spec[1]]], air.df[[spec[2]]], main = spec[3])
}

Correlation Matrix of all the numerical variables

library(corrplot)    
## corrplot 0.84 loaded
# Columns 3 and 6 to 18 of air.df are passed to cor(); use = "complete.obs"
# drops any row that has a missing value in one of them.
numeric_columns <- c(3, 6:18)
correlations <- cor(air.df[, numeric_columns], use = "complete.obs")

# Draw the correlation matrix with one ellipse per pair of variables.
corrplot(corr = correlations, method = "ellipse")

Corrgram of all the variables

library(corrgram)
# Corrgram of air.df with variable reordering switched on (order = TRUE).
# Panels: panel.shade below the diagonal, panel.pie above it, panel.minmax on
# the diagonal, and panel.txt for the variable labels.
corrgram(
  air.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of all the Variables"
)

Null hypothesis: The mean price of an Economy ticket and the mean price of a Premium Economy ticket are the same.

# Test whether the mean of PriceEconomy differs from the mean of PricePremium.
# Called with two vectors and no other arguments, t.test() runs the Welch
# two-sample test, as the header of the recorded output confirms.
# NOTE(review): both vectors are columns of the same data frame, so the two
# prices look paired row by row; if each row is one flight, `paired = TRUE`
# is probably the intended test. Confirm before relying on this result.
t.test(air.df$PriceEconomy, air.df$PricePremium)
## 
##  Welch Two Sample t-test
## 
## data:  air.df$PriceEconomy and air.df$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0831 -369.2793
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

We can see that the p-value (1.605e-11) is less than 0.05, and hence the null hypothesis is rejected. There is a significant difference between the mean Economy ticket price and the mean Premium ticket price: the Premium ticket price is higher than the Economy ticket price.

# Linear model of PriceRelative on all 17 remaining columns of air.df.
# The term order is kept exactly as written because it fixes the order of the
# coefficient table. PitchDifference, WidthDifference and SeatsTotal are
# reported as NA by lm() ("not defined because of singularities").
regr <- lm(
  formula = PriceRelative ~ PriceEconomy + PricePremium + FlightDuration +
    SeatsEconomy + SeatsPremium + PitchEconomy + PitchPremium +
    WidthEconomy + WidthPremium + PercentPremiumSeats + PitchDifference +
    WidthDifference + SeatsTotal + Airline + Aircraft + TravelMonth +
    IsInternational,
  data = air.df
)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(regr)
## 
## Call:
## lm(formula = PriceRelative ~ PriceEconomy + PricePremium + FlightDuration + 
##     SeatsEconomy + SeatsPremium + PitchEconomy + PitchPremium + 
##     WidthEconomy + WidthPremium + PercentPremiumSeats + PitchDifference + 
##     WidthDifference + SeatsTotal + Airline + Aircraft + TravelMonth + 
##     IsInternational, data = air.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76373 -0.08269  0.00438  0.08002  0.84672 
## 
## Coefficients: (3 not defined because of singularities)
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -3.993e-01  2.948e+00  -0.135 0.892302    
## PriceEconomy                 -9.325e-04  3.318e-05 -28.105  < 2e-16 ***
## PricePremium                  5.781e-04  2.294e-05  25.197  < 2e-16 ***
## FlightDuration                2.613e-02  4.727e-03   5.526 5.63e-08 ***
## SeatsEconomy                  8.090e-04  5.462e-04   1.481 0.139313    
## SeatsPremium                 -7.374e-03  3.615e-03  -2.040 0.041967 *  
## PitchEconomy                 -1.756e-02  7.994e-02  -0.220 0.826207    
## PitchPremium                  5.960e-02  9.165e-02   0.650 0.515823    
## WidthEconomy                 -9.207e-02  5.266e-02  -1.748 0.081085 .  
## WidthPremium                  4.904e-02  1.365e-01   0.359 0.719527    
## PercentPremiumSeats           1.114e-02  7.653e-03   1.456 0.146197    
## PitchDifference                      NA         NA      NA       NA    
## WidthDifference                      NA         NA      NA       NA    
## SeatsTotal                           NA         NA      NA       NA    
## AirlineBritish               -3.971e-01  1.107e-01  -3.586 0.000373 ***
## AirlineDelta                 -3.865e-01  2.203e-01  -1.755 0.080020 .  
## AirlineJet                   -2.584e-01  9.594e-02  -2.693 0.007354 ** 
## AirlineSingapore             -3.535e-01  1.297e-01  -2.725 0.006685 ** 
## AirlineVirgin                -3.575e-01  2.031e-01  -1.761 0.078997 .  
## AircraftBoeing                4.003e-02  2.968e-02   1.349 0.178089    
## TravelMonthJul                2.111e-02  3.145e-02   0.671 0.502475    
## TravelMonthOct                2.778e-02  2.670e-02   1.041 0.298619    
## TravelMonthSep               -6.617e-03  2.664e-02  -0.248 0.803924    
## IsInternationalInternational  2.785e-02  2.502e-01   0.111 0.911400    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2123 on 437 degrees of freedom
## Multiple R-squared:  0.7878, Adjusted R-squared:  0.7781 
## F-statistic: 81.12 on 20 and 437 DF,  p-value: < 2.2e-16
# Print the fitted coefficients (aliased terms are shown as NA).
coef(regr)
##                  (Intercept)                 PriceEconomy 
##                -0.3993377527                -0.0009324897 
##                 PricePremium               FlightDuration 
##                 0.0005781041                 0.0261250335 
##                 SeatsEconomy                 SeatsPremium 
##                 0.0008089538                -0.0073744940 
##                 PitchEconomy                 PitchPremium 
##                -0.0175636803                 0.0596015823 
##                 WidthEconomy                 WidthPremium 
##                -0.0920739338                 0.0490357851 
##          PercentPremiumSeats              PitchDifference 
##                 0.0111406012                           NA 
##              WidthDifference                   SeatsTotal 
##                           NA                           NA 
##               AirlineBritish                 AirlineDelta 
##                -0.3971066460                -0.3865140546 
##                   AirlineJet             AirlineSingapore 
##                -0.2583533203                -0.3534833629 
##                AirlineVirgin               AircraftBoeing 
##                -0.3575114105                 0.0400301547 
##               TravelMonthJul               TravelMonthOct 
##                 0.0211075134                 0.0277808779 
##               TravelMonthSep IsInternationalInternational 
##                -0.0066168064                 0.0278544004
# 95% confidence intervals for the coefficients of the fitted model.
confint(regr, level = 0.95)
##                                      2.5 %        97.5 %
## (Intercept)                  -6.1929433320  5.3942678265
## PriceEconomy                 -0.0009976989 -0.0008672805
## PricePremium                  0.0005330119  0.0006231963
## FlightDuration                0.0168339700  0.0354160970
## SeatsEconomy                 -0.0002645551  0.0018824627
## SeatsPremium                 -0.0144799061 -0.0002690820
## PitchEconomy                 -0.1746859016  0.1395585411
## PitchPremium                 -0.1205254829  0.2397286476
## WidthEconomy                 -0.1955718265  0.0114239588
## WidthPremium                 -0.2191781676  0.3172497379
## PercentPremiumSeats          -0.0039009458  0.0261821482
## PitchDifference                         NA            NA
## WidthDifference                         NA            NA
## SeatsTotal                              NA            NA
## AirlineBritish               -0.6147350632 -0.1794782288
## AirlineDelta                 -0.8194540321  0.0464259229
## AirlineJet                   -0.4469062306 -0.0698004100
## AirlineSingapore             -0.6084196676 -0.0985470581
## AirlineVirgin                -0.7566003059  0.0415774849
## AircraftBoeing               -0.0182983856  0.0983586951
## TravelMonthJul               -0.0407034449  0.0829184718
## TravelMonthOct               -0.0246876236  0.0802493793
## TravelMonthSep               -0.0589659858  0.0457323729
## IsInternationalInternational -0.4638526936  0.5195614944

The variables PriceEconomy, PricePremium, FlightDuration, SeatsPremium, AirlineBritish, AirlineJet and AirlineSingapore are statistically significant (p < 0.05).

The variables SeatsEconomy, PitchEconomy, PitchPremium, WidthEconomy, WidthPremium, PercentPremiumSeats, AirlineDelta, AirlineVirgin, AircraftBoeing, the travel months (Jul, Oct and Sep) and IsInternational are statistically insignificant (p > 0.05).

Holding the other predictors constant, each coefficient is the expected change in PriceRelative for a one-unit increase in that variable. The expected change in PriceRelative if the Economy price increases by 1 rupee is -0.0009324897. The expected change in PriceRelative if the Premium price increases by 1 rupee is 0.0005781041. The expected change in PriceRelative if the flight duration increases by 1 hour is 0.0261250335. The expected change in PriceRelative if the number of Economy seats increases by 1 is 0.0008089538. The expected change in PriceRelative if the number of Premium seats increases by 1 is -0.0073744940. The expected change in PriceRelative if the width of an Economy seat increases by 1 is -0.0920739338. The expected change in PriceRelative if the width of a Premium seat increases by 1 is 0.0490357851.

It can be said from the regression analysis and the correlation matrices that some variables are more strongly related than others. Flight duration was more strongly linked to the price of a Premium ticket than to the price of an Economy ticket. Total seats (Economy seats + Premium seats) was more strongly linked to the number of Economy seats than to the number of Premium seats. From this we can conclude that the number of Economy seats far exceeded the number of Premium seats in any airplane. The relative price, i.e. (PricePremium - PriceEconomy) / PriceEconomy, followed the trend of the beta coefficients: a positive change in the Premium price had a positive effect on the relative price, and a positive change in the Economy price had a negative effect on the relative price. This agrees with the definition of the relative price: the Premium price appears only in the numerator, so increasing it raises the relative price, whereas increasing the Economy price lowers the numerator and raises the denominator, so it lowers the relative price. Further, it can be seen that the relative price is never negative; hence the price of an Economy ticket is lower than that of a Premium ticket.