Reading Data

# Load the six-airlines fare data set. stringsAsFactors = TRUE keeps the text
# columns (Airline, Aircraft, TravelMonth, IsInternational) as factors on
# R >= 4.0, where read.csv() stopped converting strings by default; the
# per-level counts in summary() and the plot() calls on these columns further
# down rely on them being factors.
airline.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# View() opens an interactive data viewer; skip it in non-interactive runs.
if (interactive()) {
  View(airline.df)
}

Summary

# Print per-column summary statistics for the loaded data frame.
summary(airline.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Airlines

# Count the records per airline, print the counts, then chart them as bars.
air <- table(airline.df[["Airline"]])
print(air)
## 
## AirFrance   British     Delta       Jet Singapore    Virgin 
##        74       175        46        61        40        62
barplot(height = air)

Travel Month

# Count the records per travel month and print the counts.
mth <- table(airline.df$TravelMonth)
mth
## 
## Aug Jul Oct Sep 
## 127  75 127 129
# Chart the tabulated counts directly. plot() on the raw column only draws a
# bar chart when the column is a factor and fails on a character column;
# barplot() on the table draws the same bars in both cases.
barplot(mth, col = "lightblue", main = "Frequency of flights in travel months")

Flight Duration

# Two panels side by side: flight duration as a histogram (left) and as a
# box plot (right).
par(mfrow = c(1, 2))
hist(
  airline.df$FlightDuration,
  breaks = 8,
  main = "Flight Duration",
  xlab = "Flight Duration",
  col = "lightblue"
)
boxplot(
  airline.df$FlightDuration,
  main = "Flight Duration",
  col = "lightblue"
)

## Plots: International Flights

# Bar chart of domestic vs international record counts. Tabulating explicitly
# (instead of calling plot() on the column) draws the same bars when the column
# is a factor and also works when it is a character vector.
barplot(table(airline.df$IsInternational), ylim = c(0, 450))

Histogram of Price Economy and Price Premium

# Two panels side by side: histogram of economy fares (left) and of
# premium-economy fares (right).
par(mfrow = c(1, 2))
hist(
  airline.df$PriceEconomy,
  breaks = 6,
  ylim = c(0, 150),
  main = "Economy Price",
  xlab = "Price Economy",
  col = "lightblue"
)
hist(
  airline.df$PricePremium,
  breaks = 6,
  main = "Premium Price",
  xlab = "Price Premium",
  col = "orange"
)

EconomySeats and PremiumSeats

# Box plots of seat counts per cabin class. boxplot() has no `breaks` argument
# (that belongs to hist()); passing it only triggered "not a graphical
# parameter" warnings, so it is not passed here.
par(mfrow = c(1, 2))
boxplot(airline.df$SeatsEconomy, main = "Economy Seats", col = "lightblue")
boxplot(airline.df$SeatsPremium, main = "Premium Seats", col = "orange")

# Histograms of the same seat counts.
par(mfrow = c(1, 2))
hist(airline.df$SeatsEconomy, main = "Economy Seats", col = "lightblue")
hist(airline.df$SeatsPremium, main = "Premium Seats", col = "orange")

# Histograms of seat pitch per cabin class.
par(mfrow = c(1, 2))
hist(
  airline.df$PitchEconomy,
  col = "lightblue",
  main = "pitchEconomy",
  xlab = "Pitch Economy"
)
hist(
  airline.df$PitchPremium,
  col = "orange",
  main = "PitchPremium",
  ylim = c(0, 450)
)

Airline-Wise Economy Class-Price Distribution

# Horizontal box plots of economy fares, one box per airline. The default
# y-axis is suppressed (yaxt = "n") and replaced by abbreviated airline labels.
boxplot(
  PriceEconomy ~ Airline,
  data = airline.df,
  horizontal = TRUE,
  yaxt = "n",
  col = c("red", "purple", "yellow", "orange", "lightblue", "green"),
  main = "Airline-wise Economy class-price distribution",
  xlab = "Price in USD"
)
axis(
  side = 2,
  at = seq_len(6),
  labels = c("Aif", "Bts", "Delt", "Jet", "Sigp", "Vg")
)

## Seat Width in Economy Vs Premium Class.

# Cross-tabulate economy seat width (rows) against premium seat width (columns).
wd <- table(airline.df$WidthEconomy, airline.df$WidthPremium)
wd
##     
##       17  18  19  20  21
##   17  28   0  32   0  54
##   18   0  12 224   0  68
##   19   0   0   0  40   0
# Histograms of seat width per cabin class on a shared x-range. The range runs
# to 22 because premium widths reach 21 (see the table above); an upper limit
# of 20 would cut those bars off. The titles name what is plotted: the raw
# seat width, not a width difference.
par(mfrow = c(1, 2))
hist(
  airline.df$WidthEconomy,
  col = "lightblue",
  main = "Seat Width in Economy Class",
  xlim = c(15, 22),
  ylim = c(0, 300)
)
hist(
  airline.df$WidthPremium,
  col = "orange",
  main = "Seat Width in Premium Class",
  xlim = c(15, 22),
  ylim = c(0, 300)
)

## Pitch Difference in Domestic Vs International Flights.

# Cross-tabulate flight type (rows) against pitch difference (columns).
table(airline.df$IsInternational, airline.df$PitchDifference)
##                
##                   2   3   6   7  10
##   Domestic       24  16   0   0   0
##   International   0   0 121 243  54
# Histograms of seat pitch per cabin class. The titles name what is plotted:
# the raw seat pitch of each class, not the pitch difference tabulated above.
par(mfrow = c(1, 2))
hist(
  airline.df$PitchEconomy,
  col = "lightblue",
  main = "Seat Pitch in Economy Class"
)
hist(
  airline.df$PitchPremium,
  col = "orange",
  main = "Seat Pitch in Premium Class"
)

Airline-Wise Premium Class-Price Distribution

# Horizontal box plots of premium-economy fares, one box per airline. The
# title names the premium class because PricePremium is the plotted variable.
# The default y-axis is suppressed (yaxt = "n") and replaced by abbreviated
# airline labels.
boxplot(
  PricePremium ~ Airline,
  data = airline.df,
  horizontal = TRUE,
  col = c("red", "purple", "yellow", "orange", "lightblue", "green"),
  yaxt = "n",
  xlab = "Price in USD",
  main = "Airline-wise Premium class-price distribution"
)
axis(
  side = 2,
  at = seq_len(6),
  labels = c("Aif", "Bts", "Delt", "Jet", "Sigp", "Vg")
)

Relative Price-difference Vs Flight Duration

# Scatter plot (car package) of relative price against flight duration.
library(car)
scatterplot(
  PriceRelative ~ FlightDuration,
  data = airline.df,
  main = " Relative price difference vs Flight Duration",
  pch = 19,
  cex = 0.9
)

Width difference

# Scatter plot (car package) of relative price against seat-width difference.
library(car)
scatterplot(
  PriceRelative ~ WidthDifference,
  data = airline.df,
  main = "RELATIVE PRICE DIFFERENCE VS WIDTH DIFFERENCE",
  pch = 19,
  cex = 0.9
)

PitchDifference

# Scatter plot (car package) of relative price against seat-pitch difference.
library(car)
scatterplot(
  PriceRelative ~ PitchDifference,
  data = airline.df,
  main = "RELATIVE PRICE DIFFERENCE VS PITCH DIFFERENCE"
)

Scatterplot Matrix

# Scatter plot matrix (car package) of the two fares against flight duration
# and the seat counts of each class.
library(car)
scatterplotMatrix(
  airline.df[, c(
    "PriceEconomy",
    "FlightDuration",
    "SeatsEconomy",
    "SeatsPremium",
    "PricePremium"
  )],
  main = "SCATTERPLOT MATRIX"
)

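Observations (signs and magnitudes as in the correlation matrix below): 1. PriceEconomy is positively correlated with FlightDuration (r = 0.57) and weakly with SeatsEconomy (r = 0.13). 2. PriceEconomy is weakly positively correlated with SeatsPremium (r = 0.11). 3. PricePremium is positively correlated with FlightDuration (r = 0.65) and weakly with SeatsEconomy (r = 0.18). 4. PricePremium is strongly positively correlated with PriceEconomy (r = 0.90).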

# Scatter plot matrix (car package) of the two fares against the pitch and
# width differences.
library(car)
scatterplotMatrix(
  airline.df[, c(
    "PriceEconomy",
    "PitchDifference",
    "WidthDifference",
    "PricePremium"
  )],
  main = "SCATTERPLOT MATRIX"
)

Correlations

# Pairwise Pearson correlations between all numeric columns, rounded to two
# decimals.
round(
  cor(airline.df[vapply(airline.df, is.numeric, logical(1))]),
  digits = 2
)
##                     FlightDuration SeatsEconomy SeatsPremium PitchEconomy
## FlightDuration                1.00         0.20         0.16         0.29
## SeatsEconomy                  0.20         1.00         0.63         0.14
## SeatsPremium                  0.16         0.63         1.00        -0.03
## PitchEconomy                  0.29         0.14        -0.03         1.00
## PitchPremium                  0.10         0.12         0.00        -0.55
## WidthEconomy                  0.46         0.37         0.46         0.29
## WidthPremium                  0.10         0.10         0.00        -0.54
## PriceEconomy                  0.57         0.13         0.11         0.37
## PricePremium                  0.65         0.18         0.22         0.23
## PriceRelative                 0.12         0.00        -0.10        -0.42
## SeatsTotal                    0.20         0.99         0.72         0.12
## PitchDifference              -0.04         0.04         0.02        -0.78
## WidthDifference              -0.12        -0.08        -0.22        -0.64
## PercentPremiumSeats           0.06        -0.33         0.49        -0.10
##                     PitchPremium WidthEconomy WidthPremium PriceEconomy
## FlightDuration              0.10         0.46         0.10         0.57
## SeatsEconomy                0.12         0.37         0.10         0.13
## SeatsPremium                0.00         0.46         0.00         0.11
## PitchEconomy               -0.55         0.29        -0.54         0.37
## PitchPremium                1.00        -0.02         0.75         0.05
## WidthEconomy               -0.02         1.00         0.08         0.07
## WidthPremium                0.75         0.08         1.00        -0.06
## PriceEconomy                0.05         0.07        -0.06         1.00
## PricePremium                0.09         0.15         0.06         0.90
## PriceRelative               0.42        -0.04         0.50        -0.29
## SeatsTotal                  0.11         0.41         0.09         0.13
## PitchDifference             0.95        -0.13         0.76        -0.10
## WidthDifference             0.70        -0.39         0.88        -0.08
## PercentPremiumSeats        -0.18         0.23        -0.18         0.07
##                     PricePremium PriceRelative SeatsTotal PitchDifference
## FlightDuration              0.65          0.12       0.20           -0.04
## SeatsEconomy                0.18          0.00       0.99            0.04
## SeatsPremium                0.22         -0.10       0.72            0.02
## PitchEconomy                0.23         -0.42       0.12           -0.78
## PitchPremium                0.09          0.42       0.11            0.95
## WidthEconomy                0.15         -0.04       0.41           -0.13
## WidthPremium                0.06          0.50       0.09            0.76
## PriceEconomy                0.90         -0.29       0.13           -0.10
## PricePremium                1.00          0.03       0.19           -0.02
## PriceRelative               0.03          1.00      -0.01            0.47
## SeatsTotal                  0.19         -0.01       1.00            0.03
## PitchDifference            -0.02          0.47       0.03            1.00
## WidthDifference            -0.01          0.49      -0.11            0.76
## PercentPremiumSeats         0.12         -0.16      -0.22           -0.09
##                     WidthDifference PercentPremiumSeats
## FlightDuration                -0.12                0.06
## SeatsEconomy                  -0.08               -0.33
## SeatsPremium                  -0.22                0.49
## PitchEconomy                  -0.64               -0.10
## PitchPremium                   0.70               -0.18
## WidthEconomy                  -0.39                0.23
## WidthPremium                   0.88               -0.18
## PriceEconomy                  -0.08                0.07
## PricePremium                  -0.01                0.12
## PriceRelative                  0.49               -0.16
## SeatsTotal                    -0.11               -0.22
## PitchDifference                0.76               -0.09
## WidthDifference                1.00               -0.28
## PercentPremiumSeats           -0.28                1.00

Corrgram of Airline Variables

# Reset to a single plotting panel, then draw a correlogram (corrgram package)
# of the airline data with pie glyphs in the upper triangle. The title names
# the airline data set, which is what is plotted.
par(mfrow = c(1, 1))
library(corrgram)
corrgram(
  airline.df,
  upper.panel = panel.pie,
  main = "Corrgram of airline variables"
)

Analysis

T-test

Null hypothesis: There is no difference between the mean price of an economy class ticket and the mean price of a premium economy class ticket.

# Two-sample t-test (pooled variance, unpaired) comparing the economy and
# premium-economy price columns.
# NOTE(review): both columns come from the same rows of airline.df, so a paired
# test (paired = TRUE) may be the more appropriate design — confirm.
t.test(airline.df$PriceEconomy,airline.df$PricePremium,var.equal = TRUE,paired = FALSE)
## 
##  Two Sample t-test
## 
## data:  airline.df$PriceEconomy and airline.df$PricePremium
## t = -6.8304, df = 914, p-value = 1.544e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0699 -369.2926
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

The null hypothesis is rejected because the t-test gives a very low p-value (p < 0.05); there is a significant difference between the mean prices of economy class and premium economy class tickets.

Pearson’s Correlation Tests:

1.Check the correlation Between PriceRelative and PitchDifference

# Pearson correlation test: relative price vs pitch difference.
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$PitchDifference,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceRelative and airline.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3940262 0.5372817
## sample estimates:
##       cor 
## 0.4687302

As the p-value < 0.05, we can conclude that there is a significant correlation between the relative price of the two classes and PitchDifference.

2.Check the Correlation between FlightDuration And Relative price of two classes

# Pearson correlation test: relative price vs flight duration.
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$FlightDuration,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceRelative and airline.df$FlightDuration
## t = 2.6046, df = 456, p-value = 0.009498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02977856 0.21036806
## sample estimates:
##      cor 
## 0.121075

As the p-value < 0.05, we can conclude that there is a significant (though weak, r = 0.12) correlation between FlightDuration and the relative price of the two classes.

3. Check the correlation between RelativePrice and SeatsPremium.

# Pearson correlation test: relative price vs number of premium seats.
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$SeatsPremium,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceRelative and airline.df$SeatsPremium
## t = -2.0854, df = 456, p-value = 0.03759
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.18715605 -0.00561924
## sample estimates:
##         cor 
## -0.09719601

As the p-value < 0.05, we can say that there is a significant (though weak, r = -0.10) correlation between the relative price of the two classes and SeatsPremium.

4.Check The Correlation between RelativePrice and WidthDifference.

# Pearson correlation test: relative price vs width difference.
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$WidthDifference,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceRelative and airline.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4125388 0.5528218
## sample estimates:
##       cor 
## 0.4858024

As the p-value < 0.05, there is a significant correlation between the relative price of the two classes and WidthDifference.

  5. Check whether the relative price of the two classes differs between international and domestic flights.
# Welch two-sample t-test of relative price, grouped by domestic vs
# international flights (unequal variances, the default).
t.test(
  PriceRelative ~ IsInternational,
  data = airline.df,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  PriceRelative by IsInternational
## t = -19.451, df = 446.12, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4855215 -0.3964139
## sample estimates:
##      mean in group Domestic mean in group International 
##                   0.0847500                   0.5257177

As the p-value < 0.05, the mean relative price differs significantly between international and domestic flights.

Regression Analysis

Consider the following Regression Equation:

y = β0 + β1(X1) + β2(X2) + β3(X3) + β4(X4), where y is the dependent variable, X1, X2, X3 and X4 are the independent variables, and β0, β1, β2, β3 and β4 are the beta coefficients. PriceRelative = β0 + β1(FlightDuration) + β2(IsInternational) + β3(PitchDifference) + β4(WidthDifference)

# Linear model of relative price on flight duration, flight type, and the
# pitch and width differences; then print the fit summary.
fit <- lm(
  formula = PriceRelative ~ FlightDuration + IsInternational +
    PitchDifference + WidthDifference,
  data = airline.df
)
summary(fit)
## 
## Call:
## lm(formula = PriceRelative ~ FlightDuration + IsInternational + 
##     PitchDifference + WidthDifference, data = airline.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82785 -0.24889 -0.06653  0.13341  1.30701 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -0.415845   0.092414  -4.500 8.66e-06 ***
## FlightDuration                0.040569   0.006253   6.488 2.29e-10 ***
## IsInternationalInternational -0.627755   0.125854  -4.988 8.71e-07 ***
## PitchDifference               0.152867   0.024902   6.139 1.82e-09 ***
## WidthDifference               0.089529   0.024166   3.705 0.000238 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3719 on 453 degrees of freedom
## Multiple R-squared:  0.3246, Adjusted R-squared:  0.3187 
## F-statistic: 54.44 on 4 and 453 DF,  p-value: < 2.2e-16

Hence, all four predictors are statistically significant (p < 0.001), so we reject the null hypothesis of a zero coefficient for each of the 4 variables; the model explains about 32% of the variance in PriceRelative (multiple R-squared = 0.3246).

# Print the estimated coefficients of the first model.
coef(fit)
##                  (Intercept)               FlightDuration 
##                  -0.41584519                   0.04056873 
## IsInternationalInternational              PitchDifference 
##                  -0.62775507                   0.15286694 
##              WidthDifference 
##                   0.08952863
# Recode the three predictors as factors in place, so that a model treats each
# distinct value as its own category.
airline.df[c("PitchDifference", "WidthDifference", "IsInternational")] <-
  lapply(
    airline.df[c("PitchDifference", "WidthDifference", "IsInternational")],
    factor
  )

# Frequency of each level of the three categorical predictors (each column is
# tabulated once).
table(airline.df$PitchDifference)
## 
##   2   3   6   7  10 
##  24  16 121 243  54
table(airline.df$WidthDifference)
## 
##   0   1   2   3   4 
##  40 264  32  68  54
table(airline.df$IsInternational)
## 
##      Domestic International 
##            40           418
# Refit the same formula on the data frame whose PitchDifference and
# WidthDifference columns are factors, then print the fit summary.
newmodel <- lm(
  formula = PriceRelative ~ FlightDuration + IsInternational +
    PitchDifference + WidthDifference,
  data = airline.df
)
summary(newmodel)
## 
## Call:
## lm(formula = PriceRelative ~ FlightDuration + IsInternational + 
##     PitchDifference + WidthDifference, data = airline.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81588 -0.24663 -0.05383  0.11960  1.48499 
## 
## Coefficients: (3 not defined because of singularities)
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -0.038819   0.077923  -0.498 0.618604    
## FlightDuration                0.038630   0.006286   6.146 1.76e-09 ***
## IsInternationalInternational  0.875034   0.090368   9.683  < 2e-16 ***
## PitchDifference3             -0.009391   0.118873  -0.079 0.937071    
## PitchDifference6             -0.593447   0.095382  -6.222 1.13e-09 ***
## PitchDifference7             -0.464184   0.076211  -6.091 2.41e-09 ***
## PitchDifference10                   NA         NA      NA       NA    
## WidthDifference1             -0.237927   0.053327  -4.462 1.03e-05 ***
## WidthDifference2             -0.354692   0.093457  -3.795 0.000168 ***
## WidthDifference3                    NA         NA      NA       NA    
## WidthDifference4                    NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3683 on 450 degrees of freedom
## Multiple R-squared:  0.3421, Adjusted R-squared:  0.3319 
## F-statistic: 33.43 on 7 and 450 DF,  p-value: < 2.2e-16
# Print the estimated coefficients of the refitted model (including any that
# could not be estimated, which show as NA).
coef(newmodel)
##                  (Intercept)               FlightDuration 
##                 -0.038819094                  0.038630251 
## IsInternationalInternational             PitchDifference3 
##                  0.875033562                 -0.009390536 
##             PitchDifference6             PitchDifference7 
##                 -0.593447129                 -0.464184176 
##            PitchDifference10             WidthDifference1 
##                           NA                 -0.237926955 
##             WidthDifference2             WidthDifference3 
##                 -0.354691979                           NA 
##             WidthDifference4 
##                           NA

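-> We can notice that the IsInternational coefficient changes markedly (from -0.63 to 0.88) after converting the variables into factors, and the adjusted R-squared rises slightly (from 0.3187 to 0.3319). -> Hence this is the better-fitted model of the two, although three factor-level coefficients are not defined (NA) because of singularities.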

Following factors explain the difference in price between an economy ticket and a premium-economy airline ticket :

Hence, the p-values and the coefficients suggest that the model is a good fit and the regression is good and that we have to reject the null hypothesis in case of the data variables considered. Obviously, the prices of premium class tickets are higher than those of economy class tickets. The following factors describe the difference in price between an economy ticket and a premium-economy airline ticket:

  1. FlightDuration, PitchDifference, WidthDifference and whether the flight is international or domestic have a significant impact on the prices of airline tickets.
  2. Width difference: the width between the armrests of a premium seat is considerably greater than that of an economy class seat.
  3. Pitch difference: the distance between two consecutive premium seats is considerably greater than that between economy class seats.
  4. Prices of premium seats on international flights are higher than those on domestic flights.
  5. Factors 2 and 3 make premium class seats more comfortable, hence making them more expensive.