Reading Data

# Load the airline pricing data. Text columns are read as factors explicitly:
# since R 4.0.0 read.csv() defaults to stringsAsFactors = FALSE, and the
# level counts printed by summary() as well as the factor-based plots later
# in this report rely on factor columns.
airline.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# View() opens a data viewer window, so only call it in an interactive
# session; this keeps the script runnable via Rscript / knitr.
if (interactive()) {
  View(airline.df)
}

Summary

# Per-column overview: level counts for factor columns and
# min / quartiles / mean / max for numeric columns.
summary(object = airline.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Airlines

# Count the number of rows recorded for each airline and print the table.
air <- table(airline.df[["Airline"]])
print(air)
## 
## AirFrance   British     Delta       Jet Singapore    Virgin 
##        74       175        46        61        40        62
# Bar chart of the per-airline row counts held in `air`.
barplot(height = air)

# Count the number of rows recorded for each travel month and print the table.
mth <- table(airline.df[["TravelMonth"]])
print(mth)
## 
## Aug Jul Oct Sep 
## 127  75 127 129
# Bar chart of the number of rows per travel month. The pre-computed `mth`
# table is plotted directly, so the chart works whether TravelMonth was read
# as a factor or as character (plot() on a character vector would fail).
barplot(
  mth,
  col = "lightblue",
  main = "Frequency of flights in travel months"
)

Plots

Histogram of Price Economy and Price Premium

# Draw the two price histograms side by side (1 row, 2 columns).
par(mfrow = c(1, 2))

# Economy fares; the y-axis is capped at 150 via ylim.
hist(
  airline.df$PriceEconomy,
  breaks = 6,
  col = "lightblue",
  main = "Economy Price",
  xlab = "Price Economy",
  ylim = c(0, 150)
)

# Premium-economy fares.
hist(
  airline.df$PricePremium,
  breaks = 6,
  col = "orange",
  main = "Premium Price",
  xlab = "Price Premium"
)

Histogram of EconomySeats and PremiumSeats

# Draw the two seat-count histograms side by side (1 row, 2 columns).
# hist() is used here: this section is a histogram section and `breaks` is a
# hist() argument; boxplot() does not accept it.
par(mfrow = c(1, 2))

# Distribution of the number of economy seats.
hist(
  airline.df$SeatsEconomy,
  breaks = 10,
  col = "lightblue",
  main = "Economy Seats",
  xlab = "Economy Seats"
)

# Distribution of the number of premium-economy seats.
hist(
  airline.df$SeatsPremium,
  breaks = 5,
  col = "orange",
  main = "Premium Seats",
  xlab = "Premium Seats"
)

library(lattice)

# Lattice histogram of economy seat pitch, one panel per airline,
# with raw counts (rather than percentages) on the y-axis.
histogram(
  ~ PitchEconomy | Airline,
  data = airline.df,
  type = "count"
)

Airline-Wise Economy Class Price Distribution

# Horizontal box plots of the economy fare, one box per airline.
# The default y-axis is suppressed (yaxt = "n") so that short airline
# labels can be drawn manually below.
boxplot(
  PriceEconomy ~ Airline,
  data = airline.df,
  horizontal = TRUE,
  yaxt = "n",
  col = c("red", "purple", "yellow", "orange", "lightblue", "green"),
  main = "Airline-wise Economy class-price distribution",
  xlab = "Price in USD"
)
# Abbreviated airline labels at box positions 1..6 on the left axis.
axis(side = 2, at = 1:6, labels = c("Aif", "Bts", "Delt", "Jet", "Sigp", "Vg"))

Airline-Wise Relative Price Distribution

# Horizontal box plots of the relative price (PriceRelative), one box per
# airline. The plotted variable, title and x-axis label match this section's
# subject, the relative price, instead of repeating the economy-price plot.
# The default y-axis is suppressed (yaxt = "n") so that short airline labels
# can be drawn manually below.
boxplot(
  PriceRelative ~ Airline,
  data = airline.df,
  horizontal = TRUE,
  yaxt = "n",
  col = c("red", "purple", "yellow", "orange", "lightblue", "green"),
  main = "Airline-wise relative price distribution",
  xlab = "Relative Price"
)
# Abbreviated airline labels at box positions 1..6 on the left axis.
axis(side = 2, at = 1:6, labels = c("Aif", "Bts", "Delt", "Jet", "Sigp", "Vg"))

Relative Price-difference Vs Flight Duration

library(car)

# Scatter plot of relative price against flight duration, drawn with
# car::scatterplot using solid points (pch = 19) slightly shrunk (cex = 0.9).
scatterplot(
  PriceRelative ~ FlightDuration,
  data = airline.df,
  pch = 19,
  cex = 0.9,
  main = " Relative price difference vs Flight Duration"
)

Correlations

# Pairwise Pearson correlations between all numeric columns,
# rounded to two decimal places.
round(cor(airline.df[vapply(airline.df, is.numeric, logical(1))]), digits = 2)
##                     FlightDuration SeatsEconomy SeatsPremium PitchEconomy
## FlightDuration                1.00         0.20         0.16         0.29
## SeatsEconomy                  0.20         1.00         0.63         0.14
## SeatsPremium                  0.16         0.63         1.00        -0.03
## PitchEconomy                  0.29         0.14        -0.03         1.00
## PitchPremium                  0.10         0.12         0.00        -0.55
## WidthEconomy                  0.46         0.37         0.46         0.29
## WidthPremium                  0.10         0.10         0.00        -0.54
## PriceEconomy                  0.57         0.13         0.11         0.37
## PricePremium                  0.65         0.18         0.22         0.23
## PriceRelative                 0.12         0.00        -0.10        -0.42
## SeatsTotal                    0.20         0.99         0.72         0.12
## PitchDifference              -0.04         0.04         0.02        -0.78
## WidthDifference              -0.12        -0.08        -0.22        -0.64
## PercentPremiumSeats           0.06        -0.33         0.49        -0.10
##                     PitchPremium WidthEconomy WidthPremium PriceEconomy
## FlightDuration              0.10         0.46         0.10         0.57
## SeatsEconomy                0.12         0.37         0.10         0.13
## SeatsPremium                0.00         0.46         0.00         0.11
## PitchEconomy               -0.55         0.29        -0.54         0.37
## PitchPremium                1.00        -0.02         0.75         0.05
## WidthEconomy               -0.02         1.00         0.08         0.07
## WidthPremium                0.75         0.08         1.00        -0.06
## PriceEconomy                0.05         0.07        -0.06         1.00
## PricePremium                0.09         0.15         0.06         0.90
## PriceRelative               0.42        -0.04         0.50        -0.29
## SeatsTotal                  0.11         0.41         0.09         0.13
## PitchDifference             0.95        -0.13         0.76        -0.10
## WidthDifference             0.70        -0.39         0.88        -0.08
## PercentPremiumSeats        -0.18         0.23        -0.18         0.07
##                     PricePremium PriceRelative SeatsTotal PitchDifference
## FlightDuration              0.65          0.12       0.20           -0.04
## SeatsEconomy                0.18          0.00       0.99            0.04
## SeatsPremium                0.22         -0.10       0.72            0.02
## PitchEconomy                0.23         -0.42       0.12           -0.78
## PitchPremium                0.09          0.42       0.11            0.95
## WidthEconomy                0.15         -0.04       0.41           -0.13
## WidthPremium                0.06          0.50       0.09            0.76
## PriceEconomy                0.90         -0.29       0.13           -0.10
## PricePremium                1.00          0.03       0.19           -0.02
## PriceRelative               0.03          1.00      -0.01            0.47
## SeatsTotal                  0.19         -0.01       1.00            0.03
## PitchDifference            -0.02          0.47       0.03            1.00
## WidthDifference            -0.01          0.49      -0.11            0.76
## PercentPremiumSeats         0.12         -0.16      -0.22           -0.09
##                     WidthDifference PercentPremiumSeats
## FlightDuration                -0.12                0.06
## SeatsEconomy                  -0.08               -0.33
## SeatsPremium                  -0.22                0.49
## PitchEconomy                  -0.64               -0.10
## PitchPremium                   0.70               -0.18
## WidthEconomy                  -0.39                0.23
## WidthPremium                   0.88               -0.18
## PriceEconomy                  -0.08                0.07
## PricePremium                  -0.01                0.12
## PriceRelative                  0.49               -0.16
## SeatsTotal                    -0.11               -0.22
## PitchDifference                0.76               -0.09
## WidthDifference                1.00               -0.28
## PercentPremiumSeats           -0.28                1.00

Corrgram of Airline Variables

# Return to a single-panel plotting layout after the side-by-side plots.
par(mfrow = c(1, 1))
library(corrgram)

# Corrgram of the airline data with pie glyphs in the upper triangle.
# The title names the airline data rather than "store variables", which
# does not describe this dataset.
corrgram(
  airline.df,
  upper.panel = panel.pie,
  main = "Corrgram of airline variables"
)

Analysis

T-test

Null hypothesis: there is no difference between the mean price of an economy class ticket and the mean price of a premium economy class ticket.

# Two-sample t-test comparing mean economy and premium-economy prices,
# treating the two samples as independent with equal variances.
# NOTE(review): both prices come from the same row of the data, so the
# observations look paired; a paired test (paired = TRUE) may be the more
# appropriate choice - confirm before relying on this result.
t.test(
  x = airline.df$PriceEconomy,
  y = airline.df$PricePremium,
  paired = FALSE,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  airline.df$PriceEconomy and airline.df$PricePremium
## t = -6.8304, df = 914, p-value = 1.544e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0699 -369.2926
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

The null hypothesis is rejected because the t-test gives a very low p-value (p < 0.05): there is a statistically significant difference between the mean prices of economy class and premium economy class tickets.

Pearson’s Correlation Tests:

1. Between the price difference and the width difference

# Pearson correlation test between the absolute price difference
# (premium minus economy) and the seat-width difference.
cor.test(
  x = (airline.df$PricePremium - airline.df$PriceEconomy),
  y = airline.df$WidthDifference,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  (airline.df$PricePremium - airline.df$PriceEconomy) and airline.df$WidthDifference
## t = 2.5291, df = 456, p-value = 0.01177
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02627012 0.20700978
## sample estimates:
##       cor 
## 0.1176138

2. Between the price difference and the pitch difference

# Pearson correlation test between the absolute price difference
# (premium minus economy) and the seat-pitch difference.
cor.test(
  x = (airline.df$PricePremium - airline.df$PriceEconomy),
  y = airline.df$PitchDifference,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  (airline.df$PricePremium - airline.df$PriceEconomy) and airline.df$PitchDifference
## t = 2.7688, df = 456, p-value = 0.005855
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03739893 0.21764764
## sample estimates:
##       cor 
## 0.1285851

3. Between the price difference and the flight duration

# Pearson correlation test between the absolute price difference
# (premium minus economy) and the flight duration.
cor.test(
  x = (airline.df$PricePremium - airline.df$PriceEconomy),
  y = airline.df$FlightDuration,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  (airline.df$PricePremium - airline.df$PriceEconomy) and airline.df$FlightDuration
## t = 11.435, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3976578 0.5403379
## sample estimates:
##       cor 
## 0.4720837

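The three correlation tests show that the price difference between the two ticket classes is moderately correlated with flight duration (r = 0.47) and weakly, but still significantly, correlated with the pitch difference (r = 0.13) and the width difference (r = 0.12); all three p-values are below 0.05.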

Regression Analysis

# Linear regression of the absolute price difference (premium minus economy)
# on pitch difference, width difference and flight duration.
# The columns are referenced through airline.df$... inside the formula, which
# is why the coefficient labels in the printed summary carry that prefix.
fit <- lm(
  formula = (airline.df$PricePremium - airline.df$PriceEconomy) ~
    airline.df$PitchDifference +
    airline.df$WidthDifference +
    airline.df$FlightDuration
)
summary(fit)
## 
## Call:
## lm(formula = (airline.df$PricePremium - airline.df$PriceEconomy) ~ 
##     airline.df$PitchDifference + airline.df$WidthDifference + 
##         airline.df$FlightDuration)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -859.4 -324.7  -62.7  150.1 3331.5 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -286.933    117.833  -2.435   0.0153 *  
## airline.df$PitchDifference   10.387     20.779   0.500   0.6174    
## airline.df$WidthDifference   74.641     30.977   2.410   0.0164 *  
## airline.df$FlightDuration    80.992      6.754  11.992   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.1 on 454 degrees of freedom
## Multiple R-squared:  0.2538, Adjusted R-squared:  0.2489 
## F-statistic: 51.48 on 3 and 454 DF,  p-value: < 2.2e-16

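The overall regression is statistically significant (F-test p-value < 2.2e-16), but it explains only about 25% of the variance in the price difference (R-squared = 0.25). Flight duration and width difference are significant predictors (p < 0.05), so the null hypothesis of a zero coefficient is rejected for these two variables. Pitch difference is not significant once the other two are in the model (p = 0.62), so its null hypothesis cannot be rejected.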

# Print the estimated regression coefficients of the fitted model.
coef(fit)
##                (Intercept) airline.df$PitchDifference 
##                 -286.93258                   10.38682 
## airline.df$WidthDifference  airline.df$FlightDuration 
##                   74.64098                   80.99227

Following factors explain the difference in price between an economy ticket and a premium-economy airline ticket :

As the regression above shows, flight duration and width difference are significant predictors of the price difference (p < 0.05), whereas pitch difference is not significant (p = 0.62) when the other two variables are included; the model explains about 25% of the variance.

1.Variation in prices were more in International flights.

2.Pitch And Width Differences in International flights were more.

3. Prices of premium economy seats increase with increasing width, pitch and flight duration.

4.Relative pricing and the no. of premium class seats are more in international flights.

5. Premium economy class airline tickets are clearly more expensive than economy class airline tickets. The contributing factors could be width difference, pitch difference and flight duration.