# Load the six-airline fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0, matching the
# str()/summary() output shown below.
air.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(air.df)
}

Summary

library(psych)
# Per-column summary statistics for the whole data frame.
summary(object = air.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Compact listing of each column's type and leading values.
str(object = air.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Mean premium-economy price by airline

# Average premium-economy fare for each airline. Passing data = air.df
# (instead of air.df$... inside the formula) gives the result plain
# column names: Airline and PricePremium.
aggregate(PricePremium ~ Airline, data = air.df, FUN = mean)
##     Airline PricePremium
## 1 AirFrance    3065.2162
## 2   British    1937.0286
## 3     Delta     684.6739
## 4       Jet     483.3607
## 5 Singapore    1239.9250
## 6    Virgin    2721.6935

Mean pitch difference by airline

# Average pitch difference for each airline. Passing data = air.df
# (instead of air.df$... inside the formula) gives the result plain
# column names: Airline and PitchDifference.
aggregate(PitchDifference ~ Airline, data = air.df, FUN = mean)
##     Airline PitchDifference
## 1 AirFrance        6.000000
## 2   British        7.000000
## 3     Delta        3.000000
## 4       Jet        9.540984
## 5 Singapore        6.000000
## 6    Virgin        7.000000

Mean width difference by airline

# Average width difference for each airline. Passing data = air.df
# (instead of air.df$... inside the formula) gives the result plain
# column names: Airline and WidthDifference.
aggregate(WidthDifference ~ Airline, data = air.df, FUN = mean)
##     Airline WidthDifference
## 1 AirFrance       1.4324324
## 2   British       1.0000000
## 3     Delta       0.3913043
## 4       Jet       3.6557377
## 5 Singapore       1.0000000
## 6    Virgin       3.0000000

Distribution of Relative pricing of different airlines

library(lattice)
# Histogram of the relative price. The title now names the variable that
# is actually plotted (PriceRelative), and the formula + data form labels
# the x-axis with the bare column name.
histogram(~ PriceRelative, data = air.df,
          main = "Distribution of relative price", col = "light blue")

Boxplot comparing the number of premium economy seats by airline

# Horizontal boxplots of premium-class seat counts, one box per airline.
boxplot(
  SeatsPremium ~ Airline,
  data = air.df,
  horizontal = TRUE,
  col = "light blue",
  xlab = "#Premium Class seats",
  ylab = "Airlines"
)

Boxplot comparing price of Premium economy seats based on airlines

# Horizontal boxplots of premium-economy price, one box per airline.
boxplot(
  PricePremium ~ Airline,
  data = air.df,
  horizontal = TRUE,
  col = "light blue",
  xlab = "Premium economy price",
  ylab = "Airlines"
)

Some hypotheses we can form are: 1. relative price depends on width difference; 2. relative price depends on pitch difference; 3. relative price depends on flight duration. Checking the correlation and scatterplot of relative price vs width difference.

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Pearson correlation test between relative price and width difference.
cor.test(
  x = air.df$PriceRelative,
  y = air.df$WidthDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  air.df$PriceRelative and air.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4125388 0.5528218
## sample estimates:
##       cor 
## 0.4858024
# Relative price is the response being tested, so it goes on the y-axis
# (left-hand side of the formula) with width difference as the predictor
# on the x-axis. data = air.df also gives clean axis labels.
scatterplot(PriceRelative ~ WidthDifference, data = air.df,
            main = "Price Relative vs width difference")

The correlation coefficient supports hypothesis 1: relative price is positively associated with width difference. Next, checking the correlation and scatterplot of relative price vs pitch difference.

# Pearson correlation test between relative price and pitch difference.
cor.test(
  x = air.df$PriceRelative,
  y = air.df$PitchDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  air.df$PriceRelative and air.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3940262 0.5372817
## sample estimates:
##       cor 
## 0.4687302
# Relative price is the response being tested, so it goes on the y-axis
# (left-hand side of the formula) with pitch difference as the predictor
# on the x-axis. data = air.df also gives clean axis labels.
scatterplot(PriceRelative ~ PitchDifference, data = air.df,
            main = "Price Relative vs pitch difference")

The correlation coefficient supports hypothesis 2: relative price is positively associated with pitch difference. Scatterplot matrix of relative price, pitch difference and width difference.

# Pairwise scatterplots of relative price, pitch difference and width
# difference. Supplying data = air.df keeps the panel labels to the bare
# column names instead of "air.df$...".
scatterplotMatrix(~ PriceRelative + PitchDifference + WidthDifference,
                  data = air.df)

Checking the correlation and scatterplot of relative price vs flight duration.

# Pearson correlation test between relative price and flight duration.
cor.test(
  x = air.df$PriceRelative,
  y = air.df$FlightDuration
)
## 
##  Pearson's product-moment correlation
## 
## data:  air.df$PriceRelative and air.df$FlightDuration
## t = 2.6046, df = 456, p-value = 0.009498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02977856 0.21036806
## sample estimates:
##      cor 
## 0.121075
# Relative price is the response being tested, so it goes on the y-axis
# (left-hand side of the formula) with flight duration as the predictor
# on the x-axis. data = air.df also gives clean axis labels.
scatterplot(PriceRelative ~ FlightDuration, data = air.df,
            main = "Price Relative vs flight duration")

The correlation coefficient is low here (r ≈ 0.12): although it is statistically distinguishable from zero (p ≈ 0.0095), the association is weak. This shows that hypothesis 3 does not hold well, and relative price does not depend strongly on flight duration.

Corrgram

library(corrgram)

# Correlogram of the data frame, drawing pie glyphs in the upper triangle.
corrgram(
  air.df,
  upper.panel = panel.pie,
  main = "Corrgram"
)

From the above, we can consider only hypotheses 1 and 2. So, for the linear regression we take “Y” as relative price, “x1” as pitch difference and “x2” as width difference.

# Linear model of relative price on width and pitch difference.
# The formula uses bare column names with data = air.df so that the
# coefficients are named after the columns and predict(fit, newdata = ...)
# can look the predictors up in a new data frame.
fit <- lm(PriceRelative ~ WidthDifference + PitchDifference, data = air.df)
summary(fit)
## 
## Call:
## lm(formula = PriceRelative ~ WidthDifference + PitchDifference, 
##     data = air.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84163 -0.28484 -0.07241  0.17698  1.18778 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.10514    0.08304  -1.266 0.206077    
## WidthDifference  0.11621    0.02356   4.933 1.14e-06 ***
## PitchDifference  0.06019    0.01590   3.785 0.000174 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared:  0.2593, Adjusted R-squared:  0.2561 
## F-statistic: 79.65 on 2 and 455 DF,  p-value: < 2.2e-16

The p-values are very small, so hypotheses 1 and 2 hold. This implies that the relative price of an aircraft's seats depends on the pitch difference and the width difference. So the factors that play a role in creating the difference in pricing between economy and premium class seats are: 1. Pitch difference 2. Width difference