# Load the six-airline fares dataset and inspect its structure.
# NOTE(review): the path is machine-specific; adjust it before running on
# another machine.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 (where the
# default became FALSE), matching the recorded str()/summary() output.
airline <- read.csv(
  "file:///C:/Users/hp/Desktop/IIML/My Project files/SixAirlinesDataV2.csv",
  stringsAsFactors = TRUE
)
# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(airline)
}
str(airline)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Column-by-column summary of the dataset (level counts for factors,
# quartiles and mean for numeric columns).
summary(object = airline)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# One-way frequency tables for the categorical columns. `mytable` is reused
# for each table and ends up holding the IsInternational counts.
mytable <- table(Airline = airline$Airline)
print(mytable)
## Airline
## AirFrance British Delta Jet Singapore Virgin
## 74 175 46 61 40 62
mytable <- table(Aircraft = airline$Aircraft)
print(mytable)
## Aircraft
## AirBus Boeing
## 151 307
mytable <- table(TravelMonth = airline$TravelMonth)
print(mytable)
## TravelMonth
## Aug Jul Oct Sep
## 127 75 127 129
mytable <- table(IsInternational = airline$IsInternational)
print(mytable)
## IsInternational
## Domestic International
## 40 418
library(car)
## Warning: package 'car' was built under R version 3.3.3
# Distribution of flight duration: histogram (left) and boxplot (right).
par(mfrow = c(1, 2))
hist(
  airline$FlightDuration,
  col = "lightblue",
  xlim = c(0, 15),
  ylim = c(0, 60)
)
# The y-axis label previously read "Fight Duration" (typo).
boxplot(
  airline$FlightDuration,
  main = "Boxplot of flight duration",
  ylab = "Flight Duration"
)
## Comparison of economy price and premium price
library(car)
# Economy and premium fare distributions, drawn side by side.
par(mfrow = c(1, 2))
boxplot(airline[["PriceEconomy"]], main = "Boxplot of PriceEconomy")
boxplot(airline[["PricePremium"]], main = "Boxplot of PricePremium")
The premium price is higher than the economy price.
# Mean economy fare for domestic vs. international flights.
by(data = airline$PriceEconomy, INDICES = airline$IsInternational, FUN = mean)
## airline$IsInternational: Domestic
## [1] 356.625
## --------------------------------------------------------
## airline$IsInternational: International
## [1] 1419.943
# Mean premium fare for domestic vs. international flights.
by(data = airline$PricePremium, INDICES = airline$IsInternational, FUN = mean)
## airline$IsInternational: Domestic
## [1] 385.9
## --------------------------------------------------------
## airline$IsInternational: International
## [1] 1984.909
# Fare distributions split by domestic/international: economy on the left,
# premium on the right.
par(mfrow = c(1, 2))
boxplot(
  PriceEconomy ~ IsInternational,
  data = airline,
  col = c("purple", "navy"),
  ylab = "PriceEconomy"
)
boxplot(
  PricePremium ~ IsInternational,
  data = airline,
  col = c("purple", "navy"),
  ylab = "PricePremium"
)
# Mean economy fare per aircraft manufacturer.
# This call previously repeated the PricePremium-by-Aircraft computation that
# follows it; the economy fare is the intended counterpart, mirroring the
# economy/premium pairs used for IsInternational and in the boxplots.
by(airline$PriceEconomy, airline$Aircraft, mean)
## NOTE: the output previously recorded here was for PricePremium (a duplicate
## of the PricePremium-by-Aircraft result); re-run to record the PriceEconomy
## means.
# Mean premium fare per aircraft manufacturer.
by(data = airline$PricePremium, INDICES = airline$Aircraft, FUN = mean)
## airline$Aircraft: AirBus
## [1] 1869.503
## --------------------------------------------------------
## airline$Aircraft: Boeing
## [1] 1833.332
# Fare distributions per aircraft manufacturer: economy on the left, premium
# on the right.
par(mfrow = c(1, 2))
boxplot(
  PriceEconomy ~ Aircraft,
  data = airline,
  col = c("purple", "navy"),
  ylab = "PriceEconomy"
)
boxplot(
  PricePremium ~ Aircraft,
  data = airline,
  col = c("purple", "navy"),
  ylab = "PricePremium"
)
# Fare distributions per airline, stacked vertically (economy above premium).
# The two colours are recycled across the airline levels.
par(mfrow = c(2, 1))
boxplot(
  PriceEconomy ~ Airline,
  data = airline,
  col = c("purple", "navy"),
  ylab = "PriceEconomy"
)
boxplot(
  PricePremium ~ Airline,
  data = airline,
  col = c("purple", "navy"),
  ylab = "PricePremium"
)
# Fare distributions per travel month, one colour per month: economy on the
# left, premium on the right.
par(mfrow = c(1, 2))
boxplot(
  PriceEconomy ~ TravelMonth,
  data = airline,
  col = c("purple", "navy", "red", "darkgreen"),
  ylab = "PriceEconomy"
)
boxplot(
  PricePremium ~ TravelMonth,
  data = airline,
  col = c("purple", "navy", "red", "darkgreen"),
  ylab = "PricePremium"
)
# Relative price grouped by seat-width difference (top) and seat-pitch
# difference (bottom).
par(mfrow = c(2, 1))
boxplot(
  PriceRelative ~ WidthDifference,
  data = airline,
  col = c("purple", "navy", "red", "darkgreen"),
  xlab = "WidthDifference",
  ylab = "PriceRelative"
)
boxplot(
  PriceRelative ~ PitchDifference,
  data = airline,
  col = c("purple", "navy", "red", "darkgreen"),
  xlab = "PitchDifference",
  ylab = "PriceRelative"
)
# Scatterplots (car::scatterplot) of price measures against seat and flight
# characteristics. The first argument is the x variable, the second the y.
scatterplot(
  x = airline$WidthDifference,
  y = airline$PriceRelative,
  main = "PriceRelative vs WidthDifference"
)
scatterplot(
  x = airline$PitchDifference,
  y = airline$PriceRelative,
  main = "PriceRelative vs PitchDifference"
)
scatterplot(
  x = airline$FlightDuration,
  y = airline$PricePremium,
  main = "PricePremium vs FlightDuration"
)
scatterplot(
  x = airline$SeatsEconomy,
  y = airline$PriceEconomy,
  main = "PriceEconomy vs SeatsEconomy"
)
scatterplot(
  x = airline$SeatsPremium,
  y = airline$PricePremium,
  main = "PricePremium vs SeatsPremium"
)
scatterplot(
  x = airline$PercentPremiumSeats,
  y = airline$PricePremium,
  main = "PricePremium vs PercentPremiumSeats"
)
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.3.3
# Correlogram of the airline dataset: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal.
# The title previously read "Corrgram of Store Variables", a leftover from a
# different dataset.
corrgram(
  airline,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airline Variables"
)
# Pearson correlation test: economy fare vs. flight duration.
cor.test(x = airline$PriceEconomy, y = airline$FlightDuration)
##
## Pearson's product-moment correlation
##
## data: airline$PriceEconomy and airline$FlightDuration
## t = 14.685, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5010266 0.6257772
## sample estimates:
## cor
## 0.5666404
# Pearson correlation test: premium fare vs. flight duration.
cor.test(x = airline$PricePremium, y = airline$FlightDuration)
##
## Pearson's product-moment correlation
##
## data: airline$PricePremium and airline$FlightDuration
## t = 18.204, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5923218 0.6988270
## sample estimates:
## cor
## 0.6487398
# Pearson correlation test: relative price vs. flight duration.
cor.test(x = airline$PriceRelative, y = airline$FlightDuration)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$FlightDuration
## t = 2.6046, df = 456, p-value = 0.009498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02977856 0.21036806
## sample estimates:
## cor
## 0.121075
Inference: FlightDuration has a fairly strong positive correlation with both the economy price (r ≈ 0.57) and the premium price (r ≈ 0.65). The relative price also increases slightly with flight duration (r ≈ 0.12).
# Pearson correlation test: relative price vs. seat-width difference.
cor.test(x = airline$PriceRelative, y = airline$WidthDifference)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
WidthDifference is positively correlated with PriceRelative (r ≈ 0.49), and the correlation is statistically significant. As WidthDifference increases, PriceRelative tends to increase.
# Pearson correlation test: relative price vs. seat-pitch difference.
cor.test(x = airline$PriceRelative, y = airline$PitchDifference)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
Similar to WidthDifference, PitchDifference is also positively correlated with PriceRelative, with nearly the same correlation coefficient (r ≈ 0.47).
# Pearson correlation test: relative price vs. total number of seats.
cor.test(x = airline$PriceRelative, y = airline$SeatsTotal)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$SeatsTotal
## t = -0.24706, df = 456, p-value = 0.805
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.10308648 0.08014282
## sample estimates:
## cor
## -0.01156894
Since the p-value (0.805) is greater than 0.05, the correlation between SeatsTotal and PriceRelative is not statistically significant.
# Pearson correlation test: relative price vs. share of premium seats.
cor.test(x = airline$PriceRelative, y = airline$PercentPremiumSeats)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$PercentPremiumSeats
## t = -3.496, df = 456, p-value = 0.0005185
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24949885 -0.07098966
## sample estimates:
## cor
## -0.1615656
PercentPremiumSeats has a small negative correlation with PriceRelative (r ≈ -0.16): as PercentPremiumSeats increases, PriceRelative tends to decrease.
# Pearson correlation test: relative price vs. economy fare.
cor.test(x = airline$PriceRelative, y = airline$PriceEconomy)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$PriceEconomy
## t = -6.4359, df = 456, p-value = 3.112e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3704004 -0.2022889
## sample estimates:
## cor
## -0.2885671
# Pearson correlation test: relative price vs. premium fare.
cor.test(x = airline$PriceRelative, y = airline$PricePremium)
##
## Pearson's product-moment correlation
##
## data: airline$PriceRelative and airline$PricePremium
## t = 0.6804, df = 456, p-value = 0.4966
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05995522 0.12311410
## sample estimates:
## cor
## 0.03184654
From the above two tests, PriceEconomy is significantly (and negatively, r ≈ -0.29) correlated with PriceRelative, whereas the correlation between PricePremium and PriceRelative is not statistically significant (p ≈ 0.50).
Dependence of PricePremium on various factors
# Linear regression of the premium fare on airline, travel month, flight
# duration, seat-size differences, premium-seat share and relative price.
model <- lm(
  formula = PricePremium ~ Airline + TravelMonth + FlightDuration +
    PitchDifference + WidthDifference + PercentPremiumSeats + PriceRelative,
  data = airline
)
summary(model)
##
## Call:
## lm(formula = PricePremium ~ Airline + TravelMonth + FlightDuration +
## PitchDifference + WidthDifference + PercentPremiumSeats +
## PriceRelative, data = airline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2216.1 -408.6 102.1 392.2 4277.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 624.427 534.907 1.167 0.2437
## AirlineBritish -972.777 218.739 -4.447 1.10e-05 ***
## AirlineDelta -1133.805 265.671 -4.268 2.42e-05 ***
## AirlineJet -2497.060 241.586 -10.336 < 2e-16 ***
## AirlineSingapore -2077.175 166.680 -12.462 < 2e-16 ***
## AirlineVirgin -1026.230 210.448 -4.876 1.51e-06 ***
## TravelMonthJul 78.482 111.257 0.705 0.4809
## TravelMonthOct -39.008 94.984 -0.411 0.6815
## TravelMonthSep -4.181 94.494 -0.044 0.9647
## FlightDuration 187.775 12.780 14.693 < 2e-16 ***
## PitchDifference 25.189 116.907 0.215 0.8295
## WidthDifference 259.734 157.784 1.646 0.1004
## PercentPremiumSeats 15.601 9.352 1.668 0.0960 .
## PriceRelative 234.797 100.289 2.341 0.0197 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 755.4 on 444 degrees of freedom
## Multiple R-squared: 0.6658, Adjusted R-squared: 0.6561
## F-statistic: 68.05 on 13 and 444 DF, p-value: < 2.2e-16
Since the overall F-test for this model has a very low p-value (< 2.2e-16), the model is statistically significant.
# Linear regression of the relative price on airline, travel month, flight
# duration, seat-size differences, premium-seat share and economy fare.
# Overwrites the previous `model` object.
model <- lm(
  formula = PriceRelative ~ Airline + TravelMonth + FlightDuration +
    PitchDifference + WidthDifference + PercentPremiumSeats + PriceEconomy,
  data = airline
)
summary(model)
##
## Call:
## lm(formula = PriceRelative ~ Airline + TravelMonth + FlightDuration +
## PitchDifference + WidthDifference + PercentPremiumSeats +
## PriceEconomy, data = airline)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.87850 -0.19874 -0.02757 0.12174 1.00366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.301e-01 2.375e-01 0.548 0.5841
## AirlineBritish 1.438e-02 1.034e-01 0.139 0.8894
## AirlineDelta -1.239e-01 1.242e-01 -0.998 0.3190
## AirlineJet 6.487e-02 1.245e-01 0.521 0.6027
## AirlineSingapore -1.894e-01 9.352e-02 -2.025 0.0434 *
## AirlineVirgin 6.366e-02 1.026e-01 0.620 0.5353
## TravelMonthJul 6.990e-03 4.898e-02 0.143 0.8866
## TravelMonthOct 4.254e-02 4.172e-02 1.020 0.3084
## TravelMonthSep -1.189e-02 4.154e-02 -0.286 0.7749
## FlightDuration 6.228e-02 6.488e-03 9.599 < 2e-16 ***
## PitchDifference 1.678e-02 5.140e-02 0.327 0.7442
## WidthDifference 1.253e-01 6.962e-02 1.799 0.0727 .
## PercentPremiumSeats -7.036e-03 4.128e-03 -1.705 0.0890 .
## PriceEconomy -2.501e-04 2.984e-05 -8.384 6.84e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3322 on 444 degrees of freedom
## Multiple R-squared: 0.472, Adjusted R-squared: 0.4566
## F-statistic: 30.54 on 13 and 444 DF, p-value: < 2.2e-16
Since the overall F-test for this model has a very low p-value (< 2.2e-16), the model is statistically significant.
1. The price of Premium seats is higher than the price of Economy seats on a flight.
2. The factors that affect the difference in price between Premium and Economy seats on a flight are FlightDuration, WidthDifference, PitchDifference, Airline, and PercentPremiumSeats.
3. Domestic flights are cheaper than international flights, but there are also far fewer of them than international flights in the data.
4. AirFrance is the most expensive airline, whereas Jet and Delta are the cheaper ones.
5. In July, the number of flights goes down, and so does the price, due to low demand. In the other three months, they are similar.