# Load the airline pricing data set and print a per-column summary.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, in which case summary() would no
# longer print the per-level counts (Airline, Aircraft, TravelMonth,
# IsInternational) shown in the output below.
airl <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
summary(airl)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Economy and premium ticket prices broken down by airline, drawn as
# horizontal boxplots on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
boxplot(
  PriceEconomy ~ Airline,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Airlines",
  xlab = "Price of Economy Seat"
)
boxplot(
  PricePremium ~ Airline,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Airlines",
  xlab = "Price of Premium Seat"
)
# Economy and premium ticket prices broken down by aircraft manufacturer,
# drawn as horizontal boxplots on a fresh 2 x 2 plotting grid.
par(mfrow = c(2, 2))
boxplot(
  PriceEconomy ~ Aircraft,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Aircraft",
  xlab = "Price of Economy Seat"
)
boxplot(
  PricePremium ~ Aircraft,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Aircraft",
  xlab = "Price of Premium Seat"
)
# Ticket price grouped by the seat pitch of the same cabin class
# (economy price by economy pitch, premium price by premium pitch).
par(mfrow = c(2, 2))
boxplot(
  PriceEconomy ~ PitchEconomy,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Pitch",
  xlab = "Price of Economy Seat"
)
boxplot(
  PricePremium ~ PitchPremium,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Pitch",
  xlab = "Price of Premium Seat"
)
# Ticket price grouped by the seat width of the same cabin class
# (economy price by economy width, premium price by premium width).
par(mfrow = c(2, 2))
boxplot(
  PriceEconomy ~ WidthEconomy,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Width",
  xlab = "Price of Economy Seat"
)
boxplot(
  PricePremium ~ WidthPremium,
  data = airl,
  col = c("Red", "Blue"),
  las = 1,
  horizontal = TRUE,
  ylab = "Width",
  xlab = "Price of Premium Seat"
)
library("car")
## Warning: package 'car' was built under R version 3.4.3
# Scatterplots of relative price (x axis) against the premium-vs-economy
# width and pitch differences (y axis).
# The descriptive text used to be passed as `boxplots=`, but car::scatterplot
# uses that argument to choose marginal boxplots ("x", "y", "xy" or ""), so
# the text never appeared as a title and the marginal boxplots were silently
# switched off. It is now passed as the plot title via `main=`, which leaves
# `boxplots` at its default. The stray trailing comma, which added an empty
# argument to each call, is removed as well.
scatterplot(airl$PriceRelative, airl$WidthDifference,
            main = "Price vs. Width diff")
scatterplot(airl$PriceRelative, airl$PitchDifference,
            main = "Price vs. Pitch diff")
# Return to a single-panel layout, then draw the pairwise scatterplot matrix
# of relative price, width difference and pitch difference.
# The trailing comma after `main` was removed: it passed an extra empty
# argument into the call, which can fail with "argument is missing" once it
# is forwarded through `...`.
par(mfrow = c(1, 1))
scatterplotMatrix(~ PriceRelative + WidthDifference + PitchDifference,
                  data = airl,
                  main = "Price, Width and Pitch")
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of the airl data frame with variable reordering enabled:
# shaded cells below the diagonal, pie glyphs above it, and each variable's
# min/max plus its name on the diagonal.
corrgram(
  airl,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of Airplane data variables"
)
# Test whether relative price is correlated with the pitch difference.
# No method is specified; the printed result below reports Pearson's
# product-moment correlation with a two-sided alternative.
cor.test(airl$PriceRelative,airl$PitchDifference)
##
## Pearson's product-moment correlation
##
## data: airl$PriceRelative and airl$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
Because the p-value is less than 0.05, we reject the null hypothesis of zero correlation: Relative Price and Pitch Difference have a statistically significant, moderate positive correlation (r ≈ 0.47).
# Test whether relative price is correlated with the width difference.
# No method is specified; the printed result below reports Pearson's
# product-moment correlation with a two-sided alternative.
cor.test(airl$PriceRelative,airl$WidthDifference)
##
## Pearson's product-moment correlation
##
## data: airl$PriceRelative and airl$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
Because the p-value is less than 0.05, we reject the null hypothesis of zero correlation: Relative Price and Width Difference have a statistically significant, moderate positive correlation (r ≈ 0.49).
# Test whether relative price is correlated with the percentage of premium
# seats. No method is specified; the printed result below reports Pearson's
# product-moment correlation with a two-sided alternative.
cor.test(airl$PriceRelative, airl$PercentPremiumSeats)
##
## Pearson's product-moment correlation
##
## data: airl$PriceRelative and airl$PercentPremiumSeats
## t = -3.496, df = 456, p-value = 0.0005185
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24949885 -0.07098966
## sample estimates:
## cor
## -0.1615656
Because the p-value is less than 0.05, we reject the null hypothesis of zero correlation: Relative Price and the Percentage of Premium seats have a statistically significant but weak negative correlation (r ≈ -0.16).
# Test whether relative price is correlated with the number of economy
# seats. No method is specified; the printed result below reports Pearson's
# product-moment correlation with a two-sided alternative.
cor.test(airl$PriceRelative, airl$SeatsEconomy)
##
## Pearson's product-moment correlation
##
## data: airl$PriceRelative and airl$SeatsEconomy
## t = 0.084498, df = 456, p-value = 0.9327
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08770167 0.09554911
## sample estimates:
## cor
## 0.003956939
Because the p-value is greater than 0.05, we fail to reject the null hypothesis of zero correlation: there is no evidence of a linear association between Relative Price and the number of economy seats (r ≈ 0.004).
# Fit a linear regression of PriceRelative on PitchDifference,
# WidthDifference and PercentPremiumSeats using the airl data, then print
# the coefficient table and fit statistics.
airpl <- lm(PriceRelative ~ PitchDifference + WidthDifference + PercentPremiumSeats, data = airl)
summary(airpl)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference +
## PercentPremiumSeats, data = airl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.88643 -0.29471 -0.05005 0.19013 1.17157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.031508 0.097220 -0.324 0.746
## PitchDifference 0.064596 0.016171 3.994 7.56e-05 ***
## WidthDifference 0.104782 0.024813 4.223 2.92e-05 ***
## PercentPremiumSeats -0.005764 0.003971 -1.451 0.147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3882 on 454 degrees of freedom
## Multiple R-squared: 0.2627, Adjusted R-squared: 0.2579
## F-statistic: 53.93 on 3 and 454 DF, p-value: < 2.2e-16
In the above model, Pitch Difference and Width Difference are statistically significant predictors of Price Relative (p < 0.001), whereas Percent Premium Seats is not (p = 0.147).
We therefore fit a second model that uses only Pitch Difference and Width Difference as predictors.
# Refit the regression of PriceRelative with only PitchDifference and
# WidthDifference as predictors and print its summary.
# NOTE(review): this reassigns airpl, so the earlier three-predictor fit is
# no longer reachable under that name after this line.
airpl <- lm(PriceRelative ~ PitchDifference + WidthDifference, data = airl)
summary(airpl)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference,
## data = airl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84163 -0.28484 -0.07241 0.17698 1.18778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10514 0.08304 -1.266 0.206077
## PitchDifference 0.06019 0.01590 3.785 0.000174 ***
## WidthDifference 0.11621 0.02356 4.933 1.14e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2561
## F-statistic: 79.65 on 2 and 455 DF, p-value: < 2.2e-16
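From the above analysis, we reject the null hypothesis that Price Relative is not associated with Pitch Difference and Width Difference: both coefficients are positive and statistically significant, although the model explains only about 26% of the variance in Price Relative (R-squared = 0.2593).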