# Load the six-airline fare data set; the file name is resolved relative to
# the current working directory.
# stringsAsFactors = TRUE is spelled out because read.csv() stopped converting
# text columns to factors by default in R 4.0.0; stating it keeps the text
# columns categorical on every R version.
airline.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() copies the columns onto the search path so that bare
# names such as PitchPremium resolve in the t.test() calls further down.
# Prefer with(airline.df, ...) or data = airline.df there, then drop this call.
attach(airline.df)
## ------------------------------------------------------------------------
library(psych)
# Per-column summary statistics; the describe() output columns are selected by
# name rather than by position.
describe(airline.df)[, c("n", "mean", "sd", "median", "min", "max")]
## n mean sd median min max
## Airline* 458 3.01 1.65 2.00 1.00 6.00
## Aircraft* 458 1.67 0.47 2.00 1.00 2.00
## FlightDuration 458 7.58 3.54 7.79 1.25 14.66
## TravelMonth* 458 2.56 1.17 3.00 1.00 4.00
## IsInternational* 458 1.91 0.28 2.00 1.00 2.00
## SeatsEconomy 458 202.31 76.37 185.00 78.00 389.00
## SeatsPremium 458 33.65 13.26 36.00 8.00 66.00
## PitchEconomy 458 31.22 0.66 31.00 30.00 33.00
## PitchPremium 458 37.91 1.31 38.00 34.00 40.00
## WidthEconomy 458 17.84 0.56 18.00 17.00 19.00
## WidthPremium 458 19.47 1.10 19.00 17.00 21.00
## PriceEconomy 458 1327.08 988.27 1242.00 65.00 3593.00
## PricePremium 458 1845.26 1288.14 1737.00 86.00 7414.00
## PriceRelative 458 0.49 0.45 0.36 0.02 1.89
## SeatsTotal 458 235.96 85.29 227.00 98.00 441.00
## PitchDifference 458 6.69 1.76 7.00 2.00 10.00
## WidthDifference 458 1.63 1.19 1.00 0.00 4.00
## PercentPremiumSeats 458 14.65 4.84 13.21 4.71 24.69
# Frequency of each distinct PitchDifference value.
pitchDifferenceTable <- table(airline.df[["PitchDifference"]])
print(pitchDifferenceTable)
##
## 2 3 6 7 10
## 24 16 121 243 54
# Mean economy fare, premium fare and relative price for each PitchDifference
# level.
pd <- aggregate(
  cbind(PriceEconomy, PricePremium, PriceRelative) ~ PitchDifference,
  data = airline.df,
  FUN = mean
)
pd
## PitchDifference PriceEconomy PricePremium PriceRelative
## 1 2 348.0000 377.3333 0.08708333
## 2 3 369.5625 398.7500 0.08125000
## 3 6 2008.6942 2333.7438 0.34082645
## 4 7 1388.1317 2155.4897 0.51888889
## 5 10 243.8519 435.6481 0.97074074
# Frequency of each distinct WidthDifference value.
widthDifferenceTable <- table(airline.df[["WidthDifference"]])
print(widthDifferenceTable)
##
## 0 1 2 3 4
## 40 264 32 68 54
# Mean economy fare, premium fare and relative price for each WidthDifference
# level (printed, not stored).
aggregate(
  cbind(PriceEconomy, PricePremium, PriceRelative) ~ WidthDifference,
  data = airline.df,
  FUN = mean
)
## WidthDifference PriceEconomy PricePremium PriceRelative
## 1 0 356.6250 385.9000 0.0847500
## 2 1 1428.4053 1966.0795 0.4184091
## 3 2 2884.7500 3197.4375 0.2296875
## 4 3 1631.7206 2717.7059 0.7282353
## 5 4 243.8519 435.6481 0.9707407
# Cross-tabulate WidthDifference (rows) against PitchDifference (columns) and
# print it as a flat contingency table.
pitchWidthTable <- xtabs(
  ~ WidthDifference + PitchDifference,
  data = airline.df
)
ftable(pitchWidthTable)
## PitchDifference 2 3 6 7 10
## WidthDifference
## 0 24 16 0 0 0
## 1 0 0 89 175 0
## 2 0 0 32 0 0
## 3 0 0 0 68 0
## 4 0 0 0 0 54
# Mean economy fare, premium fare and relative price for each distinct
# SeatsTotal value.
t1 <- aggregate(
  cbind(PriceEconomy, PricePremium, PriceRelative) ~ SeatsTotal,
  data = airline.df,
  FUN = mean
)
t1
## SeatsTotal PriceEconomy PricePremium PriceRelative
## 1 98 291.7500 306.7500 0.06125000
## 2 138 299.4000 316.4000 0.06200000
## 3 140 184.7037 377.8889 1.15370370
## 4 144 365.7692 402.6154 0.10230769
## 5 156 328.4000 348.6000 0.07200000
## 6 158 458.0000 497.0000 0.09000000
## 7 160 415.5000 459.5000 0.10750000
## 8 162 1511.8627 2154.0000 0.42254902
## 9 166 1040.4444 1459.8333 0.40722222
## 10 168 2194.6857 2448.4286 0.27914286
## 11 170 257.0526 461.1579 0.95263158
## 12 198 3341.0000 3450.6667 0.03000000
## 13 200 479.2500 522.0000 0.09500000
## 14 212 1035.4167 1361.8750 0.34958333
## 15 227 1625.5000 2910.8333 0.60416667
## 16 228 3144.3636 3398.8182 0.08590909
## 17 233 1509.7273 2552.2121 0.82121212
## 18 240 772.5000 1710.0000 1.32000000
## 19 271 1638.2222 2658.1111 0.61111111
## 20 279 1666.2195 2439.9268 0.50414634
## 21 299 1633.8750 2765.3750 0.73500000
## 22 358 631.8654 965.8077 0.32211538
## 23 367 74.0000 97.0000 0.31000000
## 24 369 597.5000 1057.0000 0.80000000
## 25 427 2822.5714 3198.0714 0.20928571
## 26 441 2113.0000 3601.5000 0.74000000
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
# Pairwise correlations (with p-values) between the two fares and the two
# seat-comfort differences.
colairlines <- c(
  "PricePremium",
  "PriceEconomy",
  "PitchDifference",
  "WidthDifference"
)
corMatrix <- rcorr(as.matrix(airline.df[colairlines]))
print(corMatrix)
## PricePremium PriceEconomy PitchDifference WidthDifference
## PricePremium 1.00 0.90 -0.02 -0.01
## PriceEconomy 0.90 1.00 -0.10 -0.08
## PitchDifference -0.02 -0.10 1.00 0.76
## WidthDifference -0.01 -0.08 0.76 1.00
##
## n= 458
##
##
## P
## PricePremium PriceEconomy PitchDifference WidthDifference
## PricePremium 0.0000 0.6998 0.8059
## PriceEconomy 0.0000 0.0332 0.0708
## PitchDifference 0.6998 0.0332 0.0000
## WidthDifference 0.8059 0.0708 0.0000
# Pairwise correlations (with p-values) between the two fares, total seat
# count and the percentage of premium seats.
colairlines2 <- c(
  "PricePremium",
  "PriceEconomy",
  "SeatsTotal",
  "PercentPremiumSeats"
)
corMatrix2 <- rcorr(as.matrix(airline.df[colairlines2]))
print(corMatrix2)
## PricePremium PriceEconomy SeatsTotal
## PricePremium 1.00 0.90 0.19
## PriceEconomy 0.90 1.00 0.13
## SeatsTotal 0.19 0.13 1.00
## PercentPremiumSeats 0.12 0.07 -0.22
## PercentPremiumSeats
## PricePremium 0.12
## PriceEconomy 0.07
## SeatsTotal -0.22
## PercentPremiumSeats 1.00
##
## n= 458
##
##
## P
## PricePremium PriceEconomy SeatsTotal
## PricePremium 0.0000 0.0000
## PriceEconomy 0.0000 0.0045
## SeatsTotal 0.0000 0.0045
## PercentPremiumSeats 0.0127 0.1628 0.0000
## PercentPremiumSeats
## PricePremium 0.0127
## PriceEconomy 0.1628
## SeatsTotal 0.0000
## PercentPremiumSeats
# Welch two-sample t-test comparing premium and economy seat pitch. with()
# evaluates the call inside airline.df, so the column names resolve without
# relying on attach() having placed them on the search path.
# NOTE(review): both columns come from the same rows of airline.df, so a
# paired test (paired = TRUE) may be the more appropriate comparison; confirm.
with(airline.df, t.test(PitchPremium, PitchEconomy))
##
## Welch Two Sample t-test
##
## data: PitchPremium and PitchEconomy
## t = 97.482, df = 671.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.553067 6.822479
## sample estimates:
## mean of x mean of y
## 37.90611 31.21834
# Welch two-sample t-test comparing premium and economy fares. with()
# evaluates the call inside airline.df, so the column names resolve without
# relying on attach() having placed them on the search path.
# NOTE(review): both columns come from the same rows of airline.df, so a
# paired test (paired = TRUE) may be the more appropriate comparison; confirm.
with(airline.df, t.test(PricePremium, PriceEconomy))
##
## Welch Two Sample t-test
##
## data: PricePremium and PriceEconomy
## t = 6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 369.2793 667.0831
## sample estimates:
## mean of x mean of y
## 1845.258 1327.076
# Welch two-sample t-test comparing premium and economy seat width. with()
# evaluates the call inside airline.df, so the column names resolve without
# relying on attach() having placed them on the search path.
# NOTE(review): both columns come from the same rows of airline.df, so a
# paired test (paired = TRUE) may be the more appropriate comparison; confirm.
with(airline.df, t.test(WidthPremium, WidthEconomy))
##
## Welch Two Sample t-test
##
## data: WidthPremium and WidthEconomy
## t = 28.4, df = 678.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.520276 1.746100
## sample estimates:
## mean of x mean of y
## 19.47162 17.83843
# Multivariate linear regression of the premium fare on seat width/pitch
# differences and other flight descriptors.
# MODEL 1
# Regress PricePremium on the economy fare plus every candidate regressor
# listed in the formula below.
Model1 <- PricePremium ~
  PriceEconomy + PitchDifference + WidthDifference +
  PercentPremiumSeats + SeatsTotal + IsInternational +
  TravelMonth + FlightDuration + Aircraft
fit1 <- lm(formula = Model1, data = airline.df)
print(summary(fit1))
##
## Call:
## lm(formula = Model1, data = airline.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -977.2 -246.3 -47.9 135.2 3419.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.211e+03 1.755e+02 -6.898 1.82e-11 ***
## PriceEconomy 1.064e+00 3.114e-02 34.175 < 2e-16 ***
## PitchDifference 8.510e+01 3.913e+01 2.175 0.030163 *
## WidthDifference 1.240e+02 3.438e+01 3.607 0.000345 ***
## PercentPremiumSeats 3.177e+01 5.250e+00 6.052 3.04e-09 ***
## SeatsTotal 1.925e+00 3.360e-01 5.729 1.87e-08 ***
## IsInternationalInternational -7.537e+02 2.135e+02 -3.530 0.000458 ***
## TravelMonthJul -3.441e+01 7.074e+01 -0.486 0.626904
## TravelMonthOct 2.692e+01 6.036e+01 0.446 0.655795
## TravelMonthSep -2.097e+00 6.015e+01 -0.035 0.972203
## FlightDuration 8.455e+01 8.809e+00 9.598 < 2e-16 ***
## AircraftBoeing -2.082e+00 5.651e+01 -0.037 0.970625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 480.7 on 446 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8607
## F-statistic: 257.7 on 11 and 446 DF, p-value: < 2.2e-16
# MODEL 2
# Same as MODEL 1 with TravelMonth and Aircraft removed from the regressors.
Model2 <- PricePremium ~
  PriceEconomy + PitchDifference + WidthDifference +
  PercentPremiumSeats + SeatsTotal + FlightDuration +
  IsInternational
fit2 <- lm(formula = Model2, data = airline.df)
print(summary(fit2))
##
## Call:
## lm(formula = Model2, data = airline.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1010.0 -258.4 -49.9 133.6 3416.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.213e+03 1.695e+02 -7.156 3.40e-12 ***
## PriceEconomy 1.063e+00 3.077e-02 34.537 < 2e-16 ***
## PitchDifference 8.421e+01 3.656e+01 2.303 0.021722 *
## WidthDifference 1.224e+02 3.373e+01 3.629 0.000318 ***
## PercentPremiumSeats 3.190e+01 5.220e+00 6.112 2.14e-09 ***
## SeatsTotal 1.920e+00 3.241e-01 5.922 6.31e-09 ***
## FlightDuration 8.459e+01 8.507e+00 9.943 < 2e-16 ***
## IsInternationalInternational -7.412e+02 2.001e+02 -3.704 0.000238 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 479 on 450 degrees of freedom
## Multiple R-squared: 0.8638, Adjusted R-squared: 0.8617
## F-statistic: 407.9 on 7 and 450 DF, p-value: < 2.2e-16
# Adjusted R-squared is marginally higher for Model 2 (0.8617) than for
# Model 1 (0.8607): removing TravelMonth and Aircraft did not reduce the
# adjusted fit.
summary(fit1)[["adj.r.squared"]]
## [1] 0.8607235
summary(fit2)[["adj.r.squared"]]
## [1] 0.861724
# Distribution of PriceRelative within each level of PitchDifference and of
# WidthDifference. The formula interface draws one box per distinct value of
# the grouping variable; passing the two columns as separate positional
# arguments instead draws them as two unrelated boxes on one shared axis.
boxplot(PriceRelative ~ PitchDifference, data = airline.df)
boxplot(PriceRelative ~ WidthDifference, data = airline.df)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplot matrix of PriceRelative against the two seat-comfort differences.
# NOTE(review): car >= 3.0 appears to expect
# diagonal = list(method = "histogram"); a bare string may be ignored with a
# warning there. Confirm against the installed car version.
scatterplotMatrix(
  formula = ~ PriceRelative + PitchDifference + WidthDifference,
  data = airline.df,
  cex = 0.6,
  diagonal = "histogram"
)
library(corrgram)
# Corrgram of airline.df: panel.shade below the diagonal, panel.pie above it,
# panel.minmax on the diagonal and panel.txt for the variable labels.
corrgram(
  airline.df,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Premium Class Analysis Various Factors"
)
# Regress the relative price on the seat-width and seat-pitch differences only.
fit <- lm(
  formula = PriceRelative ~ WidthDifference + PitchDifference,
  data = airline.df
)
print(summary(fit))
##
## Call:
## lm(formula = PriceRelative ~ WidthDifference + PitchDifference,
## data = airline.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84163 -0.28484 -0.07241 0.17698 1.18778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10514 0.08304 -1.266 0.206077
## WidthDifference 0.11621 0.02356 4.933 1.14e-06 ***
## PitchDifference 0.06019 0.01590 3.785 0.000174 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2561
## F-statistic: 79.65 on 2 and 455 DF, p-value: < 2.2e-16
# The F-statistic has a very low p-value, so we reject the null hypothesis that both slope coefficients are zero: the model explains a statistically significant share of the variation in PriceRelative. The coefficients for WidthDifference and PitchDifference are each statistically significant, so both variables are associated with the relative price difference between Premium and Economy class. However, the R-squared is low (about 0.26), which is worrisome: most of the variation is left unexplained, so we need to include more variables.
# This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see http://rmarkdown.rstudio.com.
# When you click the Knit button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# The F-statistic has a very low p-value, so we reject the null hypothesis that both slope coefficients are zero: the model explains a statistically significant share of the variation in PriceRelative. The coefficients for WidthDifference and PitchDifference are each statistically significant, so both variables are associated with the relative price difference between Premium and Economy class. However, the R-squared is low (about 0.26), which is worrisome: most of the variation is left unexplained, so we need to include more variables.