airline.df <- read.csv(paste("SixAirlinesDataV2.csv", sep=""))
attach(airline.df)

## ------------------------------------------------------------------------
library(psych)
describe(airline.df)[,c(2,3,4,5,8,9)]
##                       n    mean      sd  median   min     max
## Airline*            458    3.01    1.65    2.00  1.00    6.00
## Aircraft*           458    1.67    0.47    2.00  1.00    2.00
## FlightDuration      458    7.58    3.54    7.79  1.25   14.66
## TravelMonth*        458    2.56    1.17    3.00  1.00    4.00
## IsInternational*    458    1.91    0.28    2.00  1.00    2.00
## SeatsEconomy        458  202.31   76.37  185.00 78.00  389.00
## SeatsPremium        458   33.65   13.26   36.00  8.00   66.00
## PitchEconomy        458   31.22    0.66   31.00 30.00   33.00
## PitchPremium        458   37.91    1.31   38.00 34.00   40.00
## WidthEconomy        458   17.84    0.56   18.00 17.00   19.00
## WidthPremium        458   19.47    1.10   19.00 17.00   21.00
## PriceEconomy        458 1327.08  988.27 1242.00 65.00 3593.00
## PricePremium        458 1845.26 1288.14 1737.00 86.00 7414.00
## PriceRelative       458    0.49    0.45    0.36  0.02    1.89
## SeatsTotal          458  235.96   85.29  227.00 98.00  441.00
## PitchDifference     458    6.69    1.76    7.00  2.00   10.00
## WidthDifference     458    1.63    1.19    1.00  0.00    4.00
## PercentPremiumSeats 458   14.65    4.84   13.21  4.71   24.69
pitchDifferenceTable <- table(airline.df$PitchDifference)
pitchDifferenceTable
## 
##   2   3   6   7  10 
##  24  16 121 243  54
pd = aggregate(cbind(PriceEconomy,PricePremium, PriceRelative) ~ PitchDifference, 
               data = airline.df, mean)
pd
##   PitchDifference PriceEconomy PricePremium PriceRelative
## 1               2     348.0000     377.3333    0.08708333
## 2               3     369.5625     398.7500    0.08125000
## 3               6    2008.6942    2333.7438    0.34082645
## 4               7    1388.1317    2155.4897    0.51888889
## 5              10     243.8519     435.6481    0.97074074
widthDifferenceTable <- table(airline.df$WidthDifference)
widthDifferenceTable
## 
##   0   1   2   3   4 
##  40 264  32  68  54
aggregate(cbind(PriceEconomy,PricePremium, PriceRelative) ~ WidthDifference, 
          data = airline.df, mean)
##   WidthDifference PriceEconomy PricePremium PriceRelative
## 1               0     356.6250     385.9000     0.0847500
## 2               1    1428.4053    1966.0795     0.4184091
## 3               2    2884.7500    3197.4375     0.2296875
## 4               3    1631.7206    2717.7059     0.7282353
## 5               4     243.8519     435.6481     0.9707407
pitchWidthTable <- xtabs(~WidthDifference + PitchDifference, data=airline.df)
ftable(pitchWidthTable)
##                 PitchDifference   2   3   6   7  10
## WidthDifference                                    
## 0                                24  16   0   0   0
## 1                                 0   0  89 175   0
## 2                                 0   0  32   0   0
## 3                                 0   0   0  68   0
## 4                                 0   0   0   0  54
t1 = aggregate(cbind(PriceEconomy,PricePremium, PriceRelative) ~ SeatsTotal, 
               data = airline.df, mean)
t1
##    SeatsTotal PriceEconomy PricePremium PriceRelative
## 1          98     291.7500     306.7500    0.06125000
## 2         138     299.4000     316.4000    0.06200000
## 3         140     184.7037     377.8889    1.15370370
## 4         144     365.7692     402.6154    0.10230769
## 5         156     328.4000     348.6000    0.07200000
## 6         158     458.0000     497.0000    0.09000000
## 7         160     415.5000     459.5000    0.10750000
## 8         162    1511.8627    2154.0000    0.42254902
## 9         166    1040.4444    1459.8333    0.40722222
## 10        168    2194.6857    2448.4286    0.27914286
## 11        170     257.0526     461.1579    0.95263158
## 12        198    3341.0000    3450.6667    0.03000000
## 13        200     479.2500     522.0000    0.09500000
## 14        212    1035.4167    1361.8750    0.34958333
## 15        227    1625.5000    2910.8333    0.60416667
## 16        228    3144.3636    3398.8182    0.08590909
## 17        233    1509.7273    2552.2121    0.82121212
## 18        240     772.5000    1710.0000    1.32000000
## 19        271    1638.2222    2658.1111    0.61111111
## 20        279    1666.2195    2439.9268    0.50414634
## 21        299    1633.8750    2765.3750    0.73500000
## 22        358     631.8654     965.8077    0.32211538
## 23        367      74.0000      97.0000    0.31000000
## 24        369     597.5000    1057.0000    0.80000000
## 25        427    2822.5714    3198.0714    0.20928571
## 26        441    2113.0000    3601.5000    0.74000000
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
colairlines <- c("PricePremium","PriceEconomy","PitchDifference","WidthDifference")
corMatrix <- rcorr(as.matrix(airline.df[,colairlines]))
corMatrix
##                 PricePremium PriceEconomy PitchDifference WidthDifference
## PricePremium            1.00         0.90           -0.02           -0.01
## PriceEconomy            0.90         1.00           -0.10           -0.08
## PitchDifference        -0.02        -0.10            1.00            0.76
## WidthDifference        -0.01        -0.08            0.76            1.00
## 
## n= 458 
## 
## 
## P
##                 PricePremium PriceEconomy PitchDifference WidthDifference
## PricePremium                 0.0000       0.6998          0.8059         
## PriceEconomy    0.0000                    0.0332          0.0708         
## PitchDifference 0.6998       0.0332                       0.0000         
## WidthDifference 0.8059       0.0708       0.0000
colairlines2 <- c("PricePremium","PriceEconomy","SeatsTotal","PercentPremiumSeats")
corMatrix2 <- rcorr(as.matrix(airline.df[,colairlines2]))
corMatrix2
##                     PricePremium PriceEconomy SeatsTotal
## PricePremium                1.00         0.90       0.19
## PriceEconomy                0.90         1.00       0.13
## SeatsTotal                  0.19         0.13       1.00
## PercentPremiumSeats         0.12         0.07      -0.22
##                     PercentPremiumSeats
## PricePremium                       0.12
## PriceEconomy                       0.07
## SeatsTotal                        -0.22
## PercentPremiumSeats                1.00
## 
## n= 458 
## 
## 
## P
##                     PricePremium PriceEconomy SeatsTotal
## PricePremium                     0.0000       0.0000    
## PriceEconomy        0.0000                    0.0045    
## SeatsTotal          0.0000       0.0045                 
## PercentPremiumSeats 0.0127       0.1628       0.0000    
##                     PercentPremiumSeats
## PricePremium        0.0127             
## PriceEconomy        0.1628             
## SeatsTotal          0.0000             
## PercentPremiumSeats
t.test(PitchPremium, PitchEconomy)
## 
##  Welch Two Sample t-test
## 
## data:  PitchPremium and PitchEconomy
## t = 97.482, df = 671.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  6.553067 6.822479
## sample estimates:
## mean of x mean of y 
##  37.90611  31.21834
t.test(PricePremium,PriceEconomy)
## 
##  Welch Two Sample t-test
## 
## data:  PricePremium and PriceEconomy
## t = 6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  369.2793 667.0831
## sample estimates:
## mean of x mean of y 
##  1845.258  1327.076
t.test(WidthPremium, WidthEconomy)
## 
##  Welch Two Sample t-test
## 
## data:  WidthPremium and WidthEconomy
## t = 28.4, df = 678.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.520276 1.746100
## sample estimates:
## mean of x mean of y 
##  19.47162  17.83843
##Formulating multivariate linear regression model to fit price with respect to width and pitch
##MODEL 1
##In this model we try regressing Price Premium on ALL the remaining columns.
Model1 <- PricePremium ~ PriceEconomy + PitchDifference + WidthDifference + PercentPremiumSeats + SeatsTotal + IsInternational + TravelMonth + FlightDuration + Aircraft
fit1 <- lm(Model1, data = airline.df)
summary(fit1)
## 
## Call:
## lm(formula = Model1, data = airline.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -977.2 -246.3  -47.9  135.2 3419.7 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.211e+03  1.755e+02  -6.898 1.82e-11 ***
## PriceEconomy                  1.064e+00  3.114e-02  34.175  < 2e-16 ***
## PitchDifference               8.510e+01  3.913e+01   2.175 0.030163 *  
## WidthDifference               1.240e+02  3.438e+01   3.607 0.000345 ***
## PercentPremiumSeats           3.177e+01  5.250e+00   6.052 3.04e-09 ***
## SeatsTotal                    1.925e+00  3.360e-01   5.729 1.87e-08 ***
## IsInternationalInternational -7.537e+02  2.135e+02  -3.530 0.000458 ***
## TravelMonthJul               -3.441e+01  7.074e+01  -0.486 0.626904    
## TravelMonthOct                2.692e+01  6.036e+01   0.446 0.655795    
## TravelMonthSep               -2.097e+00  6.015e+01  -0.035 0.972203    
## FlightDuration                8.455e+01  8.809e+00   9.598  < 2e-16 ***
## AircraftBoeing               -2.082e+00  5.651e+01  -0.037 0.970625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 480.7 on 446 degrees of freedom
## Multiple R-squared:  0.8641, Adjusted R-squared:  0.8607 
## F-statistic: 257.7 on 11 and 446 DF,  p-value: < 2.2e-16
##MODEL 2 -- Revise MODEL 1, by excluding TravelMonth and Aircraft from the list of regressors
Model2 <- PricePremium ~ PriceEconomy + PitchDifference + WidthDifference + PercentPremiumSeats + SeatsTotal + FlightDuration + IsInternational
fit2 <- lm(Model2, data = airline.df)
summary(fit2)
## 
## Call:
## lm(formula = Model2, data = airline.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1010.0  -258.4   -49.9   133.6  3416.7 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.213e+03  1.695e+02  -7.156 3.40e-12 ***
## PriceEconomy                  1.063e+00  3.077e-02  34.537  < 2e-16 ***
## PitchDifference               8.421e+01  3.656e+01   2.303 0.021722 *  
## WidthDifference               1.224e+02  3.373e+01   3.629 0.000318 ***
## PercentPremiumSeats           3.190e+01  5.220e+00   6.112 2.14e-09 ***
## SeatsTotal                    1.920e+00  3.241e-01   5.922 6.31e-09 ***
## FlightDuration                8.459e+01  8.507e+00   9.943  < 2e-16 ***
## IsInternationalInternational -7.412e+02  2.001e+02  -3.704 0.000238 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 479 on 450 degrees of freedom
## Multiple R-squared:  0.8638, Adjusted R-squared:  0.8617 
## F-statistic: 407.9 on 7 and 450 DF,  p-value: < 2.2e-16
# the Adjusted R Squared for Model 2 is less than Model 1
summary(fit1)$adj.r.squared
## [1] 0.8607235
summary(fit2)$adj.r.squared
## [1] 0.861724
boxplot(airline.df$PriceRelative, airline.df$PitchDifference)

boxplot(airline.df$PriceRelative, airline.df$WidthDifference)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
scatterplotMatrix(formula = ~ PriceRelative + PitchDifference + WidthDifference , data= airline.df, cex=0.6, diagonal="histogram")

library(corrgram)
corrgram(airline.df,
         main="Premium Class Analysis Various Factors",
         lower.panel=panel.shade, upper.panel=panel.pie,
         diag.panel=panel.minmax, text.panel=panel.txt)

fit <- lm(PriceRelative ~ WidthDifference + PitchDifference, data = airline.df)
summary(fit)
## 
## Call:
## lm(formula = PriceRelative ~ WidthDifference + PitchDifference, 
##     data = airline.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84163 -0.28484 -0.07241  0.17698  1.18778 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.10514    0.08304  -1.266 0.206077    
## WidthDifference  0.11621    0.02356   4.933 1.14e-06 ***
## PitchDifference  0.06019    0.01590   3.785 0.000174 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared:  0.2593, Adjusted R-squared:  0.2561 
## F-statistic: 79.65 on 2 and 455 DF,  p-value: < 2.2e-16
#Since the F statistics has very low p value so we reject the Null Hypothesis. Thus our model is a good fit. Further both the parameters for WidthDifference and PitchDifference are both statistically signifcant.Thus both these variables do have a effect in the difference in prices of Premium and relative class. But the R square is quite low which is quite worrisome. Thus we need to include more variables.

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Since the F statistics has very low p value so we reject the Null Hypothesis. Thus our model is a good fit. Further both the parameters for WidthDifference and PitchDifference are both statistically signifcant.Thus both these variables do have a effect in the difference in prices of Premium and relative class. But the R square is quite low which is quite worrisome. Thus we need to include more variables.