Read and summarize the data

# Load the six-airlines fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0, matching the
# str() output recorded in this document.
airlines <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(airlines)
}

We will summarize the data.

library(psych)
# Descriptive statistics for every column of the data frame.
psych::describe(airlines)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
# Show each column's type together with its first few values.
str(object = airlines)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

To visualize the distribution of the variables

box plots

# Flight duration split by aircraft type, one box per Aircraft level.
boxplot(
  FlightDuration ~ Aircraft,
  data = airlines,
  col  = c("peachpuff", "gray"),
  xlab = "Aircraft type",
  ylab = "Flight duration"
)

# Flight duration split by airline.
# Airline has six levels, so six fill colours are supplied; with only five
# the palette is recycled and the sixth box repeats the first box's colour.
boxplot(
  FlightDuration ~ Airline,
  data = airlines,
  xlab = "Airline",
  ylab = "Flight duration",
  col  = c(
    "peachpuff", "gray", "lightyellow",
    "lightgreen", "lightblue", "lavender"
  )
)

scatterplots

library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Premium fare (x) against flight duration (y), points coloured by airline.
ggplot(airlines, aes(x = PricePremium, y = FlightDuration)) +
  geom_point(aes(color = Airline)) +
  scale_x_continuous(name = "price of Premium ticket") +
  scale_y_continuous(name = "Flight duration") +
  labs(title = "Scatterplot of ticket prices")

library(ggplot2)
# Premium fare (x) against flight duration (y), coloured by airline,
# with one facet per travel month.
ggplot(airlines, aes(x = PricePremium, y = FlightDuration)) +
  geom_point(aes(color = Airline)) +
  scale_x_continuous(name = "price of Premium ticket") +
  scale_y_continuous(name = "Flight duration") +
  labs(title = "Scatterplot of ticket prices") +
  facet_wrap(facets = ~ TravelMonth)

# Economy fare (x) against flight duration (y), points coloured by airline.
ggplot(airlines, aes(x = PriceEconomy, y = FlightDuration)) +
  geom_point(aes(color = Airline)) +
  scale_x_continuous(name = "Price of economy ticket") +
  scale_y_continuous(name = "Flight duration") +
  labs(title = "Scatterplot of ticket prices")

# Economy fare (x) against flight duration (y), coloured by airline,
# with one facet per travel month.
ggplot(airlines, aes(x = PriceEconomy, y = FlightDuration)) +
  geom_point(aes(color = Airline)) +
  scale_x_continuous(name = "price of economy ticket") +
  scale_y_continuous(name = "Flight duration") +
  labs(title = "Scatterplot of ticket prices") +
  facet_wrap(facets = ~ TravelMonth)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Relative price against pitch difference, drawn with car::scatterplot().
# NOTE(review): `spread` and `smoother.args` look like car 2.x argument
# names; newer car releases appear to take these via `smooth = list(...)` —
# confirm against the installed car version.
scatterplot(
  PriceRelative ~ PitchDifference,
  data = airlines,
  main = "Scatter plot of price relative vs pitch difference",
  xlab = "pitch difference",
  ylab = "price relative",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Relative price against width difference, drawn with car::scatterplot()
# using solid points (pch = 19).
# NOTE(review): `spread` and `smoother.args` look like car 2.x argument
# names; newer car releases appear to take these via `smooth = list(...)` —
# confirm against the installed car version.
scatterplot(
  PriceRelative ~ WidthDifference,
  data = airlines,
  main = "Scatter plot of price relative vs Width difference",
  xlab = "Width difference",
  ylab = "Price relative",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

correlogram

library(corrgram)
# Correlogram of the airlines data frame, drawn with corrgram's
# panel.cor, panel.shade and panel.txt panel functions.
corrgram(
  airlines,
  order       = NULL,
  panel       = panel.cor,
  lower.panel = panel.shade,
  text.panel  = panel.txt,
  main        = "Correlogram"
)

### Regression Models

correlation test

# NOTE(review): attach() is kept only because later statements in this
# document may reference columns without `airlines$`; prefer with() or
# `data =` and drop it once nothing depends on the attached copy.
attach(airlines)

# Correlation test between relative price and premium seat width.
# with() resolves the columns inside `airlines`, so the test cannot pick up
# a same-named object from the global environment.
with(airlines, cor.test(PriceRelative, WidthPremium))
## 
##  Pearson's product-moment correlation
## 
## data:  PriceRelative and WidthPremium
## t = 12.469, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4326084 0.5695593
## sample estimates:
##       cor 
## 0.5042476
# Correlation test between economy fare and economy seat pitch.
# with() looks the columns up in `airlines` directly, so this call does not
# depend on the data frame having been attach()ed.
with(airlines, cor.test(PriceEconomy, PitchEconomy))
## 
##  Pearson's product-moment correlation
## 
## data:  PriceEconomy and PitchEconomy
## t = 8.469, df = 456, p-value = 3.428e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2867196 0.4452479
## sample estimates:
##       cor 
## 0.3686612

t-test

# Two-sample t-test (t.test defaults) comparing mean economy and premium fares.
# NOTE(review): both fares come from the same rows of `airlines` — confirm
# whether a paired test was intended.
t.test(x = airlines$PriceEconomy, y = airlines$PricePremium)
## 
##  Welch Two Sample t-test
## 
## data:  airlines$PriceEconomy and airlines$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0831 -369.2793
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

linear regression model

# Linear model of relative price on seat pitch/width, economy fare and
# premium-seat share. The recorded summary reports PitchDifference as NA
# ("not defined because of singularities").
data1 <- lm(
  PriceRelative ~ PitchEconomy + PitchPremium + WidthPremium +
    PriceEconomy + PitchDifference + WidthDifference + PercentPremiumSeats,
  data = airlines
)
summary(data1)
## 
## Call:
## lm(formula = PriceRelative ~ PitchEconomy + PitchPremium + WidthPremium + 
##     PriceEconomy + PitchDifference + WidthDifference + PercentPremiumSeats, 
##     data = airlines)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.90093 -0.22133 -0.02915  0.15791  1.16165 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.102e+00  1.752e+00  -0.629 0.529437    
## PitchEconomy        -6.810e-02  4.511e-02  -1.510 0.131826    
## PitchPremium         3.359e-02  2.192e-02   1.533 0.126056    
## WidthPremium         1.371e-01  3.827e-02   3.583 0.000377 ***
## PriceEconomy        -1.056e-04  2.085e-05  -5.064 5.99e-07 ***
## PitchDifference             NA         NA      NA       NA    
## WidthDifference      7.238e-03  3.769e-02   0.192 0.847790    
## PercentPremiumSeats -6.789e-03  4.267e-03  -1.591 0.112312    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3688 on 451 degrees of freedom
## Multiple R-squared:  0.339,  Adjusted R-squared:  0.3302 
## F-statistic: 38.56 on 6 and 451 DF,  p-value: < 2.2e-16
# Reduced linear model of relative price on pitch difference, premium seat
# width and economy fare. `data = airlines` makes the formula resolve its
# variables from the data frame instead of relying on attach().
data2 <- lm(
  PriceRelative ~ PitchDifference + WidthPremium + PriceEconomy,
  data = airlines
)
coefficients(data2)
##     (Intercept) PitchDifference    WidthPremium    PriceEconomy 
##   -2.5405744540    0.0432138143    0.1484583578   -0.0001144987

conclusion

Both null hypotheses are rejected because the p-values are far below 0.05, and the relative price depends mainly on the width of the premium-class seats and the pitch difference.