# Load the airline pricing data set; the file is looked up relative to the
# current working directory.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well,
# which is the structure shown in the recorded str() output further down.
airlines <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a spreadsheet-style viewer, so only call it when someone is
# actually sitting at an interactive session.
if (interactive()) {
  View(airlines)
}
# Summarize the data.
# Per-column descriptive statistics (n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis, se) as printed below.
library(psych)
psych::describe(airlines)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Compact listing of each column's type and first few values.
utils::str(airlines)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Flight duration by aircraft manufacturer: two groups, two fill colours.
boxplot(FlightDuration ~ Aircraft, data = airlines,
        xlab = "Aircraft type", ylab = "Flight duration",
        col = c("peachpuff", "gray"))

# Flight duration by airline. Airline has six levels, so six fill colours are
# supplied; with only five, the first colour is recycled for the sixth box and
# two airlines become indistinguishable.
boxplot(FlightDuration ~ Airline, data = airlines,
        xlab = "Airline", ylab = "Flight duration",
        col = c("peachpuff", "gray", "lightyellow", "lightgreen",
                "lightblue", "lightpink"))
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Premium ticket price against flight duration, coloured by airline.
# The plot is built once and then shown twice: as a single panel and split
# into one panel per travel month. (ggplot2 is already attached above, so it
# is not loaded a second time here.)
premium_scatter <- ggplot(airlines, aes(PricePremium, FlightDuration)) +
  geom_point(aes(color = Airline)) +
  scale_x_continuous("price of Premium ticket") +
  scale_y_continuous("Flight duration") +
  labs(title = "Scatterplot of ticket prices")
premium_scatter
premium_scatter + facet_wrap(~ TravelMonth)

# Same pair of views for the economy ticket price. The x-axis label is added
# per plot because the two views use differently capitalised labels.
economy_scatter <- ggplot(airlines, aes(PriceEconomy, FlightDuration)) +
  geom_point(aes(color = Airline)) +
  scale_y_continuous("Flight duration") +
  labs(title = "Scatterplot of ticket prices")
economy_scatter + scale_x_continuous("Price of economy ticket")
economy_scatter +
  scale_x_continuous("price of economy ticket") +
  facet_wrap(~ TravelMonth)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Relative price (PriceRelative) against the pitch difference, drawn with
# car::scatterplot.
# NOTE(review): `spread` and `smoother.args` look like the car 2.x argument
# names; newer car releases appear to configure the smoother through
# `smooth = list(...)` instead -- confirm against the installed car version.
scatterplot(
  PriceRelative ~ PitchDifference,
  data = airlines,
  main = "Scatter plot of price relative vs pitch difference",
  xlab = "pitch difference",
  ylab = "price relative",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Same plot against the width difference, using filled points (pch = 19).
scatterplot(
  PriceRelative ~ WidthDifference,
  data = airlines,
  main = "Scatter plot of price relative vs Width difference",
  xlab = "Width difference",
  ylab = "Price relative",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)
# Correlogram of the data set in its original column order (order = NULL):
# panel.cor is the default panel, panel.shade is used for the lower panel and
# panel.txt for the text panel.
library(corrgram)
corrgram(
  airlines,
  order = NULL,
  lower.panel = panel.shade,
  panel = panel.cor,
  text.panel = panel.txt,
  main = "Correlogram"
)
### Regression Models
# Correlation between the relative price and the premium seat width.
# with() evaluates the call inside the data frame, so the columns resolve
# without attach()ing `airlines` to the search path, while the variable names
# reported in the output stay unqualified.
with(airlines, cor.test(PriceRelative, WidthPremium))
##
## Pearson's product-moment correlation
##
## data: PriceRelative and WidthPremium
## t = 12.469, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4326084 0.5695593
## sample estimates:
## cor
## 0.5042476
# Correlation between the economy price and the economy seat pitch.
with(airlines, cor.test(PriceEconomy, PitchEconomy))
##
## Pearson's product-moment correlation
##
## data: PriceEconomy and PitchEconomy
## t = 8.469, df = 456, p-value = 3.428e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2867196 0.4452479
## sample estimates:
## cor
## 0.3686612
# Compare mean economy and premium prices (Welch two-sample t-test).
# NOTE(review): both prices are columns of the same rows, so a paired test
# (paired = TRUE) may be the more appropriate comparison -- confirm. The
# recorded output below belongs to the unpaired test as written.
t.test(airlines$PriceEconomy, airlines$PricePremium)
##
## Welch Two Sample t-test
##
## data: airlines$PriceEconomy and airlines$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -667.0831 -369.2793
## sample estimates:
## mean of x mean of y
## 1327.076 1845.258
# Full linear model of the relative price on the seat and price variables.
# The PitchDifference coefficient is reported as NA below ("1 not defined
# because of singularities"); presumably PitchDifference is an exact linear
# combination of PitchPremium and PitchEconomy -- verify against the data.
data1 <- lm(
  PriceRelative ~ PitchEconomy + PitchPremium + WidthPremium + PriceEconomy +
    PitchDifference + WidthDifference + PercentPremiumSeats,
  data = airlines
)
summary(data1)
##
## Call:
## lm(formula = PriceRelative ~ PitchEconomy + PitchPremium + WidthPremium +
## PriceEconomy + PitchDifference + WidthDifference + PercentPremiumSeats,
## data = airlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.90093 -0.22133 -0.02915 0.15791 1.16165
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.102e+00 1.752e+00 -0.629 0.529437
## PitchEconomy -6.810e-02 4.511e-02 -1.510 0.131826
## PitchPremium 3.359e-02 2.192e-02 1.533 0.126056
## WidthPremium 1.371e-01 3.827e-02 3.583 0.000377 ***
## PriceEconomy -1.056e-04 2.085e-05 -5.064 5.99e-07 ***
## PitchDifference NA NA NA NA
## WidthDifference 7.238e-03 3.769e-02 0.192 0.847790
## PercentPremiumSeats -6.789e-03 4.267e-03 -1.591 0.112312
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3688 on 451 degrees of freedom
## Multiple R-squared: 0.339, Adjusted R-squared: 0.3302
## F-statistic: 38.56 on 6 and 451 DF, p-value: < 2.2e-16
# Reduced model with three predictors. The data frame is passed explicitly
# so the fit does not depend on columns being visible on the search path.
data2 <- lm(
  PriceRelative ~ PitchDifference + WidthPremium + PriceEconomy,
  data = airlines
)
coefficients(data2)
## (Intercept) PitchDifference WidthPremium PriceEconomy
## -2.5405744540 0.0432138143 0.1484583578 -0.0001144987
# Both null hypotheses are rejected because the p-values are far below 0.05; the relative price depends mainly on the width of the premium-class seats and on the pitch difference.