# Load the six-airline fare data set from the working directory.
# stringsAsFactors = TRUE keeps Airline as a factor (the type shown by str()
# and summary() below); since R 4.0 the default would read it as character.
mydata <- read.csv("SixAirlinesData.csv", stringsAsFactors = TRUE)
# Print the first six rows as a quick sanity check of the import.
head(mydata)
## Airline Aircraft FlightDuration MONTH International SEATS_ECONOMY
## 1 AirFrance 0 6.91 2 1 216
## 2 AirFrance 0 6.91 3 1 216
## 3 AirFrance 1 9.50 3 1 147
## 4 AirFrance 0 6.91 1 1 216
## 5 AirFrance 1 9.50 2 1 147
## 6 AirFrance 1 13.00 2 1 389
## SEATS_PREMIUM PITCH_ECONOMY PITCH_PREMIUM WIDTH_ECONOMY WIDTH_PREMIUM
## 1 24 32 38 17 19
## 2 24 32 38 17 19
## 3 21 32 38 18 19
## 4 24 32 38 17 19
## 5 21 32 38 18 19
## 6 38 32 38 18 19
## PRICE_ECONOMY PRICE_PREMIUM PriceRelative N LAMBDA QUALITY
## 1 648 1710 1.64 240 0.10 6
## 2 648 1710 1.64 240 0.10 6
## 3 630 1611 1.56 168 0.13 6
## 4 700 1710 1.44 240 0.10 6
## 5 743 1611 1.17 168 0.13 6
## 6 1522 3289 1.16 427 0.09 6
# Put the columns of mydata on the search path so that later calls can refer
# to them by bare name (e.g. PriceRelative) instead of mydata$PriceRelative.
# NOTE(review): attach() makes the script depend on search-path state, and
# same-named workspace objects can mask the attached columns; passing
# `data = mydata` to each call is the safer alternative.
attach(mydata)
# Per-column summary: level counts for the Airline factor, and
# min / quartiles / mean / max for the numeric columns.
summary(mydata)
## Airline Aircraft FlightDuration MONTH
## AirFrance: 74 Min. :0.0000 Min. : 1.250 Min. :0.000
## British :175 1st Qu.:0.0000 1st Qu.: 4.250 1st Qu.:1.000
## Delta : 46 Median :0.0000 Median : 7.750 Median :2.000
## Jet : 65 Mean :0.3268 Mean : 7.549 Mean :1.671
## Singapore: 40 3rd Qu.:1.0000 3rd Qu.:10.500 3rd Qu.:3.000
## Virgin : 62 Max. :1.0000 Max. :14.660 Max. :3.000
## International SEATS_ECONOMY SEATS_PREMIUM PITCH_ECONOMY
## Min. :0.0000 Min. : 17.0 Min. : 8.00 Min. :30.00
## 1st Qu.:1.0000 1st Qu.:127.0 1st Qu.:21.00 1st Qu.:31.00
## Median :1.0000 Median :185.0 Median :36.00 Median :31.00
## Mean :0.9134 Mean :200.7 Mean :33.54 Mean :31.21
## 3rd Qu.:1.0000 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :1.0000 Max. :389.0 Max. :66.00 Max. :33.00
## PITCH_PREMIUM WIDTH_ECONOMY WIDTH_PREMIUM PRICE_ECONOMY
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65.0
## 1st Qu.:38.00 1st Qu.:17.00 1st Qu.:19.00 1st Qu.: 404.8
## Median :38.00 Median :18.00 Median :19.00 Median :1224.0
## Mean :37.92 Mean :17.83 Mean :19.48 Mean :1317.1
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1903.0
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593.0
## PRICE_PREMIUM PriceRelative N LAMBDA
## Min. : 86 Min. :0.0200 Min. : 38.0 Min. :0.0500
## 1st Qu.: 524 1st Qu.:0.1000 1st Qu.:162.0 1st Qu.:0.1200
## Median :1710 Median :0.3800 Median :227.0 Median :0.1300
## Mean :1832 Mean :0.4926 Mean :234.2 Mean :0.1503
## 3rd Qu.:2989 3rd Qu.:0.7475 3rd Qu.:279.0 3rd Qu.:0.1500
## Max. :7414 Max. :1.8900 Max. :441.0 Max. :0.5500
## QUALITY
## Min. : 2.000
## 1st Qu.: 6.000
## Median : 7.000
## Mean : 6.716
## 3rd Qu.: 7.000
## Max. :10.000
# Show the structure of the data frame: its dimensions plus each column's
# type and first few values.
str(mydata)
## 'data.frame': 462 obs. of 17 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Aircraft : int 0 0 1 0 1 1 1 1 0 0 ...
## $ FlightDuration: num 6.91 6.91 9.5 6.91 9.5 13 13 9.5 6.91 8.33 ...
## $ MONTH : int 2 3 3 1 2 2 3 1 0 1 ...
## $ International : int 1 1 1 1 1 1 1 1 1 1 ...
## $ SEATS_ECONOMY : int 216 216 147 216 147 389 389 147 216 200 ...
## $ SEATS_PREMIUM : int 24 24 21 24 21 38 38 21 24 28 ...
## $ PITCH_ECONOMY : int 32 32 32 32 32 32 32 32 32 32 ...
## $ PITCH_PREMIUM : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WIDTH_ECONOMY : int 17 17 18 17 18 18 18 18 17 17 ...
## $ WIDTH_PREMIUM : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PRICE_ECONOMY : int 648 648 630 700 743 1522 1522 990 1094 2918 ...
## $ PRICE_PREMIUM : int 1710 1710 1611 1710 1611 3289 3289 1611 1710 3972 ...
## $ PriceRelative : num 1.64 1.64 1.56 1.44 1.17 1.16 1.16 0.63 0.56 0.36 ...
## $ N : int 240 240 168 240 168 427 427 168 240 228 ...
## $ LAMBDA : num 0.1 0.1 0.13 0.1 0.13 0.09 0.09 0.13 0.1 0.12 ...
## $ QUALITY : int 6 6 6 6 6 6 6 6 6 6 ...
# Plots of relative price ----
library(lattice)
# Histogram (bar heights are counts) of PriceRelative, one panel per
# Airline x Aircraft combination.
# All columns are resolved through `data = mydata` instead of a mix of
# `mydata$...` and attach()'d names, so the call does not depend on the
# search path and the default axis label reads "PriceRelative".
histogram(
  ~ PriceRelative | Airline + Aircraft,
  data = mydata,
  main = "Price",
  type = "count",
  breaks = 4,
  col = c("red", "blue", "green", "yellow")
)
library(lattice)
# Histogram (bar heights are counts) of PriceRelative, conditioned on the
# 0/1 International indicator.
# `data = mydata` makes the column lookup explicit rather than relying on a
# previous attach(mydata).
histogram(
  ~ PriceRelative | International,
  data = mydata,
  type = "count",
  breaks = 5,
  col = c("red", "blue", "green", "yellow"),
  xlab = "Relative price"
)
# Horizontal box-and-whisker plot of PriceRelative, conditioned on the 0/1
# International indicator.
# `data = mydata` makes the column lookup explicit rather than relying on a
# previous attach(mydata).
bwplot(
  ~ PriceRelative | International,
  data = mydata,
  horizontal = TRUE,
  xlab = "Relative price",
  col = c("yellow")
)
library(car)
# Scatterplot matrix of relative price, flight duration and premium seat
# count, with a histogram of each variable on the diagonal.
# - `data = mydata` replaces the dependency on attach(mydata).
# - car >= 3.0 takes the diagonal panel type as a list; the older string
#   form (diagonal = "histogram") is no longer the documented interface.
scatterplotMatrix(
  ~ PriceRelative + FlightDuration + SEATS_PREMIUM,
  data = mydata,
  cex = 0.6,
  diagonal = list(method = "histogram")
)
# Correlogram (corrgram) ----
library(corrgram)
# Correlogram of mydata with the variables reordered (order = TRUE):
# shaded cells in the lower triangle, pie glyphs in the upper triangle, and
# panel.txt for the text panels.
corrgram(
  mydata,
  order = TRUE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  main = "Correlation of relative price!!"
)
# t-tests ----
# Welch two-sample t-test comparing the mean of PriceRelative with the mean
# of FlightDuration. with() resolves both columns inside mydata, so the call
# does not rely on a previous attach(mydata).
# NOTE(review): the two variables are on different scales (sample means of
# about 0.49 vs 7.55), so a difference in means says little about whether
# they are related; cor.test() may be closer to the intent — confirm.
with(mydata, t.test(PriceRelative, FlightDuration))
##
## Welch Two Sample t-test
##
## data: PriceRelative and FlightDuration
## t = -42.497, df = 476.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.382659 -6.730112
## sample estimates:
## mean of x mean of y
## 0.4926407 7.5490260
# Welch two-sample t-test of PriceRelative between the two International
# groups (0 and 1). `data = mydata` makes the column lookup explicit instead
# of relying on a previous attach(mydata).
t.test(PriceRelative ~ International, data = mydata)
##
## Welch Two Sample t-test
##
## data: PriceRelative by International
## t = -19.696, df = 450.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4911097 -0.4019970
## sample estimates:
## mean in group 0 mean in group 1
## 0.0847500 0.5313033
# Welch two-sample t-test comparing the mean of PriceRelative with the mean
# of SEATS_PREMIUM. with() resolves both columns inside mydata, so the call
# does not rely on a previous attach(mydata).
# NOTE(review): the two variables are on different scales (sample means of
# about 0.49 vs 33.5), so a difference in means says little about whether
# they are related; cor.test() may be closer to the intent — confirm.
with(mydata, t.test(PriceRelative, SEATS_PREMIUM))
##
## Welch Two Sample t-test
##
## data: PriceRelative and SEATS_PREMIUM
## t = -53.553, df = 462.08, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -34.25894 -31.83370
## sample estimates:
## mean of x mean of y
## 0.4926407 33.5389610
# Welch two-sample t-test comparing the mean of PriceRelative with the mean
# of SEATS_ECONOMY. with() resolves both columns inside mydata, so the call
# does not rely on a previous attach(mydata).
# NOTE(review): the two variables are on different scales (sample means of
# about 0.49 vs 200.7), so a difference in means says little about whether
# they are related; cor.test() may be closer to the intent — confirm.
with(mydata, t.test(PriceRelative, SEATS_ECONOMY))
##
## Welch Two Sample t-test
##
## data: PriceRelative and SEATS_ECONOMY
## t = -55.2, df = 461.03, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -207.3428 -193.0875
## sample estimates:
## mean of x mean of y
## 0.4926407 200.7077922
# Regression ----
# Linear regression of PriceRelative on flight, cabin-size, airline, aircraft
# and month predictors. Airline is a factor, so it enters as dummy variables
# against its first level.
# `data = mydata` makes the model self-contained: it no longer depends on a
# previous attach(mydata), and predict()/update() can find the data later.
model <- lm(
  PriceRelative ~ FlightDuration + International + SEATS_PREMIUM +
    SEATS_ECONOMY + Airline + Aircraft + MONTH,
  data = mydata
)
# Coefficient table, residual summary and overall fit statistics.
summary(model)
##
## Call:
## lm(formula = PriceRelative ~ FlightDuration + International +
## SEATS_PREMIUM + SEATS_ECONOMY + Airline + Aircraft + MONTH,
## data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8501 -0.2031 -0.0444 0.1136 1.4355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2689491 0.1690644 -1.591 0.11235
## FlightDuration 0.0263041 0.0063674 4.131 4.31e-05 ***
## International 0.2243637 0.1706504 1.315 0.18926
## SEATS_PREMIUM -0.0073778 0.0027652 -2.668 0.00790 **
## SEATS_ECONOMY 0.0009377 0.0003176 2.952 0.00332 **
## AirlineBritish 0.3673914 0.0682004 5.387 1.16e-07 ***
## AirlineDelta 0.2774200 0.1555867 1.783 0.07525 .
## AirlineJet 0.8539656 0.0763329 11.187 < 2e-16 ***
## AirlineSingapore 0.2890546 0.0722569 4.000 7.39e-05 ***
## AirlineVirgin 0.6562015 0.0743168 8.830 < 2e-16 ***
## Aircraft -0.0539372 0.0418085 -1.290 0.19768
## MONTH 0.0208982 0.0160257 1.304 0.19289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3584 on 450 degrees of freedom
## Multiple R-squared: 0.3891, Adjusted R-squared: 0.3741
## F-statistic: 26.05 on 11 and 450 DF, p-value: < 2.2e-16
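# So, from the regression model we can see that FlightDuration, the numbers
# of premium and economy seats (SEATS_PREMIUM, SEATS_ECONOMY) and Airline
# significantly affect the relative price of premium economy with respect to
# economy: each has p < 0.01, except the Delta coefficient (p = 0.075), which
# does not differ significantly from the AirFrance baseline at the 5% level.
# International, Aircraft and travel month (MONTH) have no statistically
# significant effect on the relative price (all p > 0.18).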