# Load the airline pricing data set from the working directory.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads text columns
# as character by default, while the structure and summaries recorded below
# show Airline, Aircraft, TravelMonth and IsInternational as factors.
mydata <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# Preview the first six rows.
head(mydata)
## Airline Aircraft FlightDuration TravelMonth IsInternational SeatsEconomy
## 1 British Boeing 12.25 Jul International 122
## 2 British Boeing 12.25 Aug International 122
## 3 British Boeing 12.25 Sep International 122
## 4 British Boeing 12.25 Oct International 122
## 5 British Boeing 8.16 Aug International 122
## 6 British Boeing 8.16 Sep International 122
## SeatsPremium PitchEconomy PitchPremium WidthEconomy WidthPremium
## 1 40 31 38 18 19
## 2 40 31 38 18 19
## 3 40 31 38 18 19
## 4 40 31 38 18 19
## 5 40 31 38 18 19
## 6 40 31 38 18 19
## PriceEconomy PricePremium PriceRelative SeatsTotal PitchDifference
## 1 2707 3725 0.38 162 7
## 2 2707 3725 0.38 162 7
## 3 2707 3725 0.38 162 7
## 4 2707 3725 0.38 162 7
## 5 1793 2999 0.67 162 7
## 6 1793 2999 0.67 162 7
## WidthDifference PercentPremiumSeats
## 1 1 24.69
## 2 1 24.69
## 3 1 24.69
## 4 1 24.69
## 5 1 24.69
## 6 1 24.69
# Put the columns of mydata on the search path so they can be referenced by
# bare name (e.g. PriceRelative instead of mydata$PriceRelative).
# NOTE(review): any later call that names a column without `mydata$` or a
# `data =` argument resolves through this attach(); the attached copy does not
# follow later modifications of mydata. Prefer passing the data frame
# explicitly before removing this line.
attach(mydata)
# Print per-column summaries: quartiles and mean for numeric columns, level
# counts for factor columns.
summary(mydata)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Show the structure of mydata: its dimensions plus the type and first few
# values of every column.
str(mydata)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
library(lattice)
## Warning: package 'lattice' was built under R version 3.3.3
# Count histogram of relative price, one panel per Airline x Aircraft
# combination. All columns are looked up in mydata through `data =`, rather
# than mixing `mydata$` references with a bare name that only resolves when
# the data frame has been attach()-ed.
histogram(
  ~ PriceRelative | Airline + Aircraft,
  data = mydata,
  main = "Price",
  type = "count",
  breaks = 4,
  col = c("red", "blue", "green", "yellow")
)
# Count histogram of relative price, split into domestic and international
# panels. lattice is already loaded earlier in the script, so it is not loaded
# again here; columns are taken from mydata explicitly instead of relying on
# attach().
histogram(
  ~ PriceRelative | IsInternational,
  data = mydata,
  type = "count",
  breaks = 5,
  col = c("red", "blue", "green", "yellow"),
  xlab = "Relative price"
)
As expected, all of the domestic flights lie in the 0-0.1 range of relative price between economy and premium economy. However, there is a gradual change in the case of international flights, even though most of the flights are between 0.0
# Horizontal box-and-whisker plot of relative price, one panel for domestic
# and one for international flights. Columns come from mydata via `data =`
# so the call does not depend on the data frame being attach()-ed.
bwplot(
  ~ PriceRelative | IsInternational,
  data = mydata,
  horizontal = TRUE,
  xlab = "Relative price",
  col = c("yellow")
)
library(car)
## Warning: package 'car' was built under R version 3.3.3
# Scatterplot matrix of relative price, flight duration and premium seat
# count, with histograms on the diagonal.
#
# car 3.0 changed `diagonal` from a string to a list (`list(method = ...)`);
# with the newer API a bare string is ignored with a warning and the default
# diagonal panel is drawn instead. Pick the form that matches the installed
# version so histograms are drawn either way.
diag_panel <- if (utils::packageVersion("car") >= "3.0.0") {
  list(method = "histogram")
} else {
  "histogram"
}

# Columns are taken from mydata explicitly instead of relying on attach().
scatterplotMatrix(
  ~ PriceRelative + FlightDuration + SeatsPremium,
  data = mydata,
  cex = 0.6,
  diagonal = diag_panel
)
library("corrgram")
## Warning: package 'corrgram' was built under R version 3.3.3
# Correlogram of mydata: shaded cells below the diagonal, pie glyphs above it,
# variable names on the diagonal. order = TRUE lets corrgram reorder the
# variables rather than keeping the column order of the data frame.
corrgram(
  mydata,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Correlation of relative price!!"
)
# Welch two-sample t-test between the PriceRelative and FlightDuration
# columns. with() scopes the column lookup to mydata, so the call no longer
# depends on attach() while the reported data names stay the same.
# NOTE(review): this compares the mean of a price ratio with the mean of a
# duration, i.e. two different quantities, so a difference in means does not
# show whether the two are related. If association is what is being tested,
# cor.test() is probably the right tool - confirm before relying on this.
with(mydata, t.test(PriceRelative, FlightDuration))
##
## Welch Two Sample t-test
##
## data: PriceRelative and FlightDuration
## t = -42.499, df = 471.79, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.418482 -6.762785
## sample estimates:
## mean of x mean of y
## 0.4872052 7.5778384
# Welch two-sample t-test of relative price between the two IsInternational
# groups (Domestic vs. International). The data frame is passed explicitly so
# the formula does not depend on mydata being attach()-ed.
t.test(PriceRelative ~ IsInternational, data = mydata)
##
## Welch Two Sample t-test
##
## data: PriceRelative by IsInternational
## t = -19.451, df = 446.12, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4855215 -0.3964139
## sample estimates:
## mean in group Domestic mean in group International
## 0.0847500 0.5257177
# Welch two-sample t-test between the PriceRelative and SeatsPremium columns.
# with() scopes the column lookup to mydata, so the call no longer depends on
# attach() while the reported data names stay the same.
# NOTE(review): this compares the mean of a price ratio with the mean of a
# seat count, i.e. two different quantities; a difference in means does not
# show whether they are related. cor.test() is probably what is wanted if the
# goal is association - confirm.
with(mydata, t.test(PriceRelative, SeatsPremium))
##
## Welch Two Sample t-test
##
## data: PriceRelative and SeatsPremium
## t = -53.484, df = 458.06, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -34.37971 -31.94282
## sample estimates:
## mean of x mean of y
## 0.4872052 33.6484716
# Welch two-sample t-test between the PriceRelative and SeatsEconomy columns.
# with() scopes the column lookup to mydata, so the call no longer depends on
# attach() while the reported data names stay the same.
# NOTE(review): this compares the mean of a price ratio with the mean of a
# seat count, i.e. two different quantities; a difference in means does not
# show whether they are related. cor.test() is probably what is wanted if the
# goal is association - confirm.
with(mydata, t.test(PriceRelative, SeatsEconomy))
##
## Welch Two Sample t-test
##
## data: PriceRelative and SeatsEconomy
## t = -56.553, df = 457.03, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -208.8382 -194.8118
## sample estimates:
## mean of x mean of y
## 0.4872052 202.3122271
# Linear regression of relative price on flight duration, international
# status, premium and economy seat counts, airline, aircraft maker and travel
# month. The data frame is passed through `data =` so the fit does not depend
# on mydata being attach()-ed; the echoed call below reflects that argument.
model <- lm(
  PriceRelative ~ FlightDuration + IsInternational + SeatsPremium +
    SeatsEconomy + Airline + Aircraft + TravelMonth,
  data = mydata
)

# Coefficient table, residual summary and overall fit statistics.
summary(model)
##
## Call:
## lm(formula = PriceRelative ~ FlightDuration + IsInternational +
## SeatsPremium + SeatsEconomy + Airline + Aircraft + TravelMonth,
## data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85489 -0.21057 -0.04338 0.11264 1.46013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3037490 0.1809687 -1.678 0.09396 .
## FlightDuration 0.0259562 0.0063694 4.075 5.45e-05 ***
## IsInternationalInternational 0.2335790 0.1708493 1.367 0.17227
## SeatsPremium -0.0084740 0.0028235 -3.001 0.00284 **
## SeatsEconomy 0.0010863 0.0003282 3.310 0.00101 **
## AirlineBritish 0.3857159 0.0688152 5.605 3.66e-08 ***
## AirlineDelta 0.2883726 0.1557100 1.852 0.06469 .
## AirlineJet 0.8296645 0.0773443 10.727 < 2e-16 ***
## AirlineSingapore 0.2893955 0.0722322 4.006 7.23e-05 ***
## AirlineVirgin 0.6694843 0.0747211 8.960 < 2e-16 ***
## AircraftBoeing 0.0526236 0.0418191 1.258 0.20892
## TravelMonthJul -0.0123777 0.0528732 -0.234 0.81501
## TravelMonthOct 0.0511200 0.0449784 1.137 0.25634
## TravelMonthSep -0.0131970 0.0448421 -0.294 0.76867
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3583 on 444 degrees of freedom
## Multiple R-squared: 0.3857, Adjusted R-squared: 0.3677
## F-statistic: 21.44 on 13 and 444 DF, p-value: < 2.2e-16
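From the regression model we can see that IsInternational, Aircraft and TravelMonth have no statistically significant effect on the relative price of premium economy (all p > 0.1), whereas FlightDuration, Airline, SeatsPremium and SeatsEconomy significantly affect the relative price of premium economy with respect to economy (p < 0.01 for FlightDuration, both seat counts and most airline levels).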