# Per-column descriptive statistics for the raw airline data; the result is
# stored and then auto-printed.
datadescp <- describe(x = AirlinesRaw)
datadescp
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## IsInternationalFlag 6 458 0.91 0.28 1.00 1.00 0.00 0.00
## SeatsEconomy 7 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 8 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 9 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 10 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 11 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 12 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 13 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 14 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 15 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 16 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 17 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 18 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 19 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## IsInternationalFlag 1.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Base-R summary of every column (level counts for factors, quantiles and mean
# for numeric columns); stored and then auto-printed.
datasummary <- summary(object = AirlinesRaw)
datasummary
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational IsInternationalFlag SeatsEconomy SeatsPremium
## Domestic : 40 Min. :0.0000 Min. : 78.0 Min. : 8.00
## International:418 1st Qu.:1.0000 1st Qu.:133.0 1st Qu.:21.00
## Median :1.0000 Median :185.0 Median :36.00
## Mean :0.9127 Mean :202.3 Mean :33.65
## 3rd Qu.:1.0000 3rd Qu.:243.0 3rd Qu.:40.00
## Max. :1.0000 Max. :389.0 Max. :66.00
## PitchEconomy PitchPremium WidthEconomy WidthPremium
## Min. :30.00 Min. :34.00 Min. :17.00 Min. :17.00
## 1st Qu.:31.00 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00
## Median :31.00 Median :38.00 Median :18.00 Median :19.00
## Mean :31.22 Mean :37.91 Mean :17.84 Mean :19.47
## 3rd Qu.:32.00 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00
## Max. :33.00 Max. :40.00 Max. :19.00 Max. :21.00
## PriceEconomy PricePremium PriceRelative SeatsTotal
## Min. : 65 Min. : 86.0 Min. :0.0200 Min. : 98
## 1st Qu.: 413 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166
## Median :1242 Median :1737.0 Median :0.3650 Median :227
## Mean :1327 Mean :1845.3 Mean :0.4872 Mean :236
## 3rd Qu.:1909 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279
## Max. :3593 Max. :7414.0 Max. :1.8900 Max. :441
## PitchDifference WidthDifference PercentPremiumSeats
## Min. : 2.000 Min. :0.000 Min. : 4.71
## 1st Qu.: 6.000 1st Qu.:1.000 1st Qu.:12.28
## Median : 7.000 Median :1.000 Median :13.21
## Mean : 6.688 Mean :1.633 Mean :14.65
## 3rd Qu.: 7.000 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :10.000 Max. :4.000 Max. :24.69
# Mean flight duration for each observed combination of economy/premium seat
# pitch and width. summarise() drops only the last grouping level, so the
# result is still grouped by the first three columns.
summarydf <- AirlinesRaw %>%
  group_by(PitchEconomy, PitchPremium, WidthEconomy, WidthPremium) %>%
  summarise(AverageDuration = mean(FlightDuration))
summarydf
## # A tibble: 11 x 5
## # Groups: PitchEconomy, PitchPremium, WidthEconomy [?]
## PitchEconomy PitchPremium WidthEconomy WidthPremium AverageDuration
## <int> <int> <int> <int> <dbl>
## 1 30 40 17 21 3.482407
## 2 31 34 17 17 4.700000
## 3 31 34 18 18 2.142500
## 4 31 38 18 19 7.854971
## 5 31 38 18 21 9.220882
## 6 32 34 17 17 3.465789
## 7 32 35 18 18 4.420000
## 8 32 38 17 19 8.843125
## 9 32 38 18 19 9.120408
## 10 32 38 19 20 10.481000
## 11 33 35 17 17 2.474000
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
# Economy class: price against seat pitch.
scatterplotPitchEcnmyPriceEcnmy <- scatterplot(
  PriceEconomy ~ PitchEconomy,
  data = AirlinesRaw,
  main = "scatter plot of PriceEconomy vs. PitchEconomy"
)
# Premium class: price against seat pitch. Stored under its own name; the
# original reused the economy variable here (copy-paste slip), which overwrote
# the economy result with a misleadingly named premium one. The repeated
# library(car) call was dropped because car is already attached above.
scatterplotPitchPrmPricePrm <- scatterplot(
  PricePremium ~ PitchPremium,
  data = AirlinesRaw,
  main = "scatter plot of PricePremium vs. PitchPremium"
)
library(lattice)
# Lattice histogram of the PitchDifference column.
histogram(
  ~ PitchDifference,
  data = AirlinesRaw,
  main = "Distribution of Pitch Difference",
  xlab = "Difference in Pitch",
  col = "blue"
)
# Pearson correlations between seat dimensions and fares, each shown rounded
# to two decimals.

# Premium pitch vs. premium price.
correlationPMT <- with(AirlinesRaw, cor(PitchPremium, PricePremium))
round(correlationPMT, digits = 2)
## [1] 0.09
# Economy pitch vs. economy price.
correlationPCT <- with(AirlinesRaw, cor(PitchEconomy, PriceEconomy))
round(correlationPCT, digits = 2)
## [1] 0.37
# Premium price vs. premium seat width.
correlationPMTWidth <- with(AirlinesRaw, cor(PricePremium, WidthPremium))
round(correlationPMTWidth, digits = 2)
## [1] 0.06
# Economy price vs. economy seat width.
correlationPCTWidth <- with(AirlinesRaw, cor(PriceEconomy, WidthEconomy))
round(correlationPCTWidth, digits = 2)
## [1] 0.07
# Mean economy price, premium price and relative price for each distinct
# PitchDifference value. Note that the result variable has the same name as
# the AirlinesRaw column it is grouped by.
PitchDifference <- aggregate(
  cbind(PriceEconomy, PricePremium, PriceRelative) ~ PitchDifference,
  data = AirlinesRaw,
  FUN = mean
)
PitchDifference
## PitchDifference PriceEconomy PricePremium PriceRelative
## 1 2 348.0000 377.3333 0.08708333
## 2 3 369.5625 398.7500 0.08125000
## 3 6 2008.6942 2333.7438 0.34082645
## 4 7 1388.1317 2155.4897 0.51888889
## 5 10 243.8519 435.6481 0.97074074
# Pearson correlation between premium fare and flight duration.
correlationDurationPrice <- cor(AirlinesRaw$PricePremium, AirlinesRaw$FlightDuration)
round(correlationDurationPrice, 2)
## [1] 0.65
# Pearson correlation between economy fare and flight duration.
correlationDurationPriceEcnmy <- cor(AirlinesRaw$PriceEconomy, AirlinesRaw$FlightDuration)
# Fix: print the economy correlation. The original re-printed
# correlationDurationPrice (the premium value) at this point.
round(correlationDurationPriceEcnmy, 2)
## NOTE(review): re-run to regenerate this output. The previously recorded
## "[1] 0.65" was the premium correlation printed by mistake.
# Scatterplots of fares against flight duration, drawn as a side effect.
# The recorded output shows each call returning NULL, so the stored variables
# hold no plot object and printing them only echoes NULL.
# NOTE(review): axis labels presumably come from the literal argument
# expressions (e.g. "AirlinesRaw$FlightDuration") — confirm before restyling.

# Economy fare vs. flight duration.
scatterplottEcnmy <- scatterplot(AirlinesRaw$FlightDuration, AirlinesRaw$PriceEconomy)
scatterplottEcnmy
## NULL
# Premium fare vs. flight duration.
scatterplottPrm <- scatterplot(AirlinesRaw$FlightDuration, AirlinesRaw$PricePremium)
scatterplottPrm
## NULL
# Relative price (PriceRelative column) vs. flight duration.
scatterplotPriceRelative <- scatterplot(AirlinesRaw$FlightDuration, AirlinesRaw$PriceRelative)
scatterplotPriceRelative
## NULL
library(corrgram)
# Correlogram of AirlinesRaw: shaded cells in the lower triangle, pie glyphs in
# the upper triangle, variable names drawn by panel.txt, variables reordered
# (order = TRUE).
corrgram(
  AirlinesRaw,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airline"
)
# Average flight duration per airline.
AirlineFlightduration <- AirlinesRaw %>%
  group_by(Airline) %>%
  summarise(avgDuration = mean(FlightDuration))
AirlineFlightduration
## # A tibble: 6 x 2
## Airline avgDuration
## <fctr> <dbl>
## 1 AirFrance 8.988514
## 2 British 7.854971
## 3 Delta 4.028913
## 4 Jet 4.143934
## 5 Singapore 10.481000
## 6 Virgin 9.250484
# Contingency table of record counts by airline and aircraft manufacturer,
# printed as a flat table.
CrosstableAirlinePlane <- xtabs(
  ~ Airline + Aircraft,
  data = AirlinesRaw
)
ftable(CrosstableAirlinePlane)
## Aircraft AirBus Boeing
## Airline
## AirFrance 36 38
## British 47 128
## Delta 12 34
## Jet 7 54
## Singapore 16 24
## Virgin 33 29
# Row percentages: each airline's share of AirBus vs. Boeing records.
prop.table(ftable(CrosstableAirlinePlane), margin = 1) * 100
## Aircraft AirBus Boeing
## Airline
## AirFrance 48.64865 51.35135
## British 26.85714 73.14286
## Delta 26.08696 73.91304
## Jet 11.47541 88.52459
## Singapore 40.00000 60.00000
## Virgin 53.22581 46.77419
# Average relative price (PriceRelative column) per airline.
AirlinePricerelative <- AirlinesRaw %>%
  group_by(Airline) %>%
  summarise(avgPR = mean(PriceRelative))
AirlinePricerelative
## # A tibble: 6 x 2
## Airline avgPR
## <fctr> <dbl>
## 1 AirFrance 0.2047297
## 2 British 0.4375429
## 3 Delta 0.1250000
## 4 Jet 0.9396721
## 5 Singapore 0.5297500
## 6 Virgin 0.7606452
# Test whether PriceRelative and FlightDuration are correlated (the recorded
# output reports a two-sided Pearson test). The arguments are left as literal
# `$` expressions because the "data:" line of the printed result echoes them.
cor.test(AirlinesRaw$PriceRelative,AirlinesRaw$FlightDuration)
##
## Pearson's product-moment correlation
##
## data: AirlinesRaw$PriceRelative and AirlinesRaw$FlightDuration
## t = 2.6046, df = 456, p-value = 0.009498
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02977856 0.21036806
## sample estimates:
## cor
## 0.121075
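Low correlation: the association between PriceRelative and FlightDuration is statistically significant (p = 0.0095) but weak (r = 0.12).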
# Linear model of relative price on the pitch and width differences, followed
# by its coefficient table and fit statistics.
modelPPW <- lm(
  formula = PriceRelative ~ PitchDifference + WidthDifference,
  data = AirlinesRaw
)
summary(modelPPW)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference,
## data = AirlinesRaw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84163 -0.28484 -0.07241 0.17698 1.18778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10514 0.08304 -1.266 0.206077
## PitchDifference 0.06019 0.01590 3.785 0.000174 ***
## WidthDifference 0.11621 0.02356 4.933 1.14e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2561
## F-statistic: 79.65 on 2 and 455 DF, p-value: < 2.2e-16
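Both predictors are statistically significant (p < 0.001) with positive coefficients, and the overall F-test is significant. However, the model explains only about 26% of the variance in PriceRelative (adjusted R-squared = 0.256), so the fit is modest rather than good.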