This is the R Markdown file for the Airlines mini-project case study.
# Attach psych up front; describe() below comes from it.
library(psych)

# Read the airline data set from the working directory.
airlines.df <- read.csv("SixAirlinesDataV2.csv")

# View() opens a spreadsheet-style viewer, which is only meaningful in an
# interactive session; skip it when the document is rendered non-interactively.
if (interactive()) {
  View(airlines.df)
}

# Per-column summary statistics (n, mean, sd, median, skew, ...).
describe(airlines.df)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Distribution of PriceRelative within each PitchDifference level.
# Boxes are drawn horizontally, so PriceRelative runs along the x axis.
boxplot(
  PriceRelative ~ PitchDifference,
  data = airlines.df,
  horizontal = TRUE,
  main = "Relative price changes according to pitch difference",
  xlab = "Price Relative",
  ylab = "Pitch Difference"
)

# Same view, grouped by WidthDifference instead.
boxplot(
  PriceRelative ~ WidthDifference,
  data = airlines.df,
  horizontal = TRUE,
  main = "Relative price changes according to width difference",
  xlab = "Price Relative",
  ylab = "Width Difference"
)
# One horizontal boxplot per numeric column of interest.
# Names are the data frame columns; values are the plot titles.
single_variable_titles <- c(
  FlightDuration = "Flight duration of all airlines",
  SeatsEconomy = "Seats Economy of all airlines",
  SeatsPremium = "Seats Premium of all airlines",
  PriceEconomy = "Price Economy of all airlines",
  PricePremium = "Price Premium of all airlines",
  PriceRelative = "Price Relative of all airlines",
  SeatsTotal = "Total seats of all airlines"
)

for (column_name in names(single_variable_titles)) {
  boxplot(
    airlines.df[[column_name]],
    horizontal = TRUE,
    main = single_variable_titles[[column_name]]
  )
}
# Pearson correlation test between PriceRelative and PitchDifference.
cor.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$PitchDifference,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PriceRelative and airlines.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
As p < 0.05, the correlation between these 2 variables is statistically significant; with r ≈ 0.47 it is a moderate positive correlation.
# Pearson correlation test between PriceRelative and WidthDifference.
cor.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$WidthDifference,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PriceRelative and airlines.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
As p < 0.05, the correlation between these 2 variables is statistically significant; with r ≈ 0.49 it is a moderate positive correlation.
# Pearson correlation test between PriceRelative and PercentPremiumSeats.
cor.test(
  x = airlines.df$PriceRelative,
  y = airlines.df$PercentPremiumSeats,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PriceRelative and airlines.df$PercentPremiumSeats
## t = -3.496, df = 456, p-value = 0.0005185
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.24949885 -0.07098966
## sample estimates:
## cor
## -0.1615656
As p = 0.0005 < 0.05, the correlation between these 2 variables is statistically significant, but it is weak and negative (r ≈ -0.16).
library(corrgram)

# Correlation matrix plot for the data set, keeping the original column
# order: shaded cells below the diagonal, pie glyphs above it, and
# min/max values on the diagonal.
corrgram(
  airlines.df,
  main = "Corrgram of airlines intercorrelations",
  order = FALSE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  diag.panel = panel.minmax,
  text.panel = panel.txt
)
The regression model is:

$$\text{PriceRelative} = b_0 + b_1 \cdot \text{PitchDifference} + b_2 \cdot \text{WidthDifference}$$
# Ordinary least squares fit of PriceRelative on the two seat-comfort
# differences, followed by the standard coefficient/fit summary.
mn <- lm(
  formula = PriceRelative ~ PitchDifference + WidthDifference,
  data = airlines.df
)
summary(mn)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference,
## data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84163 -0.28484 -0.07241 0.17698 1.18778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10514 0.08304 -1.266 0.206077
## PitchDifference 0.06019 0.01590 3.785 0.000174 ***
## WidthDifference 0.11621 0.02356 4.933 1.14e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2561
## F-statistic: 79.65 on 2 and 455 DF, p-value: < 2.2e-16
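INFERENCE

* An increase in PitchDifference by 1 unit increases PriceRelative by 0.0602, holding WidthDifference constant.
* An increase in WidthDifference by 1 unit increases PriceRelative by 0.1162, holding PitchDifference constant.

Note that PriceRelative is a relative measure, not a dollar amount. So these 2 x variables, PitchDifference and WidthDifference, explain part (about 26%, per the R-squared above) of the relative price difference between premium-economy and economy airline tickets.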
# Estimated regression coefficients (intercept and the two slopes)
coef(mn)
## (Intercept) PitchDifference WidthDifference
## -0.10514235 0.06019158 0.11621441
# 95% confidence intervals for every model coefficient
confint(mn, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -0.26832278 0.05803808
## PitchDifference 0.02893838 0.09144479
## WidthDifference 0.06991835 0.16251047
# Visualize
library(coefplot)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# 1. Plot the estimates and intervals for the two slope coefficients;
#    both PitchDifference and WidthDifference are statistically significant.
coefplot(
  mn,
  predictors = c("PitchDifference", "WidthDifference")
)
PitchDifference and WidthDifference are statistically significant, since their confidence intervals do not include 0.
# Side-by-side check of observed vs. fitted PriceRelative.
# First, the observed PriceRelative values from the data set:
airlines.df[["PriceRelative"]]
## [1] 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 0.75 0.56 0.26 0.52
## [15] 0.52 0.52 0.38 0.38 0.38 0.34 0.34 0.34 0.33 0.33 0.33 0.35 0.33 0.33
## [29] 0.34 0.34 0.34 0.42 0.42 0.42 0.42 0.65 0.65 0.65 0.24 0.24 0.24 0.24
## [43] 0.17 0.17 0.17 0.08 0.08 0.08 0.52 0.52 0.52 1.03 0.36 0.36 0.36 0.34
## [57] 0.34 0.34 0.21 0.21 0.61 0.73 0.73 0.73 0.73 0.39 0.39 0.39 0.39 0.26
## [71] 0.26 0.26 0.10 0.09 0.08 0.07 0.07 0.07 0.04 0.04 0.03 1.07 1.07 1.07
## [85] 1.07 0.40 0.40 0.40 0.40 0.48 0.48 0.48 0.48 0.33 0.33 0.33 0.26 0.09
## [99] 0.49 0.49 0.49 0.49 0.91 0.91 0.91 0.91 0.47 0.47 0.47 1.27 1.27 0.36
## [113] 0.06 0.10 0.10 0.04 0.11 0.11 0.08 0.09 0.05 0.05 0.11 0.14 0.17 0.16
## [127] 0.15 0.07 0.17 0.18 0.14 0.13 0.16 0.18 0.18 0.25 0.20 0.26 0.19 0.23
## [141] 0.23 0.30 0.30 0.30 0.25 0.29 0.29 0.29 0.40 0.31 0.33 0.13 0.10 0.09
## [155] 0.06 1.82 1.82 1.82 1.82 1.73 1.73 1.73 1.38 0.97 0.97 0.97 0.97 0.91
## [169] 0.91 0.91 0.91 0.84 0.56 0.51 0.51 0.51 0.51 0.50 0.49 0.40 0.40 0.40
## [183] 0.40 0.26 0.46 0.46 0.38 0.38 0.38 0.30 1.08 1.08 1.08 1.08 1.03 1.03
## [197] 1.03 1.03 0.84 0.84 0.84 0.49 0.49 0.41 0.41 0.41 0.41 0.26 0.10 0.10
## [211] 0.10 1.56 1.17 0.63 0.08 0.08 0.08 0.08 0.08 0.08 0.08 0.08 0.08 0.08
## [225] 0.08 0.07 0.07 0.07 0.07 0.07 0.04 0.03 0.03 0.03 0.03 0.03 0.03 0.03
## [239] 0.03 1.13 1.13 0.26 0.45 0.45 0.45 0.36 0.36 0.36 0.36 0.98 0.98 0.98
## [253] 0.33 0.33 0.33 0.33 0.36 0.36 0.36 1.13 0.42 0.42 0.42 0.40 0.40 0.40
## [267] 0.80 0.07 0.07 0.07 1.11 1.11 0.91 0.20 0.80 0.17 0.17 0.17 0.21 0.57
## [281] 0.14 0.14 0.12 0.12 0.12 0.11 0.11 0.11 0.11 0.11 0.11 0.10 0.10 0.10
## [295] 0.09 0.09 0.08 0.08 0.08 0.07 0.07 0.05 0.05 0.05 0.04 0.04 0.04 1.50
## [309] 0.96 0.82 0.42 0.42 0.40 0.38 1.11 0.83 0.83 0.77 0.60 0.60 0.60 0.55
## [323] 0.48 0.48 0.13 0.13 0.13 0.13 0.13 0.13 0.10 0.10 0.10 0.10 0.09 0.09
## [337] 0.09 0.09 0.36 0.36 0.36 0.08 0.07 0.07 0.07 0.07 0.04 0.04 0.04 0.03
## [351] 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03 0.03
## [365] 0.03 0.03 1.39 1.39 1.39 0.14 0.14 0.14 0.77 0.48 0.48 0.04 0.52 0.37
## [379] 1.89 1.89 1.89 1.87 1.67 1.64 1.53 1.29 1.26 1.26 1.26 1.11 1.11 1.11
## [393] 1.09 1.06 1.04 1.04 0.91 0.81 0.79 0.74 0.74 0.74 0.74 0.50 0.17 1.64
## [407] 1.64 1.44 0.56 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.61 0.61 0.61
## [421] 0.61 0.61 0.61 0.61 0.61 1.16 1.16 0.08 0.08 0.07 0.07 0.07 0.04 0.04
## [435] 0.04 0.04 0.03 0.03 0.02 1.71 1.68 1.68 1.30 1.30 1.30 1.30 1.22 1.07
## [449] 0.77 0.77 0.77 0.65 0.60 0.58 0.45 0.45 0.38 0.12
# Next, the PriceRelative values the OLS model predicts for the same rows:
fitted.values(mn)
## 1 2 3 4 5 6
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 7 8 9 10 11 12
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 13 14 15 16 17 18
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 19 20 21 22 23 24
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 25 26 27 28 29 30
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 31 32 33 34 35 36
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 37 38 39 40 41 42
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 43 44 45 46 47 48
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 49 50 51 52 53 54
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 55 56 57 58 59 60
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 61 62 63 64 65 66
## 0.43241314 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 67 68 69 70 71 72
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 73 74 75 76 77 78
## 0.66484196 0.07543240 0.07543240 0.07543240 0.07543240 0.07543240
## 79 80 81 82 83 84
## 0.07543240 0.07543240 0.07543240 0.43241314 0.43241314 0.43241314
## 85 86 87 88 89 90
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.96163112
## 91 92 93 94 95 96
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 97 98 99 100 101 102
## 0.96163112 0.01524082 0.43241314 0.43241314 0.43241314 0.43241314
## 103 104 105 106 107 108
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 109 110 111 112 113 114
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 115 116 117 118 119 120
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 121 122 123 124 125 126
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 127 128 129 130 131 132
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 133 134 135 136 137 138
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 139 140 141 142 143 144
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 145 146 147 148 149 150
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 151 152 153 154 155 156
## 0.43241314 0.07543240 0.07543240 0.07543240 0.07543240 0.66484196
## 157 158 159 160 161 162
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 163 164 165 166 167 168
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 169 170 171 172 173 174
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 175 176 177 178 179 180
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 181 182 183 184 185 186
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 187 188 189 190 191 192
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 193 194 195 196 197 198
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 199 200 201 202 203 204
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 205 206 207 208 209 210
## 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196 0.66484196
## 211 212 213 214 215 216
## 0.66484196 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 217 218 219 220 221 222
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 223 224 225 226 227 228
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 229 230 231 232 233 234
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 235 236 237 238 239 240
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.43241314
## 241 242 243 244 245 246
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 247 248 249 250 251 252
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 253 254 255 256 257 258
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 259 260 261 262 263 264
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 265 266 267 268 269 270
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 271 272 273 274 275 276
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 277 278 279 280 281 282
## 0.43241314 0.43241314 0.43241314 0.43241314 0.01524082 0.01524082
## 283 284 285 286 287 288
## 0.01524082 0.01524082 0.01524082 0.07543240 0.07543240 0.07543240
## 289 290 291 292 293 294
## 0.01524082 0.01524082 0.01524082 0.07543240 0.01524082 0.01524082
## 295 296 297 298 299 300
## 0.01524082 0.01524082 0.01524082 0.01524082 0.01524082 0.01524082
## 301 302 303 304 305 306
## 0.01524082 0.01524082 0.01524082 0.01524082 0.01524082 0.01524082
## 307 308 309 310 311 312
## 0.01524082 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 313 314 315 316 317 318
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 319 320 321 322 323 324
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 325 326 327 328 329 330
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 331 332 333 334 335 336
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 337 338 339 340 341 342
## 0.37222156 0.37222156 0.48843597 0.48843597 0.48843597 0.48843597
## 343 344 345 346 347 348
## 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597
## 349 350 351 352 353 354
## 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597
## 355 356 357 358 359 360
## 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597
## 361 362 363 364 365 366
## 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597 0.48843597
## 367 368 369 370 371 372
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 373 374 375 376 377 378
## 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314 0.43241314
## 379 380 381 382 383 384
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 385 386 387 388 389 390
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 391 392 393 394 395 396
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 397 398 399 400 401 402
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 403 404 405 406 407 408
## 0.96163112 0.96163112 0.96163112 0.48843597 0.48843597 0.48843597
## 409 410 411 412 413 414
## 0.48843597 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 415 416 417 418 419 420
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 421 422 423 424 425 426
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 427 428 429 430 431 432
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 433 434 435 436 437 438
## 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156 0.37222156
## 439 440 441 442 443 444
## 0.37222156 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 445 446 447 448 449 450
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 451 452 453 454 455 456
## 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112 0.96163112
## 457 458
## 0.96163112 0.96163112
# Put the observed PriceRelative next to the model's fitted values so the
# two can be compared row by row.
actualPriceRelative <- data.frame(airlines.df$PriceRelative)
predictedPriceRelative <- data.frame(fitted(mn))
PriceRelativeComparison <- cbind(actualPriceRelative, predictedPriceRelative)

# View() opens a spreadsheet-style viewer, which is only meaningful in an
# interactive session; skip it when the document is rendered non-interactively.
if (interactive()) {
  View(PriceRelativeComparison)
}