# Load the airline pricing data and preview the first rows.
# attach() is intentionally not used: the models in this script pass
# `data = bomdelbom` explicitly, and attaching a data frame puts its columns
# on the search path where they can silently mask (or be masked by) other
# objects of the same name.
bomdelbom <- read.csv("AirlinePricingData.csv")
head(bomdelbom)
## FlightNumber Airline DepartureCityCode ArrivalCityCode DepartureTime
## 1 9W 313 Jet DEL BOM 225
## 2 9W 339 Jet BOM DEL 300
## 3 SG 161 Spice Jet DEL BOM 350
## 4 6E 171 IndiGo DEL BOM 455
## 5 SG 160 Spice Jet BOM DEL 555
## 6 9W 762 Jet BOM DEL 605
## ArrivalTime Departure FlyingMinutes Aircraft PlaneModel Capacity
## 1 435 AM 130 Boeing 738 156
## 2 505 AM 125 Boeing 738 156
## 3 605 AM 135 Boeing 738 189
## 4 710 AM 135 Airbus A320 180
## 5 805 AM 130 Boeing 738 189
## 6 815 AM 130 Boeing 738 156
## SeatPitch SeatWidth DataCollectionDate DateDeparture IsWeekend Price
## 1 30 17 Sep 13 2018 Nov 6 2018 No 4051
## 2 30 17 Sep 15 2018 Nov 6 2018 No 11587
## 3 29 17 Sep 19 2018 Nov 6 2018 No 3977
## 4 30 18 Sep 8 2018 Nov 6 2018 No 4234
## 5 29 17 Sep 19 2018 Nov 6 2018 No 6837
## 6 30 17 Sep 15 2018 Nov 6 2018 No 6518
## AdvancedBookingDays IsDiwali DayBeforeDiwali DayAfterDiwali
## 1 54 1 1 0
## 2 52 1 1 0
## 3 48 1 1 0
## 4 59 1 1 0
## 5 48 1 1 0
## 6 52 1 1 0
## MetroDeparture MetroArrival MarketShare LoadFactor
## 1 1 1 15.4 83.32
## 2 1 1 15.4 83.32
## 3 1 1 13.2 94.06
## 4 1 1 39.6 87.20
## 5 1 1 13.2 94.06
## 6 1 1 15.4 83.32
# Model 1: linear regression of the ticket price (in levels) on booking lead
# time, carrier, departure period, weekend/Diwali indicators, origin city,
# flight duration and seat dimensions.
model1 <- Price ~
  AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali +
  DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth
fit1 <- lm(formula = model1, data = bomdelbom)
summary(fit1)
##
## Call:
## lm(formula = model1, data = bomdelbom)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2671.2 -1266.2 -456.4 517.4 11953.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4292.94 8897.87 -0.482 0.6298
## AdvancedBookingDays -87.70 12.47 -7.033 1.43e-11 ***
## AirlineIndiGo -577.17 778.64 -0.741 0.4591
## AirlineJet -120.75 436.69 -0.277 0.7823
## AirlineSpice Jet -1118.38 697.85 -1.603 0.1101
## DeparturePM -589.79 275.23 -2.143 0.0329 *
## IsWeekendYes -345.92 408.06 -0.848 0.3973
## IsDiwali 4346.80 568.14 7.651 2.90e-13 ***
## DepartureCityCodeDEL -1413.46 351.54 -4.021 7.38e-05 ***
## FlyingMinutes 38.97 29.27 1.331 0.1841
## SeatPitch -279.19 226.64 -1.232 0.2190
## SeatWidth 868.58 507.54 1.711 0.0881 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2079 on 293 degrees of freedom
## Multiple R-squared: 0.2695, Adjusted R-squared: 0.2421
## F-statistic: 9.828 on 11 and 293 DF, p-value: 3.604e-15
# Model 2: same predictors as model 1, but the response is log(Price), so the
# coefficients are on the log-price scale.
model2 <- log(Price) ~
  AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali +
  DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth
fit2 <- lm(formula = model2, data = bomdelbom)
summary(fit2)
##
## Call:
## lm(formula = model2, data = bomdelbom)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57006 -0.19770 -0.05792 0.12935 1.24672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.549474 1.243788 5.266 2.71e-07 ***
## AdvancedBookingDays -0.014639 0.001743 -8.399 1.97e-15 ***
## AirlineIndiGo -0.098622 0.108842 -0.906 0.3656
## AirlineJet 0.001113 0.061043 0.018 0.9855
## AirlineSpice Jet -0.127169 0.097548 -1.304 0.1934
## DeparturePM -0.055844 0.038473 -1.452 0.1477
## IsWeekendYes -0.036748 0.057041 -0.644 0.5199
## IsDiwali 0.744738 0.079418 9.377 < 2e-16 ***
## DepartureCityCodeDEL -0.264017 0.049140 -5.373 1.58e-07 ***
## FlyingMinutes 0.008717 0.004092 2.131 0.0340 *
## SeatPitch -0.032824 0.031681 -1.036 0.3010
## SeatWidth 0.122364 0.070947 1.725 0.0856 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2906 on 293 degrees of freedom
## Multiple R-squared: 0.3671, Adjusted R-squared: 0.3433
## F-statistic: 15.45 on 11 and 293 DF, p-value: < 2.2e-16
# Normality diagnostics for fit1 (Price) and fit2 (log(Price)).
# which = 2 selects the normal Q-Q plot of the residuals.
plot(fit1, which = 2)
plot(fit2, which = 2)
# In both plots the points do not fall approximately along the reference
# line, so we cannot assume that the residuals are normally distributed.

# Shapiro-Wilk test of normality. The linear-model assumption concerns the
# residuals, not the fitted values, so the test is applied to residuals().
# The results previously recorded here were computed on the fitted values
# (fit1: W = 0.96562, p-value = 1.224e-06; fit2: W = 0.96081,
# p-value = 2.589e-07) and do not describe the residuals; re-run the script
# to record the residual-based results.
shapiro.test(residuals(fit1))
shapiro.test(residuals(fit2))
# Residuals-vs-fitted plots (which = 1) for the level and log models.
plot(fit1, which = 1)
plot(fit2, which = 1)
# The smoothed line is not horizontal for either model.
# Model 3: same predictors as model 1, with `PriceNew` as the response.
# NOTE(review): `PriceNew` is not among the columns shown by head(bomdelbom)
# and is not created earlier in this script. It has to be defined (as a
# column of bomdelbom) before this fit can run - confirm how it is meant to
# be derived from Price.
model3 <- PriceNew ~
  AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali +
  DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth
# Fit the formula defined just above. This call used to be given `model1`,
# which made fit3 an exact copy of fit1.
fit3 <- lm(model3, data = bomdelbom)
summary(fit3)
# The summary previously recorded here was the model1 fit
# (Call: lm(formula = model1, data = bomdelbom); R-squared 0.2695) and has
# been removed as stale; re-run the script to record the model3 results.
# Model 4: same predictors as model 1, with log(PriceNew) as the response.
# NOTE(review): `PriceNew` is not among the columns shown by head(bomdelbom)
# and is not created earlier in this script. It has to be defined (as a
# column of bomdelbom) before this fit can run - confirm how it is meant to
# be derived from Price.
model4 <- log(PriceNew) ~
  AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali +
  DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth
# Fit the formula defined just above. This call used to be given `model1`,
# which made fit4 an exact copy of fit1.
fit4 <- lm(model4, data = bomdelbom)
summary(fit4)
# The summary previously recorded here was the model1 fit
# (Call: lm(formula = model1, data = bomdelbom); R-squared 0.2695) and has
# been removed as stale; re-run the script to record the model4 results.
# Normality diagnostics for fit3 and fit4.
# which = 2 selects the normal Q-Q plot of the residuals.
plot(fit3, which = 2)
plot(fit4, which = 2)
# In both plots the points do not fall approximately along the reference
# line, so we cannot assume that the residuals are normally distributed.

# Shapiro-Wilk test of normality. The linear-model assumption concerns the
# residuals, not the fitted values, so the test is applied to residuals().
# The results previously recorded here were computed on the fitted values
# (W = 0.96562, p-value = 1.224e-06 for both fit3 and fit4) and do not
# describe the residuals; re-run the script to record the residual-based
# results.
shapiro.test(residuals(fit3))
shapiro.test(residuals(fit4))
# Residuals-vs-fitted plots (which = 1) for fit3 and fit4.
plot(fit3, which = 1)
plot(fit4, which = 1)
# The smoothed line is not horizontal for either model.