Reading the data

# Load the flight fare data set. stringsAsFactors = TRUE is set explicitly so
# text columns are read as factors on every R version (the default became
# FALSE in R 4.0.0); summary() then reports per-level counts for them.
Bomdelbom <- read.csv("BOMDELBOM.csv", stringsAsFactors = TRUE)
# attach() is deliberately not used: every later call passes data = Bomdelbom
# or uses Bomdelbom$..., and an attached copy would go stale as soon as a
# column is added to the data frame.
head(Bomdelbom)
##   FlightNumber   Airline DepartureCityCode ArrivalCityCode DepartureTime
## 1       9W 313       Jet               DEL             BOM           225
## 2       9W 339       Jet               BOM             DEL           300
## 3       SG 161 Spice Jet               DEL             BOM           350
## 4       6E 171    IndiGo               DEL             BOM           455
## 5       SG 160 Spice Jet               BOM             DEL           555
## 6       9W 762       Jet               BOM             DEL           605
##   ArrivalTime Departure FlyingMinutes Aircraft PlaneModel Capacity
## 1         435        AM           130   Boeing        738      156
## 2         505        AM           125   Boeing        738      156
## 3         605        AM           135   Boeing        738      189
## 4         710        AM           135   Airbus       A320      180
## 5         805        AM           130   Boeing        738      189
## 6         815        AM           130   Boeing        738      156
##   SeatPitch SeatWidth DataCollectionDate DateDeparture IsWeekend Price
## 1        30        17        Sep 13 2018    Nov 6 2018        No  4051
## 2        30        17        Sep 15 2018    Nov 6 2018        No 11587
## 3        29        17        Sep 19 2018    Nov 6 2018        No  3977
## 4        30        18         Sep 8 2018    Nov 6 2018        No  4234
## 5        29        17        Sep 19 2018    Nov 6 2018        No  6837
## 6        30        17        Sep 15 2018    Nov 6 2018        No  6518
##   AdvancedBookingDays IsDiwali DayBeforeDiwali DayAfterDiwali MarketShare
## 1                  54      Yes             Yes             No        15.4
## 2                  52      Yes             Yes             No        15.4
## 3                  48      Yes             Yes             No        13.2
## 4                  59      Yes             Yes             No        39.6
## 5                  48      Yes             Yes             No        13.2
## 6                  52      Yes             Yes             No        15.4
##   LoadFactor
## 1      83.32
## 2      83.32
## 3      94.06
## 4      87.20
## 5      94.06
## 6      83.32
# Per-column summary of the data frame (quartiles and mean for numeric
# columns, level counts for factor columns).
summary(Bomdelbom)
##   FlightNumber      Airline    DepartureCityCode ArrivalCityCode
##  6E 129 :  5   Air India: 41   BOM:130           BOM:175        
##  6E 155 :  5   IndiGo   : 80   DEL:175           DEL:130        
##  6E 167 :  5   Jet      :144                                    
##  6E 171 :  5   Spice Jet: 40                                    
##  6E 179 :  5                                                    
##  6E 181 :  5                                                    
##  (Other):275                                                    
##  DepartureTime   ArrivalTime   Departure FlyingMinutes   Aircraft  
##  Min.   : 225   Min.   :  20   AM:169    Min.   :125   Airbus:140  
##  1st Qu.: 755   1st Qu.: 935   PM:136    1st Qu.:135   Boeing:165  
##  Median :1035   Median :1215             Median :135               
##  Mean   :1250   Mean   :1329             Mean   :136               
##  3rd Qu.:1800   3rd Qu.:1925             3rd Qu.:140               
##  Max.   :2320   Max.   :2345             Max.   :145               
##                                                                    
##    PlaneModel     Capacity       SeatPitch       SeatWidth    
##  738    :113   Min.   :138.0   Min.   :29.00   Min.   :17.00  
##  A320   : 80   1st Qu.:156.0   1st Qu.:30.00   1st Qu.:17.00  
##  739    : 36   Median :180.0   Median :30.00   Median :17.00  
##  A321   : 25   Mean   :176.4   Mean   :30.26   Mean   :17.41  
##  A332   : 25   3rd Qu.:189.0   3rd Qu.:30.00   3rd Qu.:18.00  
##  77W    : 10   Max.   :303.0   Max.   :33.00   Max.   :18.00  
##  (Other): 16                                                  
##    DataCollectionDate     DateDeparture IsWeekend     Price      
##  Sep 10 2018:40       Nov 8 2018 : 62   No :264   Min.   : 2607  
##  Sep 13 2018:30       Nov 6 2018 : 59   Yes: 41   1st Qu.: 4051  
##  Sep 14 2018:30       Sep 21 2018: 23             Median : 4681  
##  Sep 15 2018:45       Sep 17 2018: 17             Mean   : 5395  
##  Sep 17 2018:39       Oct 19 2018: 16             3rd Qu.: 5725  
##  Sep 19 2018:81       Sep 26 2018: 16             Max.   :18015  
##  Sep 8 2018 :40       (Other)    :112                            
##  AdvancedBookingDays IsDiwali  DayBeforeDiwali DayAfterDiwali
##  Min.   : 2.0        No :184   No :246         No :243       
##  1st Qu.: 7.0        Yes:121   Yes: 59         Yes: 62       
##  Median :30.0                                                
##  Mean   :28.9                                                
##  3rd Qu.:52.0                                                
##  Max.   :61.0                                                
##                                                              
##   MarketShare      LoadFactor   
##  Min.   :13.20   Min.   :78.73  
##  1st Qu.:13.30   1st Qu.:83.32  
##  Median :15.40   Median :83.32  
##  Mean   :21.18   Mean   :85.13  
##  3rd Qu.:39.60   3rd Qu.:87.20  
##  Max.   :39.60   Max.   :94.06  
## 

Q1 Multiple linear regression

# Model 1: least-squares fit of the raw Price on booking lead time, airline,
# AM/PM departure, weekend and Diwali indicators, departure city, flying time
# and seat dimensions.
Fitmodel1 <- lm(Price ~ AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali + DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth, data=Bomdelbom)
# Coefficient table, residual quantiles, R-squared and overall F-test.
summary(Fitmodel1)
## 
## Call:
## lm(formula = Price ~ AdvancedBookingDays + Airline + Departure + 
##     IsWeekend + IsDiwali + DepartureCityCode + FlyingMinutes + 
##     SeatPitch + SeatWidth, data = Bomdelbom)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2671.2 -1266.2  -456.4   517.4 11953.9 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -4292.94    8897.87  -0.482   0.6298    
## AdvancedBookingDays    -87.70      12.47  -7.033 1.43e-11 ***
## AirlineIndiGo         -577.17     778.64  -0.741   0.4591    
## AirlineJet            -120.75     436.69  -0.277   0.7823    
## AirlineSpice Jet     -1118.38     697.85  -1.603   0.1101    
## DeparturePM           -589.79     275.23  -2.143   0.0329 *  
## IsWeekendYes          -345.92     408.06  -0.848   0.3973    
## IsDiwaliYes           4346.80     568.14   7.651 2.90e-13 ***
## DepartureCityCodeDEL -1413.46     351.54  -4.021 7.38e-05 ***
## FlyingMinutes           38.97      29.27   1.331   0.1841    
## SeatPitch             -279.19     226.64  -1.232   0.2190    
## SeatWidth              868.58     507.54   1.711   0.0881 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2079 on 293 degrees of freedom
## Multiple R-squared:  0.2695, Adjusted R-squared:  0.2421 
## F-statistic: 9.828 on 11 and 293 DF,  p-value: 3.604e-15
# Draw the default set of lm diagnostic plots for Model 1.
plot(Fitmodel1)

Q2 Log Linear Regression

# Model 2: same predictors as Model 1, but the response is log(Price), so
# coefficients act on the log scale of the fare.
Fitmodel2 <- lm(log(Price) ~ AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali + DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth, data=Bomdelbom )
# Coefficient table, residual quantiles, R-squared and overall F-test.
summary(Fitmodel2)
## 
## Call:
## lm(formula = log(Price) ~ AdvancedBookingDays + Airline + Departure + 
##     IsWeekend + IsDiwali + DepartureCityCode + FlyingMinutes + 
##     SeatPitch + SeatWidth, data = Bomdelbom)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57006 -0.19770 -0.05792  0.12935  1.24672 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.549474   1.243788   5.266 2.71e-07 ***
## AdvancedBookingDays  -0.014639   0.001743  -8.399 1.97e-15 ***
## AirlineIndiGo        -0.098622   0.108842  -0.906   0.3656    
## AirlineJet            0.001113   0.061043   0.018   0.9855    
## AirlineSpice Jet     -0.127169   0.097548  -1.304   0.1934    
## DeparturePM          -0.055844   0.038473  -1.452   0.1477    
## IsWeekendYes         -0.036748   0.057041  -0.644   0.5199    
## IsDiwaliYes           0.744738   0.079418   9.377  < 2e-16 ***
## DepartureCityCodeDEL -0.264017   0.049140  -5.373 1.58e-07 ***
## FlyingMinutes         0.008717   0.004092   2.131   0.0340 *  
## SeatPitch            -0.032824   0.031681  -1.036   0.3010    
## SeatWidth             0.122364   0.070947   1.725   0.0856 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2906 on 293 degrees of freedom
## Multiple R-squared:  0.3671, Adjusted R-squared:  0.3433 
## F-statistic: 15.45 on 11 and 293 DF,  p-value: < 2.2e-16
# Draw the default set of lm diagnostic plots for Model 2.
plot(Fitmodel2)

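Q3 Model 2 (log-linear) fits better than Model 1 judging by the adjusted R² (0.3433 vs. 0.2421). Note that the two models have different response variables (log(Price) vs. Price), so their R² values are not strictly comparable; the diagnostic plots should be taken into account as well.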

Q4 QQplot for normality : model 1

# car provides qqPlot().
library("car")
## Loading required package: carData
# Q-Q plot for Model 1; the value printed afterwards holds the labels of the
# observations qqPlot() flags as most extreme.
qqPlot(Fitmodel1)

## [1] 182 183

Q4 QQplot for normality : model 2

# car is loaded again so this chunk can run on its own; it provides qqPlot().
library("car")
# Q-Q plot for Model 2; the value printed afterwards holds the labels of the
# observations qqPlot() flags as most extreme.
qqPlot(Fitmodel2)

## [1] 182 183

Q5 Now let’s test normality (here applied to the response variable Price) with:

Shapiro-Wilk test:

# nortest supplies ad.test(); shapiro.test() itself comes from the stats
# package, so this library() call is not required for the line below.
library(nortest)
# Shapiro-Wilk normality test applied to the raw Price column.
# NOTE(review): this checks the marginal distribution of the response, not the
# residuals of Fitmodel1/Fitmodel2. The normality assumption of a linear model
# concerns its residuals, e.g. shapiro.test(residuals(Fitmodel1)) -- confirm
# which of the two is intended.
shapiro.test(Bomdelbom$Price)
## 
##  Shapiro-Wilk normality test
## 
## data:  Bomdelbom$Price
## W = 0.77653, p-value < 2.2e-16

Anderson-Darling

# nortest provides ad.test().
library(nortest)
# Anderson-Darling normality test applied to the raw Price column.
# NOTE(review): as with the Shapiro-Wilk call, this tests the response itself
# rather than model residuals, e.g. ad.test(residuals(Fitmodel1)) -- confirm
# which of the two is intended.
ad.test(Bomdelbom$Price)
## 
##  Anderson-Darling normality test
## 
## data:  Bomdelbom$Price
## A = 19.412, p-value < 2.2e-16

Q6 Box-cox Transformation

# caret provides BoxCoxTrans().
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Estimate the Box-Cox power parameter (lambda) from the Price values.
PriceTrans <- BoxCoxTrans(Bomdelbom$Price)
# Printing the object reports the input summary, the sample skewness and the
# estimated lambda.
PriceTrans
## Box-Cox Transformation
## 
## 305 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2607    4051    4681    5395    5725   18015 
## 
## Largest/Smallest: 6.91 
## Sample Skewness: 2.26 
## 
## Estimated Lambda: -0.8

The output above shows that the estimated Box-Cox parameter is lambda = -0.8.

Apply transformed variable to model

# Apply the fitted Box-Cox transformation to every Price value.
PriceNew <- predict(PriceTrans, Bomdelbom$Price)
# Peek at the first transformed values.
head(PriceNew)
## [1] 1.248375 1.249299 1.248351 1.248431 1.248931 1.248889

Integration of PriceNew into dataset

# Store the transformed response as the last column of the data frame.
# Assigning by name instead of cbind() keeps this step idempotent: running it
# again overwrites PriceNew rather than appending a duplicate column.
Bomdelbom$PriceNew <- PriceNew
head(Bomdelbom)
##   FlightNumber   Airline DepartureCityCode ArrivalCityCode DepartureTime
## 1       9W 313       Jet               DEL             BOM           225
## 2       9W 339       Jet               BOM             DEL           300
## 3       SG 161 Spice Jet               DEL             BOM           350
## 4       6E 171    IndiGo               DEL             BOM           455
## 5       SG 160 Spice Jet               BOM             DEL           555
## 6       9W 762       Jet               BOM             DEL           605
##   ArrivalTime Departure FlyingMinutes Aircraft PlaneModel Capacity
## 1         435        AM           130   Boeing        738      156
## 2         505        AM           125   Boeing        738      156
## 3         605        AM           135   Boeing        738      189
## 4         710        AM           135   Airbus       A320      180
## 5         805        AM           130   Boeing        738      189
## 6         815        AM           130   Boeing        738      156
##   SeatPitch SeatWidth DataCollectionDate DateDeparture IsWeekend Price
## 1        30        17        Sep 13 2018    Nov 6 2018        No  4051
## 2        30        17        Sep 15 2018    Nov 6 2018        No 11587
## 3        29        17        Sep 19 2018    Nov 6 2018        No  3977
## 4        30        18         Sep 8 2018    Nov 6 2018        No  4234
## 5        29        17        Sep 19 2018    Nov 6 2018        No  6837
## 6        30        17        Sep 15 2018    Nov 6 2018        No  6518
##   AdvancedBookingDays IsDiwali DayBeforeDiwali DayAfterDiwali MarketShare
## 1                  54      Yes             Yes             No        15.4
## 2                  52      Yes             Yes             No        15.4
## 3                  48      Yes             Yes             No        13.2
## 4                  59      Yes             Yes             No        39.6
## 5                  48      Yes             Yes             No        13.2
## 6                  52      Yes             Yes             No        15.4
##   LoadFactor PriceNew
## 1      83.32 1.248375
## 2      83.32 1.249299
## 3      94.06 1.248351
## 4      87.20 1.248431
## 5      94.06 1.248931
## 6      83.32 1.248889

Re-do the regression model using the transformed variable

# Box-Cox model: same predictors as Model 1, with the Box-Cox-transformed
# price (PriceNew) as the response.
TransModel <- lm(PriceNew ~ AdvancedBookingDays + Airline + Departure + IsWeekend + IsDiwali + DepartureCityCode + FlyingMinutes + SeatPitch + SeatWidth, data=Bomdelbom)
# Coefficient table, residual quantiles, R-squared and overall F-test.
summary(TransModel)
## 
## Call:
## lm(formula = PriceNew ~ AdvancedBookingDays + Airline + Departure + 
##     IsWeekend + IsDiwali + DepartureCityCode + FlyingMinutes + 
##     SeatPitch + SeatWidth, data = Bomdelbom)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -7.868e-04 -1.930e-04 -2.246e-05  1.777e-04  9.443e-04 
## 
## Coefficients:
##                        Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)           1.246e+00  1.218e-03 1022.900  < 2e-16 ***
## AdvancedBookingDays  -1.551e-05  1.707e-06   -9.084  < 2e-16 ***
## AirlineIndiGo        -1.190e-04  1.066e-04   -1.117  0.26509    
## AirlineJet            6.273e-06  5.979e-05    0.105  0.91652    
## AirlineSpice Jet     -1.075e-04  9.555e-05   -1.125  0.26147    
## DeparturePM          -3.419e-05  3.769e-05   -0.907  0.36506    
## IsWeekendYes         -2.680e-05  5.587e-05   -0.480  0.63183    
## IsDiwaliYes           7.987e-04  7.779e-05   10.267  < 2e-16 ***
## DepartureCityCodeDEL -2.844e-04  4.813e-05   -5.909 9.53e-09 ***
## FlyingMinutes         1.056e-05  4.008e-06    2.635  0.00887 ** 
## SeatPitch            -2.475e-05  3.103e-05   -0.798  0.42579    
## SeatWidth             1.143e-04  6.950e-05    1.645  0.10106    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002847 on 293 degrees of freedom
## Multiple R-squared:  0.4183, Adjusted R-squared:  0.3965 
## F-statistic: 19.16 on 11 and 293 DF,  p-value: < 2.2e-16

Q7-a Testing normality with qqplot

# Normal Q-Q panel of the residuals (plot.lm panel 2) for the Box-Cox model ...
plot(TransModel, which = 2)

# ... and the same panel for the untransformed-price model, for comparison.
plot(Fitmodel1, which = 2)

Q7-b Testing normality with the Shapiro-Wilk (SW) and Anderson-Darling (AD) tests

# nortest supplies ad.test(); shapiro.test() itself comes from the stats
# package.
library(nortest)
# Shapiro-Wilk normality test applied to the Box-Cox-transformed price.
# NOTE(review): this tests the transformed response, not the residuals of
# TransModel, e.g. shapiro.test(residuals(TransModel)) -- confirm intent.
shapiro.test(Bomdelbom$PriceNew)
## 
##  Shapiro-Wilk normality test
## 
## data:  Bomdelbom$PriceNew
## W = 0.98545, p-value = 0.003551
# nortest provides ad.test().
library(nortest)
# Anderson-Darling normality test applied to the Box-Cox-transformed price.
# NOTE(review): this tests the transformed response, not the residuals of
# TransModel, e.g. ad.test(residuals(TransModel)) -- confirm intent.
ad.test(Bomdelbom$PriceNew)
## 
##  Anderson-Darling normality test
## 
## data:  Bomdelbom$PriceNew
## A = 1.6763, p-value = 0.0002619

Q7-c Testing linearity of new model compared to first model

# Residuals-vs-fitted panel (plot.lm panel 1) for the Box-Cox model ...
plot(TransModel, which = 1)

# ... and the same panel for the untransformed-price model, for comparison.
plot(Fitmodel1, which = 1)