# Load the six-airline fare data. Text columns are read as factors so that
# summary() tabulates their levels and factor-based plotting works on
# R >= 4.0, where stringsAsFactors defaults to FALSE.
airline <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
summary(airline)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Histogram of economy-class prices.
hist(airline$PriceEconomy,
  main = "Distribution of economy class prices",
  xlab = "price of economy class", ylab = "frequency", col = "blue"
)

# Histogram of the number of economy seats.
hist(airline$SeatsEconomy,
  main = "histogram to show no of economy seats",
  xlab = "seats of economy class", ylab = "frequency", col = "lightblue"
)

# Histogram of the number of premium seats.
hist(airline$SeatsPremium,
  main = "no of premium seats",
  xlab = "seats of premium class", ylab = "frequency", col = "blue"
)

# Histogram of the total number of seats.
hist(airline$SeatsTotal,
  main = "total no seats",
  xlab = "total seats", ylab = "count", col = "lightblue"
)

# Overall spread of economy prices, drawn horizontally.
boxplot(airline$PriceEconomy,
  xlab = "price of economy class", ylab = "economy class",
  main = "economy class prices", horizontal = TRUE
)

# Grouped boxplots. boxplot() has no `vertical` argument (vertical is the
# default, horizontal = FALSE), so it is not passed: an unknown named argument
# is forwarded as a graphical parameter and only produces warnings.

# Economy prices by airline.
boxplot(PriceEconomy ~ Airline,
  data = airline,
  ylab = "price of economy class", xlab = "airline",
  main = "economy class prices, all airlines"
)

# Economy prices by flight type (IsInternational).
boxplot(PriceEconomy ~ IsInternational,
  data = airline,
  xlab = "Type of flight", ylab = "Price of economy seats",
  main = "economy class prices, all types"
)

# Premium prices by flight type (IsInternational).
boxplot(PricePremium ~ IsInternational,
  data = airline,
  xlab = "Type of flight", ylab = "Price of premium seats",
  main = "premium class prices, all types"
)

# Economy prices by travel month.
boxplot(PriceEconomy ~ TravelMonth,
  data = airline,
  xlab = "Month of the year", ylab = "Price of economy seats",
  main = "economy class prices, month wise"
)

# Premium prices by travel month.
boxplot(PricePremium ~ TravelMonth,
  data = airline,
  xlab = "Month of the year", ylab = "Price of premium seats",
  main = "premium class prices, month wise"
)

# Economy pitch per airline. Written as an explicit formula boxplot so the
# plot does not depend on Airline having been read as a factor (plot() on a
# factor x and numeric y draws the same boxplots).
boxplot(PitchEconomy ~ Airline,
  data = airline, col = "blue",
  main = "Pitch of economy class, as per airline",
  ylab = "Pitch of economy class ", xlab = "airline"
)

# Scatterplot of economy price against economy pitch.
plot(airline$PitchEconomy, airline$PriceEconomy,
  col = "blue",
  main = "Relation between pitch and price of economy class",
  ylab = "Price of economy class ", xlab = "pitch of economy class"
)

# Scatterplot of premium price against premium pitch.
plot(airline$PitchPremium, airline$PricePremium,
  col = "blue",
  main = "Relation between pitch and price of premium",
  ylab = "Price of premium class ", xlab = "pitch of premium class"
)

library(car)
# Scatterplot matrix of relative price, flight duration and premium seats.
# The diagonal panels are requested through a list because car >= 3.0 takes
# `diagonal` as a list of settings; a bare string is treated as an unnamed
# argument and ignored. Passing `data = airline` also gives the panels plain
# column names instead of "airline$..." labels.
scatterplotMatrix(
  ~ PriceRelative + FlightDuration + SeatsPremium,
  data = airline,
  cex = 0.6,
  diagonal = list(method = "histogram")
)

library(corrgram)
# Correlogram of the data set: shaded cells in the lower triangle, pie
# glyphs in the upper triangle, text labels on the diagonal, with
# order = TRUE requesting reordering of the variables.
corrgram(
  airline,
  order = TRUE,
  main = "Correlation of relative price!!",
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

t-tests

# Two-sample t-test (Welch, per the recorded output) comparing the mean of
# PriceEconomy with the mean of PriceRelative.
# NOTE(review): the summary above shows these columns on very different
# scales (PriceEconomy 65-3593, PriceRelative 0.02-1.89), so a difference in
# means may not be a meaningful comparison -- confirm whether a correlation
# test was intended.
t.test(airline$PriceEconomy,airline$PriceRelative)
## 
##  Welch Two Sample t-test
## 
## data:  airline$PriceEconomy and airline$PriceRelative
## t = 28.727, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1235.840 1417.339
## sample estimates:
##    mean of x    mean of y 
## 1327.0764192    0.4872052
# Two-sample t-test (Welch, per the recorded output) comparing the mean of
# PricePremium with the mean of PriceRelative.
# NOTE(review): the two columns are on very different scales, so this
# difference in means may not be a meaningful comparison -- confirm intent.
t.test(airline$PricePremium,airline$PriceRelative)
## 
##  Welch Two Sample t-test
## 
## data:  airline$PricePremium and airline$PriceRelative
## t = 30.649, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1726.486 1963.055
## sample estimates:
##    mean of x    mean of y 
## 1845.2576419    0.4872052
# Two-sample t-test of PriceRelative between the two IsInternational groups
# (Domestic vs International).
t.test(airline$PriceRelative~ airline$IsInternational)
## 
##  Welch Two Sample t-test
## 
## data:  airline$PriceRelative by airline$IsInternational
## t = -19.451, df = 446.12, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4855215 -0.3964139
## sample estimates:
##      mean in group Domestic mean in group International 
##                   0.0847500                   0.5257177
# Two-sample t-test (Welch, per the recorded output) comparing the mean of
# FlightDuration with the mean of PriceRelative.
# NOTE(review): these columns measure different things, so this difference
# in means may not be a meaningful comparison -- confirm intent.
t.test(airline$FlightDuration, airline$PriceRelative)
## 
##  Welch Two Sample t-test
## 
## data:  airline$FlightDuration and airline$PriceRelative
## t = 42.499, df = 471.79, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  6.762785 7.418482
## sample estimates:
## mean of x mean of y 
## 7.5778384 0.4872052

Linear Regression Model

Hypothesis 1: Economy price is independent of airline, aircraft, IsInternational, travel month and flight duration
# Linear model of PriceEconomy on airline, aircraft, international flag,
# travel month and flight duration; summary() prints the coefficient table.
# `model` is reassigned by each later fit.
model <-lm(airline$PriceEconomy~ airline$Airline+airline$Aircraft+airline$IsInternational+airline$TravelMonth+airline$FlightDuration)
summary(model)
## 
## Call:
## lm(formula = airline$PriceEconomy ~ airline$Airline + airline$Aircraft + 
##     airline$IsInternational + airline$TravelMonth + airline$FlightDuration)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2030.72  -240.11    81.98   328.30  1189.83 
## 
## Coefficients:
##                                        Estimate Std. Error t value
## (Intercept)                            508.4191   254.9164   1.994
## airline$AirlineBritish               -1408.2061    75.2220 -18.721
## airline$AirlineDelta                  -696.8587   225.1777  -3.095
## airline$AirlineJet                   -2110.6421   105.5350 -19.999
## airline$AirlineSingapore             -2083.4785   103.9639 -20.040
## airline$AirlineVirgin                -1187.7312    90.6334 -13.105
## airline$AircraftBoeing                 242.4410    57.5816   4.210
## airline$IsInternationalInternational  1239.4329   243.7106   5.086
## airline$TravelMonthJul                  93.9861    77.4558   1.213
## airline$TravelMonthOct                 -46.0443    65.9322  -0.698
## airline$TravelMonthSep                   0.8635    65.7224   0.013
## airline$FlightDuration                  99.5027     9.0612  10.981
##                                      Pr(>|t|)    
## (Intercept)                           0.04671 *  
## airline$AirlineBritish                < 2e-16 ***
## airline$AirlineDelta                  0.00209 ** 
## airline$AirlineJet                    < 2e-16 ***
## airline$AirlineSingapore              < 2e-16 ***
## airline$AirlineVirgin                 < 2e-16 ***
## airline$AircraftBoeing               3.08e-05 ***
## airline$IsInternationalInternational 5.40e-07 ***
## airline$TravelMonthJul                0.22561    
## airline$TravelMonthOct                0.48532    
## airline$TravelMonthSep                0.98952    
## airline$FlightDuration                < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 525.3 on 446 degrees of freedom
## Multiple R-squared:  0.7243, Adjusted R-squared:  0.7175 
## F-statistic: 106.5 on 11 and 446 DF,  p-value: < 2.2e-16

The p-values of all airline terms are less than 0.05. So, the price of economy class depends on the airline. The p-value of aircraft is < 0.05. So, the price of economy class depends on the aircraft. The p-values of the travel month terms are > 0.05. So, the economy price does not depend on the travel month. The p-value of flight duration is < 0.05. So, the economy price depends on the duration of the flight. The p-value of IsInternational is < 0.05. So, the price of economy class depends on whether the flight is international or not.

Hypothesis 2: Premium price is independent of airline, aircraft, IsInternational, travel month and flight duration
# Linear model of PricePremium on the same predictors as the economy-price
# model; this overwrites the previous `model`.
model <-lm(airline$PricePremium~ airline$Airline+airline$Aircraft+airline$IsInternational+airline$TravelMonth+airline$FlightDuration)
summary(model)
## 
## Call:
## lm(formula = airline$PricePremium ~ airline$Airline + airline$Aircraft + 
##     airline$IsInternational + airline$TravelMonth + airline$FlightDuration)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2083.7  -359.3    66.5   353.1  4401.1 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                           -228.096    366.139  -0.623 0.533618
## airline$AirlineBritish                -988.647    108.042  -9.151  < 2e-16
## airline$AirlineDelta                  -210.972    323.425  -0.652 0.514540
## airline$AirlineJet                   -1865.891    151.581 -12.310  < 2e-16
## airline$AirlineSingapore             -2110.724    149.325 -14.135  < 2e-16
## airline$AirlineVirgin                 -381.859    130.178  -2.933 0.003526
## airline$AircraftBoeing                 287.947     82.705   3.482 0.000548
## airline$IsInternationalInternational  1596.275    350.044   4.560 6.61e-06
## airline$TravelMonthJul                  93.894    111.251   0.844 0.399132
## airline$TravelMonthOct                 -29.274     94.699  -0.309 0.757369
## airline$TravelMonthSep                  -5.118     94.398  -0.054 0.956783
## airline$FlightDuration                 171.706     13.015  13.193  < 2e-16
##                                         
## (Intercept)                             
## airline$AirlineBritish               ***
## airline$AirlineDelta                    
## airline$AirlineJet                   ***
## airline$AirlineSingapore             ***
## airline$AirlineVirgin                ** 
## airline$AircraftBoeing               ***
## airline$IsInternationalInternational ***
## airline$TravelMonthJul                  
## airline$TravelMonthOct                  
## airline$TravelMonthSep                  
## airline$FlightDuration               ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 754.4 on 446 degrees of freedom
## Multiple R-squared:  0.6652, Adjusted R-squared:  0.657 
## F-statistic: 80.57 on 11 and 446 DF,  p-value: < 2.2e-16

P-Value < 0.05 for AirlineBritish, AirlineJet, AirlineSingapore and AirlineVirgin. Therefore PricePremium DEPENDS on them. P-Value > 0.05 for AirlineDelta. Therefore PricePremium is INDEPENDENT of it. P-Value < 0.05 for AircraftBoeing. Therefore PricePremium DEPENDS on it. P-Value > 0.05 for TravelMonthJul, TravelMonthOct and TravelMonthSep. Therefore PricePremium is INDEPENDENT of them. P-Value < 0.05 for FlightDuration. Therefore PricePremium DEPENDS on it. P-Value < 0.05 for IsInternational. Therefore PricePremium DEPENDS on it.

# Print the coefficient estimates of the most recent fit held in `model`
# (the PricePremium model at this point in the script).
model$coefficients
##                          (Intercept)               airline$AirlineBritish 
##                          -228.096293                          -988.646678 
##                 airline$AirlineDelta                   airline$AirlineJet 
##                          -210.972401                         -1865.891108 
##             airline$AirlineSingapore                airline$AirlineVirgin 
##                         -2110.723652                          -381.858647 
##               airline$AircraftBoeing airline$IsInternationalInternational 
##                           287.946791                          1596.275072 
##               airline$TravelMonthJul               airline$TravelMonthOct 
##                            93.893594                           -29.274049 
##               airline$TravelMonthSep               airline$FlightDuration 
##                            -5.118392                           171.705863
Hypothesis 3: The relative price of premium and economy is independent of airline, aircraft, IsInternational, travel month and flight duration.
# Linear model of PriceRelative on airline, aircraft, international flag,
# travel month and flight duration; this overwrites the previous `model`.
model <-lm(airline$PriceRelative~ airline$Airline+airline$Aircraft+airline$IsInternational+airline$TravelMonth+airline$FlightDuration)
summary(model)
## 
## Call:
## lm(formula = airline$PriceRelative ~ airline$Airline + airline$Aircraft + 
##     airline$IsInternational + airline$TravelMonth + airline$FlightDuration)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76042 -0.19556 -0.06043  0.10136  1.49537 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                          -0.269549   0.175974  -1.532 0.126293
## airline$AirlineBritish                0.254538   0.051927   4.902 1.33e-06
## airline$AirlineDelta                  0.208889   0.155445   1.344 0.179692
## airline$AirlineJet                    0.863926   0.072853  11.858  < 2e-16
## airline$AirlineSingapore              0.278125   0.071768   3.875 0.000122
## airline$AirlineVirgin                 0.551030   0.062566   8.807  < 2e-16
## airline$AircraftBoeing                0.044134   0.039750   1.110 0.267473
## airline$IsInternationalInternational  0.174883   0.168239   1.039 0.299138
## airline$TravelMonthJul               -0.009557   0.053469  -0.179 0.858223
## airline$TravelMonthOct                0.053414   0.045514   1.174 0.241195
## airline$TravelMonthSep               -0.009941   0.045370  -0.219 0.826662
## airline$FlightDuration                0.029682   0.006255   4.745 2.81e-06
##                                         
## (Intercept)                             
## airline$AirlineBritish               ***
## airline$AirlineDelta                    
## airline$AirlineJet                   ***
## airline$AirlineSingapore             ***
## airline$AirlineVirgin                ***
## airline$AircraftBoeing                  
## airline$IsInternationalInternational    
## airline$TravelMonthJul                  
## airline$TravelMonthOct                  
## airline$TravelMonthSep                  
## airline$FlightDuration               ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3626 on 446 degrees of freedom
## Multiple R-squared:  0.368,  Adjusted R-squared:  0.3524 
## F-statistic: 23.61 on 11 and 446 DF,  p-value: < 2.2e-16

The p-value of flight duration is < 0.05. So, relative price depends on flight duration. The p-values of the travel months are > 0.05. So, it does not depend on the travel month. The p-value of IsInternational is > 0.05. So, it does not matter whether the flight is international or not.

Hypothesis 4: PriceRelative is independent of PitchEconomy , PitchPremium and PitchDifference
# Linear model of PriceRelative on economy pitch, premium pitch and pitch
# difference.
# NOTE(review): the recorded summary reports the PitchDifference coefficient
# as NA ("1 not defined because of singularities"); presumably
# PitchDifference is an exact linear combination of the two pitch columns
# (e.g. PitchPremium - PitchEconomy) -- confirm against the data.
model <-lm(airline$PriceRelative~ airline$PitchEconomy+airline$PitchPremium+airline$PitchDifference)
summary(model)
## 
## Call:
## lm(formula = airline$PriceRelative ~ airline$PitchEconomy + airline$PitchPremium + 
##     airline$PitchDifference)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7897 -0.2773 -0.1373  0.2003  1.2933 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.99346    1.51094   1.981   0.0482 *  
## airline$PitchEconomy    -0.19060    0.03394  -5.616 3.41e-08 ***
## airline$PitchPremium     0.09086    0.01692   5.369 1.27e-07 ***
## airline$PitchDifference       NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3968 on 455 degrees of freedom
## Multiple R-squared:  0.2279, Adjusted R-squared:  0.2245 
## F-statistic: 67.14 on 2 and 455 DF,  p-value: < 2.2e-16

The p-values of PitchEconomy and PitchPremium are < 0.05. So, relative price is dependent on both of these factors.

# Simple linear regression of PriceRelative on PitchDifference alone, fitted
# separately from the model that contains both pitch columns.
model <-lm(airline$PriceRelative~ airline$PitchDifference)
summary(model)
## 
## Call:
## lm(formula = airline$PriceRelative ~ airline$PitchDifference)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7643 -0.3247 -0.1146  0.2052  1.2954 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -0.31456    0.07317  -4.299  2.1e-05 ***
## airline$PitchDifference  0.11989    0.01058  11.331  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3985 on 456 degrees of freedom
## Multiple R-squared:  0.2197, Adjusted R-squared:  0.218 
## F-statistic: 128.4 on 1 and 456 DF,  p-value: < 2.2e-16

p-value <0.05. So, relative price depends on pitch difference.

Hypothesis 5: PriceRelative is independent of WidthEconomy , WidthPremium and WidthDifference
# Linear model of PriceRelative on economy width, premium width and width
# difference.
# NOTE(review): the recorded summary reports the WidthDifference coefficient
# as NA ("1 not defined because of singularities"); presumably
# WidthDifference is an exact linear combination of the two width columns
# (e.g. WidthPremium - WidthEconomy) -- confirm against the data.
model <- lm(airline$PriceRelative~ airline$WidthEconomy+airline$WidthPremium+airline$WidthDifference)
summary(model)
## 
## Call:
## lm(formula = airline$PriceRelative ~ airline$WidthEconomy + airline$WidthPremium + 
##     airline$WidthDifference)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74629 -0.30691 -0.04226  0.14303  1.19365 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -2.36366    0.64308  -3.676 0.000266 ***
## airline$WidthEconomy    -0.06938    0.03267  -2.124 0.034229 *  
## airline$WidthPremium     0.20997    0.01660  12.649  < 2e-16 ***
## airline$WidthDifference       NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.388 on 455 degrees of freedom
## Multiple R-squared:  0.2616, Adjusted R-squared:  0.2583 
## F-statistic: 80.59 on 2 and 455 DF,  p-value: < 2.2e-16

The p-values of WidthEconomy (0.034) and WidthPremium (< 2e-16) are both < 0.05. So, relative price depends on both of them.

# Simple linear regression of PriceRelative on WidthDifference alone, fitted
# separately from the model that contains both width columns.
model <-lm(airline$PriceRelative~ airline$WidthDifference)
summary(model)
## 
## Call:
## lm(formula = airline$PriceRelative ~ airline$WidthDifference)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8028 -0.2907 -0.0766  0.1852  1.1893 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.18660    0.03132   5.958 5.11e-09 ***
## airline$WidthDifference  0.18406    0.01551  11.869  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3943 on 456 degrees of freedom
## Multiple R-squared:  0.236,  Adjusted R-squared:  0.2343 
## F-statistic: 140.9 on 1 and 456 DF,  p-value: < 2.2e-16

p-value < 0.05. So, relative price depends on width difference.

Hypothesis 6: PriceRelative is independent of SeatsEconomy, SeatsPremium and PercentPremiumSeats

# Linear model of PriceRelative on the number of economy seats, the number of
# premium seats and the percentage of premium seats.
model <-lm( airline$PriceRelative ~ airline$SeatsEconomy + airline$SeatsPremium + airline$PercentPremiumSeats)
summary(model)
## 
## Call:
## lm(formula = airline$PriceRelative ~ airline$SeatsEconomy + airline$SeatsPremium + 
##     airline$PercentPremiumSeats)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56643 -0.38832 -0.09288  0.28291  1.32335 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.134300   0.196296   5.779  1.4e-08 ***
## airline$SeatsEconomy        -0.002131   0.000940  -2.267  0.02386 *  
## airline$SeatsPremium         0.011769   0.005842   2.015  0.04452 *  
## airline$PercentPremiumSeats -0.041789   0.013233  -3.158  0.00169 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4435 on 454 degrees of freedom
## Multiple R-squared:  0.03746,    Adjusted R-squared:  0.0311 
## F-statistic:  5.89 on 3 and 454 DF,  p-value: 0.0005977

The p-values for SeatsEconomy, SeatsPremium and PercentPremiumSeats are all less than 0.05. Hence, relative price depends on all three.