# Load the six-airlines fare data set.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors. R >= 4.0.0 reads them as character
# by default, which would replace the per-level counts shown by summary() with
# bare Length/Class/Mode rows and break code that relies on factor columns.
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
airlines <- read.csv(
  "C:/Program Files/RStudio/files/SixAirlinesDataV2.csv",
  stringsAsFactors = TRUE
)

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(airlines)
}

# Per-column summary: level counts for factors, quartiles and mean for numerics.
summary(airlines)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Extended descriptive statistics for every column of the data set
# (n, mean, sd, median, trimmed mean, mad, min, max, range, skew, kurtosis, se).
library("psych")
describe(x = airlines)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23

==> Month wise distribution of flights

# Bar chart of the number of flights per travel month.
# The column is converted to a factor explicitly so the call works whether
# read.csv() produced a factor or a character vector (plot() cannot draw a
# character vector), and the levels are put in calendar order instead of the
# alphabetical default (Aug, Jul, Oct, Sep).
travel_months <- as.character(airlines$TravelMonth)
month_levels <- c(
  intersect(month.abb, travel_months),       # known months, in Jan..Dec order
  setdiff(unique(travel_months), month.abb)  # any unexpected labels go last
)
plot(
  factor(travel_months, levels = month_levels),
  xlab = "Month",
  ylab = "Flights"
)

==> Duration wise distribution of flights

# Histogram of flight durations across all flights.
hist(
  airlines[["FlightDuration"]],
  col  = "cyan",
  main = "Flight Duration plot",
  xlab = "Duration(hours)",
  ylab = "Flights"
)

==> Seats distribution.

# Side-by-side histograms of seat counts in the economy and premium cabins.
# par() returns the previous settings; they are restored afterwards so the
# 1x2 layout does not leak into later single-panel plots.
old_par <- par(mfrow = c(1, 2))
hist(airlines$SeatsEconomy,
     main = "Seats distribution in Economy",
     ylab = "Flight",
     xlab = "NO. of seats",
     col = "cyan",
     breaks = 10)
hist(airlines$SeatsPremium,
     main = "Seats distribution in Premium",
     ylab = "Flight",
     xlab = "NO. of seats",
     col = "cyan",
     breaks = 10)
par(old_par)

==> Pitch distribution for economy and premium

# Side-by-side histograms of seat pitch in the economy and premium cabins.
# par() returns the previous settings; they are restored afterwards so the
# 1x2 layout does not leak into later single-panel plots.
old_par <- par(mfrow = c(1, 2))
hist(airlines$PitchEconomy,
     main = "Economy",
     ylab = "Flight",
     xlab = "Pitch",
     col = "cyan",
     breaks = 10)
hist(airlines$PitchPremium,
     main = "Premium",
     ylab = "Flight",
     xlab = "Pitch",
     col = "cyan",
     breaks = 10)
par(old_par)

==> Width distribution in Economy and Premium

# Side-by-side histograms of seat width in the economy and premium cabins.
# par() returns the previous settings; they are restored afterwards so the
# 1x2 layout does not leak into later single-panel plots.
old_par <- par(mfrow = c(1, 2))
hist(airlines$WidthEconomy,
     main = "Economy",
     ylab = "Flight",
     xlab = "Width",
     col = "cyan",
     breaks = 10)
hist(airlines$WidthPremium,
     main = "Premium",
     ylab = "Flight",
     xlab = "Width",
     col = "cyan",
     breaks = 10)
par(old_par)

==> Price distribution in Economy and Premium

# Side-by-side histograms of ticket price in the economy and premium cabins.
# par() returns the previous settings; they are restored afterwards so the
# 1x2 layout does not leak into later single-panel plots.
old_par <- par(mfrow = c(1, 2))
hist(airlines$PriceEconomy,
     main = "Economy",
     ylab = "Flight",
     xlab = "Price",
     col = "cyan",
     breaks = 10)
hist(airlines$PricePremium,
     main = "Premium",
     ylab = "Flight",
     xlab = "Price",
     col = "cyan",
     breaks = 10)
par(old_par)

==> Pitch difference between Economy and Premium

# Histogram of the PitchDifference column (premium vs economy pitch).
hist(
  airlines[["PitchDifference"]],
  main = "Pitch Difference",
  xlab = "Difference",
  ylab = "Flight",
  col  = "cyan"
)

==> Width difference between Economy and Premium

# Histogram of the WidthDifference column (premium vs economy width).
# The x-axis carries the difference values and the y-axis the number of
# flights; the labels were previously swapped.
hist(
  airlines$WidthDifference,
  main = "Width Difference",
  xlab = "Difference",
  ylab = "Flight",
  col  = "cyan"
)

==> Relative price distribution for Economy and Premium

# Index plot: PriceRelative of each row plotted against its row position.
plot(
  airlines[["PriceRelative"]],
  ylab = "Price Relative",
  xlab = "Flight"
)

==> Plot of relative price and width difference

# Scatter plot of relative price (y) against width difference (x).
plot(
  PriceRelative ~ WidthDifference,
  data = airlines,
  main = "Plot of Relative price and Width difference",
  xlab = "Width difference",
  ylab = "Relative Price"
)

==> Plot of relative price and pitch difference

# Scatter plot of relative price (y) against pitch difference (x).
# The title now names the pitch difference; it previously repeated the
# "Width difference" title of the preceding plot.
plot(
  airlines$PriceRelative ~ airlines$PitchDifference,
  main = "Plot of Relative price and Pitch difference",
  xlab = "Pitch difference",
  ylab = "Relative Price"
)

==> Corrgram representing the pairwise correlations between the variables in the dataset

# Correlogram of the data set: shaded cells in the lower triangle, pie glyphs
# in the upper triangle and variable names on the diagonal.
# order = TRUE asks corrgram() to re-order the variables rather than keep the
# column order.
library("corrgram")
corrgram(
  airlines,
  order       = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel  = panel.txt,
  main        = " corrgram for the distributions in the dataset "
)

==> Hypothesis: There is no significant change in economy price when there is change in seats, pitch or width

# Linear regression of economy price on economy seat count, pitch and width;
# summary() prints the coefficient table with per-term p-values.
fit <- lm(
  formula = PriceEconomy ~ SeatsEconomy + PitchEconomy + WidthEconomy,
  data = airlines
)
summary(fit)
## 
## Call:
## lm(formula = PriceEconomy ~ SeatsEconomy + PitchEconomy + WidthEconomy, 
##     data = airlines)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2209.23  -762.84   -39.25   727.96  1922.01 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.414e+04  2.250e+03  -6.283 7.79e-10 ***
## SeatsEconomy  1.352e+00  6.051e-01   2.234   0.0260 *  
## PitchEconomy  5.700e+02  6.846e+01   8.325 1.00e-15 ***
## WidthEconomy -1.459e+02  8.584e+01  -1.700   0.0898 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 915.7 on 454 degrees of freedom
## Multiple R-squared:  0.1471, Adjusted R-squared:  0.1415 
## F-statistic:  26.1 on 3 and 454 DF,  p-value: 1.366e-15

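As we can see, the p-value is below 0.05 for both seats (0.026) and pitch (1.00e-15), so the null hypothesis is rejected for them: economy price changes significantly with the number of seats and with pitch. For width the p-value (0.0898) is above 0.05, so the null hypothesis cannot be rejected: at the 5% level there is no significant change in price when width is changed.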

==> Hypothesis: There is no significant change in premium price when there is change in seats, pitch or width

# Linear regression of premium price on premium seat count, pitch and width;
# summary() prints the coefficient table with per-term p-values.
fit <- lm(
  formula = PricePremium ~ SeatsPremium + PitchPremium + WidthPremium,
  data = airlines
)
summary(fit)
## 
## Call:
## lm(formula = PricePremium ~ SeatsPremium + PitchPremium + WidthPremium, 
##     data = airlines)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2219.2  -936.9  -120.4  1078.6  5762.8 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2127.171   1736.937  -1.225    0.221    
## SeatsPremium    21.095      4.432   4.760 2.61e-06 ***
## PitchPremium    87.481     67.656   1.293    0.197    
## WidthPremium    -2.744     81.021  -0.034    0.973    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1256 on 454 degrees of freedom
## Multiple R-squared:  0.05501,    Adjusted R-squared:  0.04877 
## F-statistic: 8.809 on 3 and 454 DF,  p-value: 1.094e-05

As we can see, the p-value is above 0.05 for both width (0.973) and pitch (0.197), so the null hypothesis cannot be rejected for them: there is no significant change in premium price with width or pitch. For seats the p-value (2.61e-06) is below 0.05, so the null hypothesis is rejected: premium price changes significantly when the number of seats changes.

==> Hypothesis: There is no significant change in relative price with the difference of width

# Simple linear regression of relative price on the width difference;
# summary() prints the coefficient table with per-term p-values.
fit <- lm(
  formula = PriceRelative ~ WidthDifference,
  data = airlines
)
summary(fit)
## 
## Call:
## lm(formula = PriceRelative ~ WidthDifference, data = airlines)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8028 -0.2907 -0.0766  0.1852  1.1893 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.18660    0.03132   5.958 5.11e-09 ***
## WidthDifference  0.18406    0.01551  11.869  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3943 on 456 degrees of freedom
## Multiple R-squared:  0.236,  Adjusted R-squared:  0.2343 
## F-statistic: 140.9 on 1 and 456 DF,  p-value: < 2.2e-16

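As the p-value (< 2e-16) is below 0.05, the null hypothesis is rejected: relative price changes significantly with the width difference (an estimated increase of about 0.18 per unit of width difference).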