# Load the six-airlines fare data set.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors. Since R 4.0.0 read.csv() reads
# strings as character by default, which would change the summary() output
# below (no per-level counts) and break plots that expect a factor.
airlines <- read.csv(
  "C:/Program Files/RStudio/files/SixAirlinesDataV2.csv",
  stringsAsFactors = TRUE
)
# View() opens a spreadsheet-style viewer; only do that when a user is present.
if (interactive()) {
  View(airlines)
}
# Per-column summary: level counts for factors, quartiles/mean for numerics.
summary(airlines)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
library(psych)
# Descriptive statistics for every column via psych::describe(): n, mean, sd,
# median, trimmed mean, mad, min, max, range, skew, kurtosis and se.
# Columns shown with a trailing * in the output are the non-numeric ones
# (Airline, Aircraft, TravelMonth, IsInternational).
describe(airlines)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
==> Month wise distribution of flights
# Bar plot of the number of flights per travel month.
# The column is converted to a factor whose levels follow calendar order
# (month.abb is "Jan", "Feb", ..., "Dec"), so the bars appear chronologically
# instead of alphabetically, and the plot also works when the column was read
# as character rather than factor. droplevels() removes months with no flights.
travel_month <- droplevels(factor(airlines$TravelMonth, levels = month.abb))
plot(travel_month, xlab = "Month", ylab = "Flights")
==> Duration wise distribution of flights
# Histogram of flight durations (hours) over all flights in the data set.
hist(
  x = airlines$FlightDuration,
  col = "cyan",
  main = "Flight Duration plot",
  xlab = "Duration(hours)",
  ylab = "Flights"
)
==> Seats distribution.
# Side-by-side histograms of the number of economy and premium seats.
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
hist(airlines$SeatsEconomy,
     main = "Seats distribution in Economy",
     ylab = "Flight",
     xlab = "NO. of seats",
     col = "cyan",
     breaks = 10)
hist(airlines$SeatsPremium,
     main = "Seats distribution in Premium",
     ylab = "Flight",
     xlab = "NO. of seats",
     col = "cyan",
     breaks = 10)
par(old_par)
==> Pitch distribution for economy and premium
# Side-by-side histograms of seat pitch in economy and premium.
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
hist(airlines$PitchEconomy,
     main = "Economy",
     ylab = "Flight",
     xlab = "Pitch",
     col = "cyan",
     breaks = 10)
hist(airlines$PitchPremium,
     main = "Premium",
     ylab = "Flight",
     xlab = "Pitch",
     col = "cyan",
     breaks = 10)
par(old_par)
==> Width distribution in Economy and Premium
# Side-by-side histograms of seat width in economy and premium.
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
hist(airlines$WidthEconomy,
     main = "Economy",
     ylab = "Flight",
     xlab = "Width",
     col = "cyan",
     breaks = 10)
hist(airlines$WidthPremium,
     main = "Premium",
     ylab = "Flight",
     xlab = "Width",
     col = "cyan",
     breaks = 10)
par(old_par)
==> Price distribution in Economy and Premium
# Side-by-side histograms of ticket price in economy and premium.
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
hist(airlines$PriceEconomy,
     main = "Economy",
     ylab = "Flight",
     xlab = "Price",
     col = "cyan",
     breaks = 10)
hist(airlines$PricePremium,
     main = "Premium",
     ylab = "Flight",
     xlab = "Price",
     col = "cyan",
     breaks = 10)
par(old_par)
==> Pitch difference between Economy and Premium
# Histogram of the pitch difference between premium and economy seats.
hist(
  x = airlines$PitchDifference,
  main = "Pitch Difference",
  xlab = "Difference",
  ylab = "Flight",
  col = "cyan"
)
==> Width difference between Economy and Premium
# Histogram of the width difference between premium and economy seats.
# The x axis holds the binned difference and the y axis the number of flights;
# the two axis labels were previously swapped.
hist(
  airlines$WidthDifference,
  main = "Width Difference",
  xlab = "Difference",
  ylab = "Flight",
  col = "cyan"
)
==> Relative price distribution for Economy and Premium
# Index plot of relative price: one point per row (flight) of the data set,
# in row order.
plot(
  x = airlines$PriceRelative,
  ylab = "Price Relative",
  xlab = "Flight"
)
==> Plot of relative price and width difference
# Scatter plot of relative price (y) against width difference (x).
plot(
  PriceRelative ~ WidthDifference,
  data = airlines,
  main = "Plot of Relative price and Width difference",
  xlab = "Width difference",
  ylab = "Relative Price"
)
==> Plot of relative price and pitch difference
# Scatter plot of relative price (y) against pitch difference (x).
# The title now names pitch; it previously repeated the width-difference title.
plot(
  airlines$PriceRelative ~ airlines$PitchDifference,
  main = "Plot of Relative price and Pitch difference",
  xlab = "Pitch difference",
  ylab = "Relative Price"
)
==> Corrgram representing the distributions
library(corrgram)
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal, with variables reordered
# (order = TRUE).
corrgram(
  airlines,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = " corrgram for the distributions in the dataset "
)
==> Hypothesis: There is no significant change in economy price when there is change in seats, pitch or width
# Linear model of economy price on economy seat count, pitch and width;
# summary() prints the coefficient table with t-tests and the overall F-test.
fit <- lm(
  formula = PriceEconomy ~ SeatsEconomy + PitchEconomy + WidthEconomy,
  data = airlines
)
summary(fit)
##
## Call:
## lm(formula = PriceEconomy ~ SeatsEconomy + PitchEconomy + WidthEconomy,
## data = airlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2209.23 -762.84 -39.25 727.96 1922.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.414e+04 2.250e+03 -6.283 7.79e-10 ***
## SeatsEconomy 1.352e+00 6.051e-01 2.234 0.0260 *
## PitchEconomy 5.700e+02 6.846e+01 8.325 1.00e-15 ***
## WidthEconomy -1.459e+02 8.584e+01 -1.700 0.0898 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 915.7 on 454 degrees of freedom
## Multiple R-squared: 0.1471, Adjusted R-squared: 0.1415
## F-statistic: 26.1 on 3 and 454 DF, p-value: 1.366e-15
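As we can see, the p-value is below 0.05 for both seats (0.0260) and pitch (1.00e-15), so the null hypothesis of no change is rejected for them: economy price changes significantly with the number of seats and with pitch. For width the p-value (0.0898) is above 0.05, so we fail to reject the null hypothesis: there is no significant change in economy price when width is changed (at the 5% level).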
==> Hypothesis: There is no significant change in premium price when there is change in seats, pitch or width
# Linear model of premium price on premium seat count, pitch and width;
# summary() prints the coefficient table with t-tests and the overall F-test.
fit <- lm(
  formula = PricePremium ~ SeatsPremium + PitchPremium + WidthPremium,
  data = airlines
)
summary(fit)
##
## Call:
## lm(formula = PricePremium ~ SeatsPremium + PitchPremium + WidthPremium,
## data = airlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2219.2 -936.9 -120.4 1078.6 5762.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2127.171 1736.937 -1.225 0.221
## SeatsPremium 21.095 4.432 4.760 2.61e-06 ***
## PitchPremium 87.481 67.656 1.293 0.197
## WidthPremium -2.744 81.021 -0.034 0.973
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1256 on 454 degrees of freedom
## Multiple R-squared: 0.05501, Adjusted R-squared: 0.04877
## F-statistic: 8.809 on 3 and 454 DF, p-value: 1.094e-05
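As we can see, the p-value is above 0.05 for both pitch (0.197) and width (0.973), so we fail to reject the null hypothesis for them: there is no significant change in premium price when pitch or width is changed. For seats the p-value (2.61e-06) is below 0.05, so the null hypothesis is rejected: premium price changes significantly with the number of premium seats.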
==> Hypothesis: There is no significant change in relative price with the difference of width
# Simple linear regression of relative price on the width difference between
# premium and economy seats; summary() prints the coefficient table.
fit <- lm(
  formula = PriceRelative ~ WidthDifference,
  data = airlines
)
summary(fit)
##
## Call:
## lm(formula = PriceRelative ~ WidthDifference, data = airlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8028 -0.2907 -0.0766 0.1852 1.1893
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.18660 0.03132 5.958 5.11e-09 ***
## WidthDifference 0.18406 0.01551 11.869 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3943 on 456 degrees of freedom
## Multiple R-squared: 0.236, Adjusted R-squared: 0.2343
## F-statistic: 140.9 on 1 and 456 DF, p-value: < 2.2e-16
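As the p-value (< 2e-16) is below 0.05, the null hypothesis is rejected: relative price changes significantly with the width difference (an estimated increase of about 0.18 in relative price per unit of width difference).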