->Reading and Viewing the dataset into R
airline.df <- read.csv(paste("SixAirlinesDataV2.csv", sep=""))
View(airline.df)
-> Summarizing the data
library(psych)
describe(airline.df)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
->Vector of variables
str(airline.df)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
-> About the data
table(airline.df$Airline)
##
## AirFrance British Delta Jet Singapore Virgin
## 74 175 46 61 40 62
table(airline.df$Aircraft)
##
## AirBus Boeing
## 151 307
table(airline.df$TravelMonth)
##
## Aug Jul Oct Sep
## 127 75 127 129
table(airline.df$IsInternational)
##
## Domestic International
## 40 418
mean(airline.df$PriceRelative)
## [1] 0.4872052
max(airline.df$SeatsTotal)
## [1] 441
min(airline.df$SeatsTotal)
## [1] 98
->Creating a new column in the dataset
percent.economy <-( (airline.df$SeatsEconomy/airline.df$SeatsTotal)*100 )
airline.df$PercentEconomySeats <- percent.economy
View(airline.df)
-> Visualization of variables independently
par(mfrow=c(3,2))
with(airline.df, boxplot(airline.df$PriceEconomy , horizontal = TRUE, col = "yellow",
main="boxplot of prices in economoy airlines",
xlab="PriceEconomy"), cex=0.5)
with(airline.df , boxplot(airline.df$PricePremium , horizontal = TRUE, col = "yellow",
main="boxplot of prices in premium economoy airlines",
xlab="PricePremium"),cex=0.5)
with(airline.df , boxplot(airline.df$PitchDifference , horizontal = TRUE, col = "yellow",
main="boxplot of pitch difference in airlines",
xlab="Pitchdifference"),cex=0.5)
with(airline.df , boxplot(airline.df$WidthDifference , horizontal = TRUE, col = "yellow",
main="boxplot of width difference in airlines",
xlab="Widthdifference"),cex=0.5)
with(airline.df , boxplot(airline.df$PercentEconomySeats , horizontal = TRUE, col = "yellow",
main="boxplot of percentage of economy seats ",
xlab="Percent Economy seats"),cex=0.5)
with(airline.df , boxplot(airline.df$PercentPremiumSeats , horizontal = TRUE, col = "yellow",
main="boxplot of percentage of premium economy seats",
xlab="Percent premium seats"),cex=0.5)
->Visualization of variables correlated pair-wise
1)Boxplots
par(mfrow=c(2,1))
with(airline.df ,boxplot(PriceRelative~PitchDifference,data=airline.df,
main="Relative Price Difference vs. Pitch", ylab="Pitch Difference",
xlab="Relative Price ", horizontal=TRUE))
with(airline.df , boxplot(PriceRelative~WidthDifference,data=airline.df,
main="Relative Price Difference vs. Pitch", ylab="Width Difference",
xlab="Relative Price ", horizontal=TRUE))
2)Scatterplots
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
scatterplot(SeatsEconomy ~ SeatsPremium,data=airline.df , spread=FALSE,
smoother.args=list(lty=2), pch=19,
main= "Scatterplot of number of seats in economy and
premium economy airline" )
scatterplot(PriceEconomy ~ PricePremium,data=airline.df , spread=FALSE,
smoother.args=list(lty=2), pch=19 ,
main= "Scatterplot of price of economy and
premium economy airline" )
plot(PriceRelative~ PitchDifference,data=airline.df)
plot(PriceRelative~ WidthDifference,data=airline.df )
3)histograms
library(lattice)
histogram(~PricePremium | Airline , data=airline.df)
histogram(~PriceEconomy | Airline , data=airline.df)
histogram(~PricePremium | Aircraft , data=airline.df)
histogram(~PriceEconomy | Aircraft , data=airline.df)
->Corrogram
library(corrgram)
corrgram(airline.df, order=FALSE,
lower.panel=panel.shade,
upper.panel=panel.pie,
diag.panel=panel.minmax,
text.panel=panel.txt,
main="Corrgram of all the intercorrelations")
->Correlation Visualization
library(corrplot)
## corrplot 0.84 loaded
corrplot(corr=cor(airline.df[ ,6:19 ], use="complete.obs"),
method ="ellipse")
->Regression model
fit1<-lm(PricePremium~FlightDuration+PriceEconomy+SeatsTotal+PitchDifference+
WidthDifference+PercentEconomySeats+PercentPremiumSeats
+ Airline+Aircraft+IsInternational+TravelMonth,data=airline.df)
summary(fit1)
##
## Call:
## lm(formula = PricePremium ~ FlightDuration + PriceEconomy + SeatsTotal +
## PitchDifference + WidthDifference + PercentEconomySeats +
## PercentPremiumSeats + Airline + Aircraft + IsInternational +
## TravelMonth, data = airline.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -932.39 -222.10 -55.12 134.47 2916.42
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.685e+06 1.839e+06 -2.548 0.01119 *
## FlightDuration 5.746e+01 9.240e+00 6.218 1.17e-09 ***
## PriceEconomy 1.171e+00 3.989e-02 29.350 < 2e-16 ***
## SeatsTotal -2.433e-01 4.149e-01 -0.586 0.55792
## PitchDifference -4.552e+01 8.352e+01 -0.545 0.58603
## WidthDifference 1.047e+02 1.063e+02 0.985 0.32510
## PercentEconomySeats 4.685e+04 1.839e+04 2.547 0.01120 *
## PercentPremiumSeats 4.683e+04 1.839e+04 2.546 0.01122 *
## AirlineBritish 9.823e+02 1.848e+02 5.315 1.70e-07 ***
## AirlineDelta 5.972e+02 2.303e+02 2.593 0.00983 **
## AirlineJet 4.074e+02 1.935e+02 2.105 0.03586 *
## AirlineSingapore 2.380e+02 1.335e+02 1.783 0.07532 .
## AirlineVirgin 1.001e+03 1.521e+02 6.578 1.36e-10 ***
## AircraftBoeing -5.078e+01 6.338e+01 -0.801 0.42346
## IsInternationalInternational 1.215e+02 3.456e+02 0.351 0.72545
## TravelMonthJul -3.210e+01 6.494e+01 -0.494 0.62135
## TravelMonthOct 2.430e+01 5.505e+01 0.441 0.65918
## TravelMonthSep -5.074e+00 5.485e+01 -0.093 0.92633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 438.3 on 440 degrees of freedom
## Multiple R-squared: 0.8886, Adjusted R-squared: 0.8842
## F-statistic: 206.4 on 17 and 440 DF, p-value: < 2.2e-16
fit<-lm(PricePremium~FlightDuration+PriceEconomy+SeatsTotal+PitchDifference+
WidthDifference+PercentEconomySeats+PercentPremiumSeats,data=airline.df)
summary(fit)
##
## Call:
## lm(formula = PricePremium ~ FlightDuration + PriceEconomy + SeatsTotal +
## PitchDifference + WidthDifference + PercentEconomySeats +
## PercentPremiumSeats, data = airline.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -865.4 -258.1 -23.9 171.0 3491.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.463e+06 1.114e+06 3.107 0.00201 **
## FlightDuration 7.423e+01 7.898e+00 9.398 < 2e-16 ***
## PriceEconomy 9.980e-01 2.844e-02 35.095 < 2e-16 ***
## SeatsTotal 1.193e+00 2.946e-01 4.051 6.01e-05 ***
## PitchDifference -3.681e+01 2.109e+01 -1.746 0.08156 .
## WidthDifference 1.814e+02 3.328e+01 5.450 8.32e-08 ***
## PercentEconomySeats -3.463e+04 1.114e+04 -3.108 0.00200 **
## PercentPremiumSeats -3.461e+04 1.114e+04 -3.106 0.00202 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 481.1 on 450 degrees of freedom
## Multiple R-squared: 0.8626, Adjusted R-squared: 0.8605
## F-statistic: 403.7 on 7 and 450 DF, p-value: < 2.2e-16
-Model-2 isthe best fit model since most of the independent variables in thismodel are statistically significant ( p-value <0.05)
Hence from the model, it is evident that the price of premium economy airlines are dependent on price of economy seats , flight duration , total number of seats , pitch and width difference , percentage of Premium economy seats and percentage of Economy seats either positively or negatively .
Therefore , the differnce in prices of premium economu airlines and economy airlines is attributed by :
-flight duration
-Pitch difference
-width difference
-total seats
-Percent Premium seats
in a more significant way.