# Load the airline seat/price data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0 as well, which is what
# the recorded summary() and str() output below shows.
airline.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(airline.df)
}

# Per-column summary: level counts for factors, five-number summary plus mean
# for numeric columns.
summary(airline.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Compact overview of the data frame: one line per column with its type and
# leading values.
utils::str(object = airline.df)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Calculating the means of the numeric variables
# Mean of every numeric column, printed as one named vector.
# The columns are selected by type instead of being listed by hand, so no
# numeric variable can be skipped (the hand-written list omitted PitchPremium).
vapply(Filter(is.numeric, airline.df), mean, numeric(1))
## Values recorded from an earlier run of this analysis:
## FlightDuration        7.577838
## SeatsEconomy          202.3122
## SeatsPremium          33.64847
## PitchEconomy          31.21834
## PitchPremium          37.91 (rounded value taken from summary())
## WidthEconomy          17.83843
## WidthPremium          19.47162
## PriceEconomy          1327.076
## PricePremium          1845.258
## PriceRelative         0.4872052
## SeatsTotal            235.9607
## PitchDifference       6.687773
## WidthDifference       1.633188
## PercentPremiumSeats   14.64541
# Calculating the medians of the numeric variables
# Median of every numeric column, printed as one named vector.
# The columns are selected by type instead of being listed by hand, so no
# numeric variable can be skipped (the hand-written list omitted PitchPremium).
vapply(Filter(is.numeric, airline.df), median, numeric(1))
## Values recorded from an earlier run of this analysis:
## FlightDuration        7.79
## SeatsEconomy          185
## SeatsPremium          36
## PitchEconomy          31
## PitchPremium          38 (value taken from summary())
## WidthEconomy          18
## WidthPremium          19
## PriceEconomy          1242
## PricePremium          1737
## PriceRelative         0.365
## SeatsTotal            227
## PitchDifference       7
## WidthDifference       1
## PercentPremiumSeats   13.21
# Calculating the standard deviations of the numeric variables
# Standard deviation of every numeric column, printed as one named vector.
# The columns are selected by type instead of being listed by hand, so no
# numeric variable can be skipped (the hand-written list omitted PitchPremium).
vapply(Filter(is.numeric, airline.df), sd, numeric(1))
## Values recorded from an earlier run of this analysis:
## FlightDuration        3.542064
## SeatsEconomy          76.37353
## SeatsPremium          13.26142
## PitchEconomy          0.6551695
## PitchPremium          (not recorded in the earlier run)
## WidthEconomy          0.5575102
## WidthPremium          1.097173
## PriceEconomy          988.2733
## PricePremium          1288.136
## PriceRelative         0.4505873
## SeatsTotal            85.29315
## PitchDifference       1.761708
## WidthDifference       1.189281
## PercentPremiumSeats   4.842451
# Box plots of five numeric variables, each drawn once grouped by Airline and
# once grouped by Aircraft. The formula stays the first (positional) argument
# so that boxplot() dispatches to its formula method.

# --- Flight duration ---
boxplot(
  FlightDuration ~ Airline,
  data = airline.df,
  main = "FlightDuration for Different Airlines",
  xlab = "Airlines",
  ylab = "FlightDuration"
)
boxplot(
  FlightDuration ~ Aircraft,
  data = airline.df,
  main = "FlightDuration for Different Aircrafts",
  xlab = "Aircraft",
  ylab = "FlightDuration"
)

# --- Number of economy seats ---
boxplot(
  SeatsEconomy ~ Airline,
  data = airline.df,
  main = "Economy seats for different airlines",
  xlab = "Airlines",
  ylab = "Economy Seats"
)
boxplot(
  SeatsEconomy ~ Aircraft,
  data = airline.df,
  main = "Economy seats for different aircraft",
  xlab = "Aircraft",
  ylab = "Economy Seats"
)

# --- Number of premium economy seats ---
boxplot(
  SeatsPremium ~ Airline,
  data = airline.df,
  main = "Premium Economy seats for different airlines",
  xlab = "Airline",
  ylab = "Premium Economy Seats"
)
boxplot(
  SeatsPremium ~ Aircraft,
  data = airline.df,
  main = "Premium Economy seats for different aircrafts",
  xlab = "Aircraft",
  ylab = "Premium Economy Seats"
)

# --- Economy ticket price ---
boxplot(
  PriceEconomy ~ Airline,
  data = airline.df,
  main = "Price for Economy seats in different airlines",
  xlab = "Airlines",
  ylab = "Price for Economy Seats"
)
boxplot(
  PriceEconomy ~ Aircraft,
  data = airline.df,
  main = "Price for Economy seats for different aircraft",
  xlab = "Aircraft",
  ylab = "Price for Economy Seats"
)

# --- Premium economy ticket price ---
boxplot(
  PricePremium ~ Airline,
  data = airline.df,
  main = "Price for Premium Economy seats in different airlines",
  xlab = "Airline",
  ylab = "Price for Premium Economy Seats"
)
boxplot(
  PricePremium ~ Aircraft,
  data = airline.df,
  main = "Price for Premium Economy seats in different aircrafts",
  xlab = "Aircraft",
  ylab = "Price for Premium Economy Seats"
)
# Pairwise relationships between the numeric variables: two scatterplot
# matrices (seat counts and prices, then the class-difference variables)
# followed by a correlogram of the whole data frame.
library(car)       # provides scatterplotMatrix(); attached once for both calls
library(corrgram)  # provides corrgram() and the panel.* functions used below

scatterplotMatrix(
  formula = ~ SeatsEconomy + SeatsPremium + PriceEconomy + PricePremium,
  cex = 0.6,
  data = airline.df
)

scatterplotMatrix(
  formula = ~ PriceRelative + PitchDifference + WidthDifference +
    PercentPremiumSeats,
  cex = 0.6,
  data = airline.df
)

# TRUE is spelled out because T is an ordinary variable that can be reassigned.
corrgram(
  airline.df,
  order = TRUE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  main = "Corrgram of all variables"
)
# Null hypothesis: there is no correlation between the relative price of the economy and premium classes (PriceRelative) and the pitch difference between them (PitchDifference).
# Two-sided Pearson correlation test between the relative price and the
# pitch difference (method and alternative are the cor.test() defaults,
# spelled out here).
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$PitchDifference,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airline.df$PriceRelative and airline.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3940262 0.5372817
## sample estimates:
## cor
## 0.4687302
# Since p < 0.05, we can reject the null hypothesis.
# Null hypothesis: there is no correlation between the relative price of the economy and premium classes (PriceRelative) and the width difference between them (WidthDifference).
# Two-sided Pearson correlation test between the relative price and the
# width difference (method and alternative are the cor.test() defaults,
# spelled out here).
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$WidthDifference,
  alternative = "two.sided",
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airline.df$PriceRelative and airline.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4125388 0.5528218
## sample estimates:
## cor
## 0.4858024
# Since p < 0.05, we can reject the null hypothesis.
# Linear regression of the relative price on the pitch and width differences.
# The columns are referenced through `data =` rather than as airline.df$...
# terms inside the formula: the fit is the same, but the coefficients get
# plain column names and the model can be used with predict(newdata = ...).
regmodel <- lm(
  PriceRelative ~ PitchDifference + WidthDifference,
  data = airline.df
)
summary(regmodel)
##
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference,
## data = airline.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84163 -0.28484 -0.07241 0.17698 1.18778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10514 0.08304 -1.266 0.206077
## PitchDifference 0.06019 0.01590 3.785 0.000174 ***
## WidthDifference 0.11621 0.02356 4.933 1.14e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2561
## F-statistic: 79.65 on 2 and 455 DF, p-value: < 2.2e-16