boxplot(sixAirlines$SeatsEconomy, xlab= "Seats Economy", ylab = "Seats Economy", main= "Seats Economy distribution", horizontal = TRUE)
boxplot(sixAirlines$SeatsPremium, xlab= "Seats Premium", ylab = "Seats Premium", main= "Seats Premium distribution", horizontal = TRUE)
boxplot(sixAirlines$PitchEconomy, xlab= "Pitch Economy", ylab = "Pitch Economy", main= "Pitch Economy distribution", horizontal = TRUE)
boxplot(sixAirlines$PitchPremium, xlab= "PitchPremium", ylab = "PitchPremium", main= "PitchPremium distribution", horizontal = TRUE)
boxplot(sixAirlines$WidthEconomy, xlab= "WidthEconomy", ylab = "WidthEconomy", main= "WidthEconomy distribution", horizontal = TRUE)
boxplot(sixAirlines$WidthPremium, xlab= "WidthPremium", ylab = "WidthPremium", main= "WidthPremium distribution", horizontal = TRUE)
boxplot(sixAirlines$WidthPremium, xlab= "WidthPremium", ylab = "WidthPremium", main= "WidthPremium distribution", horizontal = TRUE)
boxplot(sixAirlines$PricePremium , xlab= "PricePremium ", ylab = "PricePremium ", main= "PricePremium distribution", horizontal = TRUE)
boxplot(sixAirlines$PriceRelative, xlab= "PriceRelative", ylab = "PriceRelative", main= "PriceRelative distribution", horizontal = TRUE)
boxplot(sixAirlines$SeatsTotal, xlab= "SeatsTotal", ylab = "SeatsTotal", main= "SeatsTotal distribution", horizontal = TRUE)
boxplot(sixAirlines$PitchDifference, xlab= "PitchDifference", ylab = "PitchDifference", main= "PitchDifference distribution", horizontal = TRUE)
boxplot(sixAirlines$WidthDifference, xlab= "WidthDifference", ylab = "WidthDifference", main= "WidthDifference distribution", horizontal = TRUE)
boxplot(sixAirlines$PercentPremiumSeats, xlab= "PercentPremiumSeats", ylab = "PercentPremiumSeats", main= "PercentPremiumSeats distribution", horizontal = TRUE)
#Scatter Plots We will see how variables are correlated pair wise
plot(sixAirlines$Airline,sixAirlines$SeatsPremium,col="red",main="Airline vs Economy Seats",ylab="Mean Economy Seats")
plot(sixAirlines$Airline,sixAirlines$SeatsEconomy,col='blue',main="Airline vs Premium Seats",ylab="Mean PremiumSeats")
#Correlation between Variables
correlation_01 <-cor(sixAirlines[,6:18])
correlation_01[,7:8]
## PriceEconomy PricePremium
## SeatsEconomy 0.12816722 0.17700093
## SeatsPremium 0.11364218 0.21761238
## PitchEconomy 0.36866123 0.22614179
## PitchPremium 0.05038455 0.08853915
## WidthEconomy 0.06799061 0.15054837
## WidthPremium -0.05704522 0.06402004
## PriceEconomy 1.00000000 0.90138870
## PricePremium 0.90138870 1.00000000
## PriceRelative -0.28856711 0.03184654
## SeatsTotal 0.13243313 0.19232533
## PitchDifference -0.09952511 -0.01806629
## WidthDifference -0.08449975 -0.01151218
## PercentPremiumSeats 0.06532232 0.11639097
There is a correlation between all rows and Price of Economy and Price Of Premium Seats.
corr_01<-sixAirlines[,3]+sixAirlines[,6:14]
corrr<-round(cor(corr_01),2)
corrr
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium
## SeatsEconomy 1.00 0.64 0.25 0.26
## SeatsPremium 0.64 1.00 0.38 0.37
## PitchEconomy 0.25 0.38 1.00 0.90
## PitchPremium 0.26 0.37 0.90 1.00
## WidthEconomy 0.28 0.45 0.98 0.93
## WidthPremium 0.25 0.38 0.92 0.97
## PriceEconomy 0.15 0.25 0.60 0.53
## PricePremium 0.21 0.36 0.65 0.62
## PriceRelative 0.24 0.38 0.97 0.95
## WidthEconomy WidthPremium PriceEconomy PricePremium
## SeatsEconomy 0.28 0.25 0.15 0.21
## SeatsPremium 0.45 0.38 0.25 0.36
## PitchEconomy 0.98 0.92 0.60 0.65
## PitchPremium 0.93 0.97 0.53 0.62
## WidthEconomy 1.00 0.95 0.54 0.62
## WidthPremium 0.95 1.00 0.51 0.62
## PriceEconomy 0.54 0.51 1.00 0.90
## PricePremium 0.62 0.62 0.90 1.00
## PriceRelative 0.98 0.97 0.52 0.64
## PriceRelative
## SeatsEconomy 0.24
## SeatsPremium 0.38
## PitchEconomy 0.97
## PitchPremium 0.95
## WidthEconomy 0.98
## WidthPremium 0.97
## PriceEconomy 0.52
## PricePremium 0.64
## PriceRelative 1.00
library(corrplot)
## corrplot 0.84 loaded
corrplot(corrr, method = "square")
myt<-table(sixAirlines$PriceEconomy)
myp<-table(sixAirlines$PricePremium)
myx<-table(sixAirlines$Airline)
t.test(myp,myx)
##
## Welch Two Sample t-test
##
## data: myp and myx
## t = -3.6197, df = 5.0004, p-value = 0.01522
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -125.9611 -21.3488
## sample estimates:
## mean of x mean of y
## 2.678363 76.333333
t.test(myt,myx)
##
## Welch Two Sample t-test
##
## data: myt and myx
## t = -3.6329, df = 5.0003, p-value = 0.01501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -126.22904 -21.61658
## sample estimates:
## mean of x mean of y
## 2.410526 76.333333
Since the value of p is < 0.05, we can reject our null hypothesis that airlines do not play a significant role in pricing.Amd here is a significant difference between the PriceRelative in case of Beoing vs Airbus aircrafts.
Model1=PricePremium~SeatsEconomy+FlightDuration+SeatsPremium+PitchEconomy+PitchPremium+WidthEconomy+WidthPremium+PriceEconomy+PriceRelative+SeatsTotal+PitchDifference+WidthDifference+PercentPremiumSeats
fit<-lm(Model1,data=sixAirlines)
summary(fit)
##
## Call:
## lm(formula = Model1, data = sixAirlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -855.46 -127.12 -8.66 89.60 2164.59
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.114e+04 1.467e+03 7.597 1.80e-13 ***
## SeatsEconomy -2.479e+00 7.231e-01 -3.429 0.000662 ***
## FlightDuration 9.836e+00 6.312e+00 1.558 0.119830
## SeatsPremium 2.308e+01 4.275e+00 5.399 1.09e-07 ***
## PitchEconomy -2.601e+02 3.748e+01 -6.939 1.40e-11 ***
## PitchPremium -1.861e+02 1.794e+01 -10.373 < 2e-16 ***
## WidthEconomy 2.172e+02 4.035e+01 5.384 1.18e-07 ***
## WidthPremium -8.098e+00 2.236e+01 -0.362 0.717363
## PriceEconomy 1.359e+00 2.292e-02 59.307 < 2e-16 ***
## PriceRelative 1.039e+03 4.255e+01 24.410 < 2e-16 ***
## SeatsTotal NA NA NA NA
## PitchDifference NA NA NA NA
## WidthDifference NA NA NA NA
## PercentPremiumSeats -3.407e+01 1.025e+01 -3.323 0.000965 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 300.6 on 447 degrees of freedom
## Multiple R-squared: 0.9467, Adjusted R-squared: 0.9455
## F-statistic: 794.5 on 10 and 447 DF, p-value: < 2.2e-16
Model2=PriceEconomy~SeatsEconomy+FlightDuration+SeatsPremium+PitchEconomy+PitchPremium+WidthEconomy+WidthPremium+PricePremium+PriceRelative+SeatsTotal+PitchDifference+WidthDifference+PercentPremiumSeats
fit1<-lm(Model2,data=sixAirlines)
summary(fit)
##
## Call:
## lm(formula = Model1, data = sixAirlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -855.46 -127.12 -8.66 89.60 2164.59
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.114e+04 1.467e+03 7.597 1.80e-13 ***
## SeatsEconomy -2.479e+00 7.231e-01 -3.429 0.000662 ***
## FlightDuration 9.836e+00 6.312e+00 1.558 0.119830
## SeatsPremium 2.308e+01 4.275e+00 5.399 1.09e-07 ***
## PitchEconomy -2.601e+02 3.748e+01 -6.939 1.40e-11 ***
## PitchPremium -1.861e+02 1.794e+01 -10.373 < 2e-16 ***
## WidthEconomy 2.172e+02 4.035e+01 5.384 1.18e-07 ***
## WidthPremium -8.098e+00 2.236e+01 -0.362 0.717363
## PriceEconomy 1.359e+00 2.292e-02 59.307 < 2e-16 ***
## PriceRelative 1.039e+03 4.255e+01 24.410 < 2e-16 ***
## SeatsTotal NA NA NA NA
## PitchDifference NA NA NA NA
## WidthDifference NA NA NA NA
## PercentPremiumSeats -3.407e+01 1.025e+01 -3.323 0.000965 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 300.6 on 447 degrees of freedom
## Multiple R-squared: 0.9467, Adjusted R-squared: 0.9455
## F-statistic: 794.5 on 10 and 447 DF, p-value: < 2.2e-16
We test for all Numeric values and on the basis of above information of Regression Analysis on both PricePremium and PriceEconomic, we infer that:
SeatsEconomy , SeatsPremium ,PitchEconomy ,PitchPremium ,WidthEconomy ,PriceEconomy ,PriceRelative ,PercentPremiumSeats are highly significant Independent Variables.
They are all highly significant and highly correlated to Price of the Seats. FlightDuration ,WidthPremium ,SeatsTotal, PitchDifference, WidthDifference are not significant at all having p>0.05 The ‘negative’ effect on PriceRelative is put most by the PriceEconomy, and it is almost ‘8’ times. This inference can infact be well validated by the formula where the term of PriceEconomy is in the denominator, and that’s how increasing the value of PriceEconomy decreases the value of PriceRelative.