# Load the airline pricing data set from the working directory.
# stringsAsFactors = TRUE is spelled out so that text columns are read as
# factors on every R version (the str() output recorded further down lists
# Airline, Aircraft, TravelMonth and IsInternational as factors); R >= 4.0.0
# would otherwise read them as character vectors.
mydata <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# The spreadsheet viewer is only useful (and only reliably available) in an
# interactive session, so skip it when the script is run or knitted in batch.
if (interactive()) {
  View(mydata)
}

# NOTE(review): attach(mydata) was removed. Every column reference visible in
# this script goes through `mydata$...` or `data = mydata`, so the attached
# copy was unused and could only mask or be masked by other objects.

library(psych)
## Warning: package 'psych' was built under R version 3.4.3

# Summary statistics for every column of the data set.
describe(mydata)
## vars n mean sd median trimmed mad min
## Airline* 1 458 3.01 1.65 2.00 2.89 1.48 1.00
## Aircraft* 2 458 1.67 0.47 2.00 1.71 0.00 1.00
## FlightDuration 3 458 7.58 3.54 7.79 7.57 4.81 1.25
## TravelMonth* 4 458 2.56 1.17 3.00 2.58 1.48 1.00
## IsInternational* 5 458 1.91 0.28 2.00 2.00 0.00 1.00
## SeatsEconomy 6 458 202.31 76.37 185.00 194.64 85.99 78.00
## SeatsPremium 7 458 33.65 13.26 36.00 33.35 11.86 8.00
## PitchEconomy 8 458 31.22 0.66 31.00 31.26 0.00 30.00
## PitchPremium 9 458 37.91 1.31 38.00 38.05 0.00 34.00
## WidthEconomy 10 458 17.84 0.56 18.00 17.81 0.00 17.00
## WidthPremium 11 458 19.47 1.10 19.00 19.53 0.00 17.00
## PriceEconomy 12 458 1327.08 988.27 1242.00 1244.40 1159.39 65.00
## PricePremium 13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative 14 458 0.49 0.45 0.36 0.42 0.41 0.02
## SeatsTotal 15 458 235.96 85.29 227.00 228.73 90.44 98.00
## PitchDifference 16 458 6.69 1.76 7.00 6.76 0.00 2.00
## WidthDifference 17 458 1.63 1.19 1.00 1.53 0.00 0.00
## PercentPremiumSeats 18 458 14.65 4.84 13.21 14.31 2.68 4.71
## max range skew kurtosis se
## Airline* 6.00 5.00 0.61 -0.95 0.08
## Aircraft* 2.00 1.00 -0.72 -1.48 0.02
## FlightDuration 14.66 13.41 -0.07 -1.12 0.17
## TravelMonth* 4.00 3.00 -0.14 -1.46 0.05
## IsInternational* 2.00 1.00 -2.91 6.50 0.01
## SeatsEconomy 389.00 311.00 0.72 -0.36 3.57
## SeatsPremium 66.00 58.00 0.23 -0.46 0.62
## PitchEconomy 33.00 3.00 -0.03 -0.35 0.03
## PitchPremium 40.00 6.00 -1.51 3.52 0.06
## WidthEconomy 19.00 2.00 -0.04 -0.08 0.03
## WidthPremium 21.00 4.00 -0.08 -0.31 0.05
## PriceEconomy 3593.00 3528.00 0.51 -0.88 46.18
## PricePremium 7414.00 7328.00 0.50 0.43 60.19
## PriceRelative 1.89 1.87 1.17 0.72 0.02
## SeatsTotal 441.00 343.00 0.70 -0.53 3.99
## PitchDifference 10.00 8.00 -0.54 1.78 0.08
## WidthDifference 4.00 4.00 0.84 -0.53 0.06
## PercentPremiumSeats 24.69 19.98 0.71 0.28 0.23
# Compact structure listing of the data frame.
str(object = mydata)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
library(psych)
# Summary statistics for the individual columns used in the analysis below.
# The `##` lines are the console output recorded for each call.

# Flight duration
describe(mydata[["FlightDuration"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 7.58 3.54 7.79 7.57 4.81 1.25 14.66 13.41 -0.07 -1.12
## se
## X1 0.17

# Number of economy seats
describe(mydata[["SeatsEconomy"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 202.31 76.37 185 194.64 85.99 78 389 311 0.72 -0.36
## se
## X1 3.57

# Relative price
describe(mydata[["PriceRelative"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 458 0.49 0.45 0.36 0.42 0.41 0.02 1.89 1.87 1.17 0.72
## se
## X1 0.02

# Economy price
describe(mydata[["PriceEconomy"]])
## vars n mean sd median trimmed mad min max range skew
## X1 1 458 1327.08 988.27 1242 1244.4 1159.39 65 3593 3528 0.51
## kurtosis se
## X1 -0.88 46.18

# Pitch difference
describe(mydata[["PitchDifference"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 458 6.69 1.76 7 6.76 0 2 10 8 -0.54 1.78 0.08

# Width difference
describe(mydata[["WidthDifference"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 458 1.63 1.19 1 1.53 0 0 4 4 0.84 -0.53 0.06
# Bar charts of how often each distinct value occurs in the key columns.
# `counts` is reused for each chart and ends up holding the PitchDifference
# frequency table, exactly as before.

counts <- table(mydata$FlightDuration)
barplot(counts, main = "Flight Duration", xlab = "FlightDuration")

counts <- table(mydata$SeatsEconomy)
barplot(counts, main = "Economy Seats", xlab = "SeatsEconomy")

counts <- table(mydata$PriceRelative)
barplot(counts, main = "PriceRelative", xlab = "PriceRelative")

counts <- table(mydata$WidthDifference)
barplot(counts, main = "WidthDifference", xlab = "WidthDifference")

# Fix: this chart shows PitchDifference but was previously titled
# "WidthDifference" (copy-paste slip from the chart above).
counts <- table(mydata$PitchDifference)
barplot(counts, main = "PitchDifference", xlab = "PitchDifference")
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplots of the economy price against each candidate predictor.
# All four calls pass the same options: spread = FALSE, a smoother drawn with
# line type 2 and solid points (pch = 19).

scatterplot(
  PriceEconomy ~ PriceRelative,
  data = mydata,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of PriceEconomy vs. PriceRelative",
  xlab = "PriceRelative",
  ylab = "PriceEconomy"
)

scatterplot(
  PriceEconomy ~ WidthDifference,
  data = mydata,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of PriceEconomy vs. WidthDifference",
  xlab = "WidthDifference",
  ylab = "PriceEconomy"
)

# Fix: the title previously read "pitchDifference"; it now matches the column
# name and the capitalisation used in the other plot titles.
scatterplot(
  PriceEconomy ~ PitchDifference,
  data = mydata,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of PriceEconomy vs. PitchDifference",
  xlab = "PitchDifference",
  ylab = "PriceEconomy"
)

scatterplot(
  PriceEconomy ~ FlightDuration,
  data = mydata,
  spread = FALSE,
  smoother.args = list(lty = 2),
  pch = 19,
  main = "Scatterplot of PriceEconomy vs. Flight Duration",
  xlab = "FlightDuration",
  ylab = "PriceEconomy"
)
# Correlation of the economy price with each candidate predictor, one pair at
# a time. The `##` lines are the recorded console output.
with(mydata, cor(PriceEconomy, PriceRelative))
## [1] -0.2885671
with(mydata, cor(PriceEconomy, PitchDifference))
## [1] -0.09952511
with(mydata, cor(PriceEconomy, WidthDifference))
## [1] -0.08449975
with(mydata, cor(PriceEconomy, FlightDuration))
## [1] 0.5666404

# Cross-correlation table: predictors in rows (x), both prices in columns (y).
x <- mydata[c(
  "PriceRelative",
  "WidthDifference",
  "PitchDifference",
  "FlightDuration",
  "PercentPremiumSeats"
)]
y <- mydata[c("PriceEconomy", "PricePremium")]
cor(x, y)
## PriceEconomy PricePremium
## PriceRelative -0.28856711 0.03184654
## WidthDifference -0.08449975 -0.01151218
## PitchDifference -0.09952511 -0.01806629
## FlightDuration 0.56664039 0.64873981
## PercentPremiumSeats 0.06532232 0.11639097
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal; order = TRUE lets corrgram
# reorder the variables.
corrgram(
  x = mydata,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of mydata intercorrelations"
)
PitchPremium, WidthPremium, PitchDifference and WidthDifference have a strong positive association with one another. There is a strong positive association between PriceEconomy and PricePremium. There is a strong positive association between FlightDuration and PricePremium, and between FlightDuration and PriceEconomy. There is a negative relationship between SeatsTotal and PriceEconomy, and between SeatsTotal and PricePremium. ====================================================
# Covariance table: predictors in rows (x), both prices in columns (y).
x <- mydata[c(
  "PriceRelative",
  "WidthDifference",
  "PitchDifference",
  "FlightDuration",
  "PercentPremiumSeats"
)]
y <- mydata[c("PriceEconomy", "PricePremium")]
cov(x, y)
## PriceEconomy PricePremium
## PriceRelative -128.49992 18.48429
## WidthDifference -99.31545 -17.63614
## PitchDifference -173.27806 -40.99816
## FlightDuration 1983.54017 2959.97830
## PercentPremiumSeats 312.61077 726.01582
Is there a relationship between PriceEconomy and PriceRelative, FlightDuration, PitchDifference and WidthDifference? Is there a relationship between PricePremium and PriceRelative, FlightDuration, PitchDifference and WidthDifference?
PriceEconomy = beta_0 + beta_1 PriceRelative + beta_2 FlightDuration + beta_3 WidthDifference + beta_4 PitchDifference
PricePremium = beta_0 + beta_1 PriceRelative + beta_2 FlightDuration + beta_3 WidthDifference + beta_4 PitchDifference
Null hypothesis: beta_1 = beta_2 = beta_3 = beta_4 = 0, i.e. none of the independent variables affects PriceEconomy. Alternative hypothesis: at least one of beta_1, beta_2, beta_3, beta_4 ≠ 0, i.e. at least one of the independent variables affects PriceEconomy.
# Linear model of the economy price on the four candidate predictors.
# The formula is kept inline so the Call printed by summary() is unchanged.
m1 <- lm(
  formula = PriceEconomy ~ PriceRelative + FlightDuration +
    PitchDifference + WidthDifference,
  data = mydata
)

# Coefficient table, R-squared and overall F-test for the fit.
summary(m1)
##
## Call:
## lm(formula = PriceEconomy ~ PriceRelative + FlightDuration +
## PitchDifference + WidthDifference, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1578.18 -457.08 -2.81 550.14 1714.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 298.44 166.78 1.789 0.0742 .
## PriceRelative -1029.69 87.31 -11.793 < 2e-16 ***
## FlightDuration 182.07 9.66 18.849 < 2e-16 ***
## PitchDifference -30.80 29.54 -1.043 0.2977
## WidthDifference 218.31 44.91 4.861 1.61e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 709.9 on 453 degrees of freedom
## Multiple R-squared: 0.4885, Adjusted R-squared: 0.484
## F-statistic: 108.2 on 4 and 453 DF, p-value: < 2.2e-16
We reject the null hypothesis, as the p-value of the F-statistic is very low (< 2.2e-16). The individual coefficient tests establish that PriceRelative, FlightDuration and WidthDifference affect the price of an economy ticket, while PitchDifference has a statistically insignificant influence on the price of economy tickets.
# Point estimates of the economy-price model (recorded output below).
coef(m1)
## (Intercept) PriceRelative FlightDuration PitchDifference
## 298.4373 -1029.6893 182.0740 -30.7959
## WidthDifference
## 218.3078

# Confidence intervals for the same coefficients (2.5 % / 97.5 % bounds).
confint(object = m1)
## 2.5 % 97.5 %
## (Intercept) -29.33082 626.20546
## PriceRelative -1201.28243 -858.09613
## FlightDuration 163.09080 201.05722
## PitchDifference -88.84314 27.25133
## WidthDifference 130.04826 306.56739
library(coefplot)
## Warning: package 'coefplot' was built under R version 3.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Coefficient plot for the economy-price model, restricted to the four
# predictors.
coefplot(
  m1,
  predictors = c(
    "PriceRelative",
    "FlightDuration",
    "PitchDifference",
    "WidthDifference"
  )
)
## Warning: Ignoring unknown aesthetics: xmin, xmax
We can infer that only PitchDifference is statistically insignificant, as its confidence interval includes zero, while the others (WidthDifference, PriceRelative and FlightDuration) are statistically significant in influencing the price of an economy ticket.
# Fix: `m2` was used below without ever being created in this script.
# Fit the premium-price model stated in the text above: PricePremium on the
# same four predictors that m1 uses for PriceEconomy.
# NOTE(review): the recorded output below is assumed to come from this exact
# model -- confirm against the original analysis.
m2 <- lm(
  PricePremium ~ PriceRelative + FlightDuration + PitchDifference +
    WidthDifference,
  data = mydata
)

# Point estimates of the premium-price model (recorded output below).
coef(m2)
## (Intercept) PriceRelative FlightDuration PitchDifference
## 201.89111 -282.78902 246.93562 -61.30538
## WidthDifference
## 195.87525

# Confidence intervals for the same coefficients (2.5 % / 97.5 % bounds).
confint(m2)
## 2.5 % 97.5 %
## (Intercept) -246.41009 650.19231
## PriceRelative -517.48363 -48.09442
## FlightDuration 220.97155 272.89969
## PitchDifference -140.69884 18.08808
## WidthDifference 75.15922 316.59128

# Coefficient plot for the premium-price model, restricted to the four
# predictors.
coefplot(
  m2,
  predictors = c(
    "PriceRelative",
    "FlightDuration",
    "PitchDifference",
    "WidthDifference"
  )
)
## Warning: Ignoring unknown aesthetics: xmin, xmax
We can infer that only PitchDifference is statistically insignificant, as its confidence interval includes zero, while the others (WidthDifference, PriceRelative and FlightDuration) are statistically significant in influencing the price of a premium ticket.
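Finally, according to this analysis, we can conclude that the factors explaining the price of economy tickets and the price of premium tickets are PriceRelative, FlightDuration and WidthDifference; PitchDifference is not statistically significant in either model.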