# Load the six-airlines fare data set from the working directory.
# stringsAsFactors = TRUE is spelled out because read.csv() stopped converting
# text columns to factors by default in R 4.0.0; the summary recorded below
# (level counts for Airline, Aircraft, TravelMonth, IsInternational) and the
# later plots of TravelMonth rely on those columns being factors.
air.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# The spreadsheet-style viewer is only useful when someone is at the console,
# so skip it when the script is run non-interactively (Rscript, knitting).
if (interactive()) {
  View(air.df)
}

# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(air.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Univariate distributions of the variables used in the analysis.
# Titles name the chart that is actually drawn (histogram / bar chart).

# Distribution of flight duration.
hist(air.df$FlightDuration,
     main = "Histogram of Flight Duration",
     xlab = "Flight Duration")

# Number of observations per travel month. barplot(table(...)) draws the same
# bar chart that plot() produces for a factor, and also works when the column
# was read in as plain character strings.
barplot(table(air.df$TravelMonth),
        main = "Bar Chart of Travel Month",
        xlab = "Travel Months")

# Distribution of economy fares.
hist(air.df$PriceEconomy,
     main = "Histogram of Price Economy",
     xlab = "Price Economy")

# Distribution of premium fares.
hist(air.df$PricePremium,
     main = "Histogram of Price Premium",
     xlab = "Price Premium")
# Bivariate views of both fare classes, drawn two panels per page.
# The 1x2 layout is set once: after the first two panels fill a page, the next
# plot starts a fresh page on its own, so no reset is needed in between.
par(mfrow = c(1, 2))

# Fares against flight duration (scatter plots).
plot(air.df$FlightDuration, air.df$PriceEconomy,
     main = "Flight Duration vs Economic Price",
     xlab = "Flight Duration",
     ylab = "Price Economy")
plot(air.df$FlightDuration, air.df$PricePremium,
     main = " Flight Duration vs Premium Price",
     xlab = "Flight Duration",
     ylab = "Price Premium")

# Fares by travel month (box plots). The formula interface draws the same box
# plots that plot() gives for a factor x, and also accepts a character column.
# Titles now name Travel Month, the variable actually on the x axis.
boxplot(PriceEconomy ~ TravelMonth, data = air.df,
        main = "Travel Month vs Economic Price",
        xlab = "Travel Month",
        ylab = "Price Economy")
boxplot(PricePremium ~ TravelMonth, data = air.df,
        main = " Travel Month vs Premium Price",
        xlab = "Travel Month",
        ylab = "Price Premium")

# Back to one plot per page for everything that follows.
par(mfrow = c(1, 1))
# Pairwise relationships among duration, travel month and the two fares.
# NOTE(review): spread / smoother.args look like the older car interface;
# confirm they are still honoured by the installed car version.
library(car)
scatterplotMatrix(
  air.df[c("FlightDuration", "TravelMonth", "PriceEconomy", "PricePremium")],
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter Plot Matrix"
)

# Correlogram of the data set: shaded cells below the diagonal, pies above,
# min/max on the diagonal, variables kept in their original column order.
library(corrgram)
corrgram(
  air.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of Airlines Data"
)
# Print numbers with two significant digits from here on.
options(digits = 2)

# Correlation of flight duration with each fare class.
with(air.df, cor(FlightDuration, PriceEconomy))
## [1] 0.57
with(air.df, cor(FlightDuration, PricePremium))
## [1] 0.65

# Test whether the duration / economy-fare correlation differs from zero.
# The argument expressions are kept as written so the "data:" line of the
# printed result stays the same.
cor.test(x = air.df[, "FlightDuration"], y = air.df[, "PriceEconomy"])
##
## Pearson's product-moment correlation
##
## data: air.df[, "FlightDuration"] and air.df[, "PriceEconomy"]
## t = 10, df = 500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.50 0.63
## sample estimates:
## cor
## 0.57
# Test whether the duration / premium-fare correlation differs from zero.
cor.test(x = air.df[, "FlightDuration"], y = air.df[, "PricePremium"])
##
## Pearson's product-moment correlation
##
## data: air.df[, "FlightDuration"] and air.df[, "PricePremium"]
## t = 20, df = 500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.59 0.70
## sample estimates:
## cor
## 0.65
```
# Linear model of the economy fare on flight duration and travel month.
# The formula is passed inline so the "Call:" line of the summary shows it.
m1 <- lm(
  formula = PriceEconomy ~ FlightDuration + TravelMonth,
  data = air.df
)
summary(m1)
##
## Call:
## lm(formula = PriceEconomy ~ FlightDuration + TravelMonth, data = air.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1770 -502 -169 471 1902
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 127.6 110.4 1.16 0.25
## FlightDuration 158.3 10.8 14.63 <2e-16 ***
## TravelMonthJul 36.3 119.2 0.30 0.76
## TravelMonthOct -39.6 102.6 -0.39 0.70
## TravelMonthSep 16.6 102.2 0.16 0.87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 817 on 453 degrees of freedom
## Multiple R-squared: 0.322, Adjusted R-squared: 0.316
## F-statistic: 53.7 on 4 and 453 DF, p-value: <2e-16
# Linear model of the premium fare on flight duration and travel month.
# Reuses the name m1, so this fit replaces the one stored under it before.
m1 <- lm(
  formula = PricePremium ~ FlightDuration + TravelMonth,
  data = air.df
)
summary(m1)
##
## Call:
## lm(formula = PricePremium ~ FlightDuration + TravelMonth, data = air.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2300 -664 -112 793 4113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.48 132.91 0.42 0.67
## FlightDuration 236.08 13.03 18.12 <2e-16 ***
## TravelMonthJul 22.14 143.61 0.15 0.88
## TravelMonthOct -20.55 123.55 -0.17 0.87
## TravelMonthSep 6.76 123.07 0.05 0.96
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 984 on 453 degrees of freedom
## Multiple R-squared: 0.421, Adjusted R-squared: 0.416
## F-statistic: 82.3 on 4 and 453 DF, p-value: <2e-16
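```
Inferences: Although premium and economy seats are equal, people prefer economy more. Travel month and flight duration are the two prime factors that affect the prices of the tickets. Note, however, that in both regressions above only FlightDuration is statistically significant (p < 2e-16); none of the TravelMonth coefficients is (all p-values are 0.70 or higher).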