# Load the six-airline fare data set and print a per-column summary.
# NOTE(review): the hard-coded setwd() ties this script to one machine; it is
# kept because other code may rely on the working directory it sets.
setwd("C:/Users/saihe/Desktop/Hemanth")
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors. summary() then reports per-level
# counts and the plot() calls on those columns work; R >= 4.0 would otherwise
# read them as plain character vectors.
SixAirlines <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
summary(SixAirlines)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Preview the first ten rows of the data set.
head(SixAirlines, n = 10)
## Airline Aircraft FlightDuration TravelMonth IsInternational
## 1 British Boeing 12.25 Jul International
## 2 British Boeing 12.25 Aug International
## 3 British Boeing 12.25 Sep International
## 4 British Boeing 12.25 Oct International
## 5 British Boeing 8.16 Aug International
## 6 British Boeing 8.16 Sep International
## 7 British Boeing 8.16 Oct International
## 8 British Boeing 6.50 Aug International
## 9 British Boeing 6.50 Sep International
## 10 British Boeing 11.50 Sep International
## SeatsEconomy SeatsPremium PitchEconomy PitchPremium WidthEconomy
## 1 122 40 31 38 18
## 2 122 40 31 38 18
## 3 122 40 31 38 18
## 4 122 40 31 38 18
## 5 122 40 31 38 18
## 6 122 40 31 38 18
## 7 122 40 31 38 18
## 8 122 40 31 38 18
## 9 122 40 31 38 18
## 10 122 40 31 38 18
## WidthPremium PriceEconomy PricePremium PriceRelative SeatsTotal
## 1 19 2707 3725 0.38 162
## 2 19 2707 3725 0.38 162
## 3 19 2707 3725 0.38 162
## 4 19 2707 3725 0.38 162
## 5 19 1793 2999 0.67 162
## 6 19 1793 2999 0.67 162
## 7 19 1793 2999 0.67 162
## 8 19 1476 2997 1.03 162
## 9 19 1476 2997 1.03 162
## 10 19 1705 2989 0.75 162
## PitchDifference WidthDifference PercentPremiumSeats
## 1 7 1 24.69
## 2 7 1 24.69
## 3 7 1 24.69
## 4 7 1 24.69
## 5 7 1 24.69
## 6 7 1 24.69
## 7 7 1 24.69
## 8 7 1 24.69
## 9 7 1 24.69
## 10 7 1 24.69
# Economy seat counts per airline, one colour per airline.
plot(
  x = SixAirlines$Airline,
  y = SixAirlines$SeatsEconomy,
  col = c("pink", "blue", "green", "yellow", "violet", "grey"),
  main = "Airline vs No. of seats in Economy Class"
)

# Premium seat counts per airline, same colour order as above.
plot(
  x = SixAirlines$Airline,
  y = SixAirlines$SeatsPremium,
  col = c("pink", "blue", "green", "yellow", "violet", "grey"),
  main = "Airline vs No. of seats in Premium Class"
)

# Number of records per travel month.
plot(
  x = SixAirlines$TravelMonth,
  col = "red",
  main = "Monthwise Travel"
)

# Number of domestic versus international records.
plot(
  x = SixAirlines$IsInternational,
  col = "grey",
  main = "Graph showing number of domestic and international flights"
)

# Side-by-side histograms comparing economy and premium seats on pitch, width
# and price, followed by a full-width boxplot of flight duration per airline.
# par() returns the previous settings; they are saved here so the 1 x 2 layout
# can be undone once the paired histograms are drawn.
old_par <- par(mfrow = c(1, 2))
hist(SixAirlines$PitchEconomy, xlab = "Economy seats Pitch",
     col = "sky blue", main = "Economy class ")
hist(SixAirlines$PitchPremium, xlab = "Premium seats Pitch",
     col = "dark blue", main = "Premium class ")

par(mfrow = c(1, 2))
hist(SixAirlines$WidthEconomy, xlab = "Economy seats width",
     col = "sky blue", main = "Economy class")
hist(SixAirlines$WidthPremium, xlab = "Premium seats width",
     col = "dark blue", main = "Premium class")

par(mfrow = c(1, 2))
hist(SixAirlines$PriceEconomy, xlab = "Economy seats price",
     col = "sky blue", main = "Economy class")
hist(SixAirlines$PricePremium, xlab = "Premium seats price",
     col = "dark blue", main = "Premium class")

# Restore the previous layout so the boxplot (and any later plot) is not
# squeezed into one half of a two-panel page.
par(old_par)

boxplot(FlightDuration ~ Airline, data = SixAirlines,
        xlab = "Airline", ylab = "Flight duration",
        col = c("pink", "blue", "green", "yellow", "violet", "grey"))

library(car)
# Relative price against pitch difference.
# NOTE(review): `spread` and `smoother.args` look like the argument names used
# by older releases of car::scatterplot() -- confirm against the installed
# version of car.
scatterplot(
  PriceRelative ~ PitchDifference,
  data = SixAirlines,
  main = "Scatter plot of price relative vs pitch difference",
  xlab = "pitch difference",
  ylab = "price relative",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Relative price against width difference, drawn with the same options.
scatterplot(
  PriceRelative ~ WidthDifference,
  data = SixAirlines,
  main = "Scatter plot of price relative vs Width difference",
  xlab = "Width difference",
  ylab = "Price relative",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

library(corrgram)
# Correlogram of the data set: variables reordered by corrgram (order = TRUE),
# shaded cells below the diagonal, pie glyphs above it, names on the diagonal.
corrgram(
  SixAirlines,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Correlogram"
)

## T-tests. Null hypothesis 1: there is no relation between relative price and width difference. Null hypothesis 2: there is no relation between relative price and pitch difference.
# Welch two-sample t-test on the means of relative price and width difference.
t.test(
  x = SixAirlines$PriceRelative,
  y = SixAirlines$WidthDifference
)
##
## Welch Two Sample t-test
##
## data: SixAirlines$PriceRelative and SixAirlines$WidthDifference
## t = -19.284, df = 585.55, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.262697 -1.029268
## sample estimates:
## mean of x mean of y
## 0.4872052 1.6331878
# Welch two-sample t-test on the means of relative price and pitch difference.
t.test(
  x = SixAirlines$PriceRelative,
  y = SixAirlines$PitchDifference
)
##
## Welch Two Sample t-test
##
## data: SixAirlines$PriceRelative and SixAirlines$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.367495 -6.033640
## sample estimates:
## mean of x mean of y
## 0.4872052 6.6877729
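## From the above t-tests we can observe that the p-values for both tests are less than 0.05, hence both null hypotheses are rejected. NOTE(review): a two-sample t-test compares the means of the two variables; it does not by itself measure the association between them (a correlation test such as cor.test() would), so this conclusion should be confirmed.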
##Regression Model
##Building a regression model of relative price with variables width difference and pitch difference.
# Fit relative price on the width and pitch differences. The formula uses bare
# column names resolved through `data`, so coefficient names are clean and
# predict(model, newdata = ...) evaluates the terms in the new data frame
# instead of always reading the SixAirlines columns.
model <- lm(
  PriceRelative ~ WidthDifference + PitchDifference,
  data = SixAirlines
)
summary(model)
##
## Call:
## lm(formula = PriceRelative ~ WidthDifference + PitchDifference,
## data = SixAirlines)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84163 -0.28484 -0.07241 0.17698 1.18778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10514 0.08304 -1.266 0.206077
## WidthDifference 0.11621 0.02356 4.933 1.14e-06 ***
## PitchDifference 0.06019 0.01590 3.785 0.000174 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2561
## F-statistic: 79.65 on 2 and 455 DF, p-value: < 2.2e-16
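## The model is statistically significant overall (F-test p-value < 0.05) and both predictors are significant, although it explains only about 26% of the variance in relative price (R-squared = 0.2593). Equation: Relative Price = -0.105 + 0.116 * (width difference) + 0.060 * (pitch difference)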