This Rmd file compares Premium class and Economy class ticket prices across a variety of airlines and travel months.
#Reading into R:
# Load the airline fare data set into airlines.df.
# The data directory is kept in one variable and joined with file.path(), so
# the script no longer changes the session's working directory via setwd().
data_dir <- "F:/R-Internship/Course related files"
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 too,
# so that summary() reports per-level counts for them.
airlines.df <- read.csv(
  file.path(data_dir, "AirlinesData.csv"),
  stringsAsFactors = TRUE
)
# View() opens a data viewer window; skip it when the script is run
# non-interactively (batch run or knitting).
if (interactive()) {
  View(airlines.df)
}
#Summarizing the data to find the minimum, quartiles, median, mean and maximum of each column:
# Per-column overview of the data frame: minimum, quartiles, median, mean and
# maximum for numeric columns, and level counts for factor columns.
# Standard deviations are not part of this output.
summary(airlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
#Box plots and bar plots for data visualization:
# Price difference per record: premium fare minus economy fare.
airlines.df$pricediff <- airlines.df$PricePremium - airlines.df$PriceEconomy

# Sideways box plot of the price difference, one box per travel month
# (four months, hence four fill colours).
# The stray empty argument after `las = 1` was removed, and the formula now
# resolves its variables through `data =` instead of repeating `airlines.df$`.
boxplot(
  pricediff ~ TravelMonth,
  data = airlines.df,
  main = "Price Difference",
  xlab = "Price Difference",
  ylab = "Travel Month",
  las = 1,
  horizontal = TRUE,
  col = c("red", "blue", "green", "yellow")
)
library(lattice)

# Mean price difference for each airline and for each aircraft make.
price1 <- aggregate(pricediff ~ Airline, data = airlines.df, FUN = mean)
price2 <- aggregate(pricediff ~ Aircraft, data = airlines.df, FUN = mean)

# Bar chart of the per-airline means.
barchart(
  pricediff ~ Airline,
  data = price1,
  col = "Yellow",
  main = "Price Difference vs Airline Type",
  xlab = "Airline",
  ylab = "Price Difference"
)

# Bar chart of the per-aircraft means.
barchart(
  pricediff ~ Aircraft,
  data = price2,
  col = "lightskyblue",
  main = "Price Difference vs Aircraft type",
  xlab = "Aircraft",
  ylab = "Price Difference"
)
#Scatterplots for data visualization
library(car)

# NOTE(review): `spread =` and `smoother.args =` look like the older car
# interface; confirm they are still accepted by the installed car version.

# Pitch difference (y) against price difference (x).
scatterplot(
  PitchDifference ~ pricediff,
  data = airlines.df,
  spread = FALSE,
  pch = 19,
  main = "Scatterplot of Pitch difference vs Price difference",
  xlab = "Price Difference",
  ylab = "Pitch Difference"
)

# Width difference (y) against price difference (x).
scatterplot(
  WidthDifference ~ pricediff,
  data = airlines.df,
  spread = FALSE,
  pch = 19,
  main = "Scatterplot of Width difference vs Price difference",
  xlab = "Price Difference",
  ylab = "Width Difference"
)

# Pairwise scatterplots of flight duration, the two seat-size differences and
# the price difference; the smoother is drawn with a dashed line (lty = 2).
scatterplotMatrix(
  airlines.df[, c(
    "FlightDuration",
    "PitchDifference",
    "WidthDifference",
    "pricediff"
  )],
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Airlines"
)
#Corrgram of the correlations between the variables:
library(corrgram)

# Correlogram of the whole data frame (including the derived pricediff column).
# panel.shade fills the cells below the diagonal, panel.pie the cells above it,
# and panel.txt is used as the text panel; order = TRUE asks corrgram to
# reorder the variables.
corrgram(
  airlines.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airlines.df "
)
#Linear Regression model for hypothesis
# Linear model of the price difference on the seat-width difference, the
# seat-pitch difference and the flight duration; the fit is kept in `model`
# and its coefficient table is printed.
model <- lm(
  formula = pricediff ~ WidthDifference + PitchDifference + FlightDuration,
  data = airlines.df
)
summary(model)
##
## Call:
## lm(formula = pricediff ~ WidthDifference + PitchDifference +
## FlightDuration, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -859.4 -324.7 -62.7 150.1 3331.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -286.933 117.833 -2.435 0.0153 *
## WidthDifference 74.641 30.977 2.410 0.0164 *
## PitchDifference 10.387 20.779 0.500 0.6174
## FlightDuration 80.992 6.754 11.992 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 506.1 on 454 degrees of freedom
## Multiple R-squared: 0.2538, Adjusted R-squared: 0.2489
## F-statistic: 51.48 on 3 and 454 DF, p-value: < 2.2e-16
#T-tests
# Two-sample t-test (unpaired; Welch by default) of the mean of PitchEconomy
# against the mean of pricediff.
# NOTE(review): these two columns appear to measure different things (a seat
# pitch and a fare difference), so a difference in their means is expected by
# construction; confirm this is the intended hypothesis. A paired comparison of
# PricePremium and PriceEconomy may have been meant.
t.test(airlines.df$PitchEconomy,airlines.df$pricediff)
##
## Welch Two Sample t-test
##
## data: airlines.df$PitchEconomy and airlines.df$pricediff
## t = -17.847, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -540.5841 -433.3417
## sample estimates:
## mean of x mean of y
## 31.21834 518.18122
#Correlation Test
# Pearson correlation test (the cor.test default) between flight duration and
# the premium-minus-economy price difference.
cor.test(airlines.df$FlightDuration,airlines.df$pricediff)
##
## Pearson's product-moment correlation
##
## data: airlines.df$FlightDuration and airlines.df$pricediff
## t = 11.435, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3976578 0.5403379
## sample estimates:
## cor
## 0.4720837
# Pearson correlation test (the cor.test default) between the seat-width
# difference and the premium-minus-economy price difference.
cor.test(airlines.df$WidthDifference,airlines.df$pricediff)
##
## Pearson's product-moment correlation
##
## data: airlines.df$WidthDifference and airlines.df$pricediff
## t = 2.5291, df = 456, p-value = 0.01177
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02627012 0.20700978
## sample estimates:
## cor
## 0.1176138