# Load the six-airlines fare data set and take a first look at it.
# NOTE(review): setwd() with a machine-specific path is kept because code
# elsewhere may rely on the working directory; prefer a project-relative path.
setwd("~/Desktop/5 SRM Kashish Mukheja/Downoad content")
# The file name is a plain literal, so no paste() is needed. Text columns are
# read as factors explicitly: R >= 4.0 no longer does this by default, and the
# summary()/str() output recorded in this file shows factor columns.
air <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
View(air)     # open the data in the spreadsheet-style viewer
summary(air)  # per-column summary statistics
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Show the structure of the data frame: dimensions and each column's type.
str(air)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Encode each airline as a number: British = 1, Virgin = 2, Delta = 3,
# Jet = 4, Singapore = 5, AirFrance = 6. Any other value stays NA.
# as.numeric() keeps the column a double, as the literal codes were.
air$airline_code <- as.numeric(
  match(
    air$Airline,
    c("British", "Virgin", "Delta", "Jet", "Singapore", "AirFrance")
  )
)
View(air)
library(psych)
# Descriptive statistics of the numeric airline code.
describe(air$airline_code)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 458 2.89 1.9 2 2.74 1.48 1 6 5 0.46 -1.3 0.09
# Encode the aircraft manufacturer: AirBus = 1, Boeing = 2 (anything else NA).
air$Aircraft_code <- as.numeric(match(air$Aircraft, c("AirBus", "Boeing")))
View(air)
1: For a flight duration of at most 4 hours, the flight is “short haul” and is denoted by 1.
2: For a flight duration greater than 4 hours and at most 8 hours, the flight is “medium haul” and is denoted by 2.
3: For a flight duration greater than 8 hours, the flight is “long haul” and is denoted by 3.
# Haul category from the flight duration:
#   1 = up to 4, 2 = above 4 and up to 8, 3 = above 8.
# left.open = TRUE makes each interval closed on the right, so durations of
# exactly 4 and 8 land in the lower category. Adding 1 shifts the interval
# index 0/1/2 to the codes 1/2/3 and keeps the column a double.
air$fd <- findInterval(air$FlightDuration, c(4, 8), left.open = TRUE) + 1
View(air)
# Calendar month number for each travel month (Jul = 7, Aug = 8, Sep = 9,
# Oct = 10); unmatched values become NA.
air$monthnum <- c(7, 8, 9, 10)[
  match(air$TravelMonth, c("Jul", "Aug", "Sep", "Oct"))
]
# International flag: 1 = International, 0 = Domestic, NA otherwise.
air$intnum <- c(1, 0)[
  match(air$IsInternational, c("International", "Domestic"))
]
# List all columns, including the codes just added.
colnames(air)
## [1] "Airline" "Aircraft" "FlightDuration"
## [4] "TravelMonth" "IsInternational" "SeatsEconomy"
## [7] "SeatsPremium" "PitchEconomy" "PitchPremium"
## [10] "WidthEconomy" "WidthPremium" "PriceEconomy"
## [13] "PricePremium" "PriceRelative" "SeatsTotal"
## [16] "PitchDifference" "WidthDifference" "PercentPremiumSeats"
## [19] "airline_code" "Aircraft_code" "fd"
## [22] "monthnum" "intnum"
# Pitch and width differences expressed as a fraction of the economy value.
air$PitchRelative <- air$PitchDifference / air$PitchEconomy
air$WidthRelative <- air$WidthDifference / air$WidthEconomy
# Numeric-only copy of the data. Columns are selected by name instead of by
# position (3, 6:18, 24, 25), so the selection no longer depends on the order
# in which the derived columns were appended to `air`.
airnum <- air[, c(
  "FlightDuration", "SeatsEconomy", "SeatsPremium",
  "PitchEconomy", "PitchPremium", "WidthEconomy",
  "WidthPremium", "PriceEconomy", "PricePremium",
  "PriceRelative", "SeatsTotal", "PitchDifference",
  "WidthDifference", "PercentPremiumSeats", "PitchRelative",
  "WidthRelative"
)]
View(airnum)
colnames(airnum)
## [1] "FlightDuration" "SeatsEconomy" "SeatsPremium"
## [4] "PitchEconomy" "PitchPremium" "WidthEconomy"
## [7] "WidthPremium" "PriceEconomy" "PricePremium"
## [10] "PriceRelative" "SeatsTotal" "PitchDifference"
## [13] "WidthDifference" "PercentPremiumSeats" "PitchRelative"
## [16] "WidthRelative"
library(lattice)
# Count histograms (lattice) of the fares, the premium-seat share and the
# numeric codes created earlier. Each call stays at top level so the plot is
# auto-printed exactly as before.

# Economy fare.
histogram(
  ~ PriceEconomy,
  data = air,
  type = "count",
  nint = 10,
  main = "Price of Economy",
  xlab = "Economy Price"
)

# Premium economy fare.
histogram(
  ~ PricePremium,
  data = air,
  type = "count",
  nint = 10,
  main = "Price of Premium Economy",
  xlab = "Premium Price"
)

# Share of premium seats.
histogram(
  ~ PercentPremiumSeats,
  data = air,
  type = "count",
  nint = 10,
  main = "Percentage of Premium Seats",
  xlab = "PercentPremiumSeats"
)

# Airline code (six airlines, six bins).
histogram(
  ~ airline_code,
  data = air,
  type = "count",
  nint = 6,
  main = "Airline Company",
  xlab = "AirlineCode"
)

# Aircraft code (two manufacturers, two bins).
histogram(
  ~ Aircraft_code,
  data = air,
  type = "count",
  nint = 2,
  main = "Airbus Vs Boeing",
  xlab = "AircraftCode"
)

# Travel month number (four months, four bins).
histogram(
  ~ monthnum,
  data = air,
  type = "count",
  nint = 4,
  main = "Month number",
  xlab = "monthnum"
)

# Domestic (0) versus international (1) flag.
histogram(
  ~ intnum,
  data = air,
  type = "count",
  nint = 4,
  main = "Domestic VS International",
  xlab = "IsInternational"
)
# For each measure, draw a bar plot of the raw values next to a box plot of
# the same values (one row, two panels). The layout is re-set before every
# pair, as in the original sequence of calls.
invisible(lapply(
  c("PitchDifference", "WidthDifference", "PriceRelative"),
  function(measure) {
    par(mfrow = c(1, 2))
    barplot(air[[measure]])
    boxplot(air[[measure]])
  }
))
# Scatter-plot matrix of economy price, economy pitch and economy width.
pairs(~ PriceEconomy + PitchEconomy + WidthEconomy, data = airnum)
H0: There is no significant difference between the prices of Economy and Premium Economy tickets. H1: There is a significant difference between the prices of Economy and Premium Economy tickets.
# Welch two-sample t-test comparing the mean economy fare with the mean
# premium economy fare.
# NOTE(review): both fares come from the same rows of `air`; confirm whether a
# paired test (paired = TRUE) was intended.
t.test(x = air$PriceEconomy, y = air$PricePremium)
##
## Welch Two Sample t-test
##
## data: air$PriceEconomy and air$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -667.0831 -369.2793
## sample estimates:
## mean of x mean of y
## 1327.076 1845.258
We reject the null hypothesis, since the p-value < 0.05. Hence there is a significant difference between the prices of Economy and Premium Economy tickets.
What factors explain the difference in price between an economy ticket and a Premium Economy airline ticket?
H0: There is no significant difference between the seat pitch of Economy and the seat pitch of Premium Economy. H1: There is a significant difference between the seat pitch of Economy and the seat pitch of Premium Economy.
# Welch two-sample t-test comparing the mean premium pitch with the mean
# economy pitch.
# NOTE(review): both pitches come from the same rows of `air`; confirm whether
# a paired test (paired = TRUE) was intended.
t.test(x = air$PitchPremium, y = air$PitchEconomy)
##
## Welch Two Sample t-test
##
## data: air$PitchPremium and air$PitchEconomy
## t = 97.482, df = 671.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.553067 6.822479
## sample estimates:
## mean of x mean of y
## 37.90611 31.21834
We reject the null hypothesis, since the p-value < 0.05. Hence there is a significant difference between the seat pitch of Economy and the seat pitch of Premium Economy.
H0: There is no significant difference between the seat width of Economy and the seat width of Premium Economy. H1: There is a significant difference between the seat width of Economy and the seat width of Premium Economy.
# Welch two-sample t-test comparing the mean premium width with the mean
# economy width.
# NOTE(review): both widths come from the same rows of `air`; confirm whether
# a paired test (paired = TRUE) was intended.
t.test(x = air$WidthPremium, y = air$WidthEconomy)
##
## Welch Two Sample t-test
##
## data: air$WidthPremium and air$WidthEconomy
## t = 28.4, df = 678.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.520276 1.746100
## sample estimates:
## mean of x mean of y
## 19.47162 17.83843
# Keep only the international flights (rows where intnum is 1).
airint <- subset(air, intnum == 1)
# Numeric-only copy of the international subset. Columns are selected by name
# instead of by position (3, 6:18, 24, 25), so the selection no longer depends
# on the order in which the derived columns were appended to `air`.
airintnum <- airint[, c(
  "FlightDuration", "SeatsEconomy", "SeatsPremium",
  "PitchEconomy", "PitchPremium", "WidthEconomy",
  "WidthPremium", "PriceEconomy", "PricePremium",
  "PriceRelative", "SeatsTotal", "PitchDifference",
  "WidthDifference", "PercentPremiumSeats", "PitchRelative",
  "WidthRelative"
)]
colnames(airintnum)
## [1] "FlightDuration" "SeatsEconomy" "SeatsPremium"
## [4] "PitchEconomy" "PitchPremium" "WidthEconomy"
## [7] "WidthPremium" "PriceEconomy" "PricePremium"
## [10] "PriceRelative" "SeatsTotal" "PitchDifference"
## [13] "WidthDifference" "PercentPremiumSeats" "PitchRelative"
## [16] "WidthRelative"
# Inspect the international-only subset in the data viewer.
View(airint)
# corrgram is needed for the corrgram() calls that follow.
library(corrgram)
# Attaching ellipse masks graphics::pairs, as the recorded attach message
# in this file reports.
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
# Correlogram of the numeric columns for international flights only:
# shaded cells below the diagonal, pie glyphs above it, variable names on it.
corrgram(
  airintnum,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airline for International flights only"
)
# The same correlogram for all flights.
corrgram(
  airnum,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airline for all flights"
)
1. There is a strong positive correlation between PricePremium and FlightDuration, which means that the longer the flight, the higher the price of Premium Economy.
2.There is a positive correlation between PitchEconomy and WidthEconomy.
3.Similarly, there is a positive correlation between PitchPremium and WidthPremium.
4.We can observe a positive correlation between PriceRelative and PitchDifference as well as PriceRelative and WidthDifference
# Linear model of the economy fare on flight duration, the width and pitch
# differences, the international flag and the number of economy seats.
fit2 <- lm(
  formula = PriceEconomy ~ FlightDuration + WidthDifference +
    PitchDifference + intnum + SeatsEconomy,
  data = air
)
summary(fit2)
##
## Call:
## lm(formula = PriceEconomy ~ FlightDuration + WidthDifference +
## PitchDifference + intnum + SeatsEconomy, data = air)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1677.72 -451.00 40.66 458.05 1816.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1493.074 206.245 7.239 1.95e-12 ***
## FlightDuration 83.062 12.396 6.701 6.18e-11 ***
## WidthDifference 238.459 47.608 5.009 7.87e-07 ***
## PitchDifference -502.240 50.647 -9.917 < 2e-16 ***
## intnum 2751.655 271.104 10.150 < 2e-16 ***
## SeatsEconomy -1.667 0.506 -3.295 0.00106 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 732.7 on 452 degrees of freedom
## Multiple R-squared: 0.4563, Adjusted R-squared: 0.4503
## F-statistic: 75.88 on 5 and 452 DF, p-value: < 2.2e-16
# Linear model of the relative price on flight duration, the width and pitch
# differences and the international flag.
fit3 <- lm(
  formula = PriceRelative ~ FlightDuration + WidthDifference +
    PitchDifference + intnum,
  data = air
)
summary(fit3)
##
## Call:
## lm(formula = PriceRelative ~ FlightDuration + WidthDifference +
## PitchDifference + intnum, data = air)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82785 -0.24889 -0.06653 0.13341 1.30701
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.415845 0.092414 -4.500 8.66e-06 ***
## FlightDuration 0.040569 0.006253 6.488 2.29e-10 ***
## WidthDifference 0.089529 0.024166 3.705 0.000238 ***
## PitchDifference 0.152867 0.024902 6.139 1.82e-09 ***
## intnum -0.627755 0.125854 -4.988 8.71e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3719 on 453 degrees of freedom
## Multiple R-squared: 0.3246, Adjusted R-squared: 0.3187
## F-statistic: 54.44 on 4 and 453 DF, p-value: < 2.2e-16
Hence we may infer that the model is statistically significant since the p-value<0.05.
1.PricePremium = 176.53FlightDuration + 332.107WidthDifference -415.877PitchDifference -2021.74.89intnum + Intercept
2. PriceEconomy = 1493.07 + 83.06 FlightDuration + 238.46 WidthDifference - 502.24 PitchDifference + 2751.65 intnum - 1.67 SeatsEconomy
3. PriceRelative = -0.415845 + 0.040569 FlightDuration + 0.089529 WidthDifference + 0.152867 PitchDifference - 0.627755 intnum
Conclusion:-
1.We can infer from the plots that
The major aircraft type was Boeing.
The airline with the most flights was British.
Most flights flew in the month of September.
About 91% of the flights (418 of 458) were international.
2. Hence, from the overall t-test analysis, we may conclude that there is a significant difference between the pitch and width of Economy and Premium Economy seats. This justifies why Premium Economy tickets are priced higher than Economy tickets, mainly on international flights.