# Read the data ----
# The file name is passed directly; wrapping a single literal in
# paste(..., sep = "") was a no-op.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors, which is what the recorded
# summary()/str() output in this script shows. Since R 4.0.0 read.csv()
# would otherwise return them as character vectors.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# The spreadsheet viewer is only useful in an interactive session; skip it
# when the script is run in batch mode.
if (interactive()) {
  View(airlines.df)
}
# Summarize the data ----
library(psych)
summary(airlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Show the column types and the first few values of every variable
str(object = airlines.df)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Pie charts of the categorical variables ----
# One chart per figure; a single layout reset is enough for all four pies.
par(mfrow = c(1, 1))

# Share of observations per airline
pie(table(airlines.df$Airline),
    col = c("Violet", "blue", "green", "yellow", "red", "purple"),
    main = "Airline type by country")

# Share of observations per aircraft manufacturer
pie(table(airlines.df$Aircraft),
    col = c("blue", "red"),
    main = "Aircraft manufacturer by company ")

# Share of domestic vs. international observations
# (title typo "Domesic" corrected to "Domestic")
pie(table(airlines.df$IsInternational),
    col = c("yellow", "purple"),
    main = "Domestic/International ")

# Share of observations per travel month
pie(table(airlines.df$TravelMonth),
    col = c("orange", "purple", "blue", "red"),
    main = "Analysing peak months")
# Distribution of flight duration, one box per airline
boxplot(
  FlightDuration ~ Airline,
  data = airlines.df,
  col = c("green", "blue", " red", "grey", "cyan", "purple"),
  xlab = "Airline",
  ylab = "Flight duration"
)
# Histograms comparing economy (left panel) and premium (right panel) ----

# Seat pitch
par(mfrow = c(1, 2))
hist(airlines.df[["PitchEconomy"]],
     main = "Economy class ", xlab = "Economy Seats Pitch", col = "red")
hist(airlines.df[["PitchPremium"]],
     main = "Premium class ", xlab = "Premium Seats Pitch", col = "yellow")

# Seat width
par(mfrow = c(1, 2))
hist(airlines.df[["WidthEconomy"]],
     main = "Economy class", xlab = "Economy Seats Width", col = " blue")
hist(airlines.df[["WidthPremium"]],
     main = "Premium class", xlab = "Premium Seats Width", col = " green")

# Ticket price
par(mfrow = c(1, 2))
hist(airlines.df[["PriceEconomy"]],
     main = "Economy class", xlab = "Economy Seats Price", col = "navy blue")
hist(airlines.df[["PricePremium"]],
     main = "Premium class", xlab = "Premium Seats Price", col = "dark green")
#To plot Scatter plot #
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
library(lattice)
# Scatter plots (car::scatterplot), one plot per figure ----
par(mfrow = c(1, 1))

# Number of economy seats (x) against economy price (y)
scatterplot(airlines.df$SeatsEconomy, airlines.df$PriceEconomy,
            main = "scatterplot of pricing of no. of seats with price of economy class",
            xlab = "Seats Economy", ylab = "Price Economy ")

# Number of premium seats (x) against premium price (y)
# (y-axis label typo "Pricde Premium" corrected)
scatterplot(airlines.df$SeatsPremium, airlines.df$PricePremium,
            main = "scatterplot of pricing of no. of seats with price of premium economy class",
            xlab = "Seats Premium", ylab = "Price Premium")

# Relative price (x) against total number of seats (y)
scatterplot(airlines.df$PriceRelative, airlines.df$SeatsTotal,
            main = "scatter plot of Price Relative and Seats Total available",
            xlab = "Price Relative", ylab = "Seats Total")

# Relative price (x) against width difference (y).
# The axis labels previously read xlab = "Width", ylab = "Price", i.e. they
# were swapped relative to the plotted data; they now name the variable that
# is actually on each axis.
scatterplot(airlines.df$PriceRelative, airlines.df$WidthDifference,
            main = "scatterplot Price Relative and Width Difference",
            xlab = "Price Relative", ylab = "Width Difference")

# Relative price (x) against pitch difference (y).
# Same label correction as above (was xlab = "Pitch", ylab = "Price").
scatterplot(airlines.df$PriceRelative, airlines.df$PitchDifference,
            main = "scatter plot of PriceRelative and Pitch Difference",
            xlab = "Price Relative", ylab = "Pitch Difference")
# Side-by-side boxplots of the seat counts: economy (left), premium (right)
par(mfrow = c(1, 2))
boxplot(
  airlines.df[["SeatsEconomy"]],
  col = "blue",
  xlab = "Seats Economy",
  main = "pricing of no. of Seats for Economy Class"
)
boxplot(
  airlines.df[["SeatsPremium"]],
  col = " dark green",
  xlab = "Seats Premium",
  main = "pricing of no. of Seats for Premium Class"
)
# Mean and standard deviation of each numeric variable of interest ----
# (lines starting with "##" are the recorded console output)

# Seats, economy
mean(airlines.df[["SeatsEconomy"]])
## [1] 202.3122
sd(airlines.df[["SeatsEconomy"]])
## [1] 76.37353
# Seats, premium
mean(airlines.df[["SeatsPremium"]])
## [1] 33.64847
sd(airlines.df[["SeatsPremium"]])
## [1] 13.26142
# Pitch, economy
mean(airlines.df[["PitchEconomy"]])
## [1] 31.21834
sd(airlines.df[["PitchEconomy"]])
## [1] 0.6551695
# Pitch, premium
mean(airlines.df[["PitchPremium"]])
## [1] 37.90611
sd(airlines.df[["PitchPremium"]])
## [1] 1.313924
# Width, economy
mean(airlines.df[["WidthEconomy"]])
## [1] 17.83843
sd(airlines.df[["WidthEconomy"]])
## [1] 0.5575102
# Width, premium
mean(airlines.df[["WidthPremium"]])
## [1] 19.47162
sd(airlines.df[["WidthPremium"]])
## [1] 1.097173
# Price, economy
mean(airlines.df[["PriceEconomy"]])
## [1] 1327.076
sd(airlines.df[["PriceEconomy"]])
## [1] 988.2733
# Price, premium
mean(airlines.df[["PricePremium"]])
## [1] 1845.258
sd(airlines.df[["PricePremium"]])
## [1] 1288.136
# Relative price
mean(airlines.df[["PriceRelative"]])
## [1] 0.4872052
sd(airlines.df[["PriceRelative"]])
## [1] 0.4505873
# Correlogram of the data set ----
library(corrgram)
# Palette function handed to corrgram() for shading the panels
cols <- colorRampPalette(c("yellow", "skyblue", "red", "darkgreen"))
corrgram(
  airlines.df,
  order = TRUE,
  lower.panel = panel.shade,  # shaded cells below the diagonal
  upper.panel = panel.pie,    # pie glyphs above the diagonal
  text.panel = panel.txt,     # variable names on the diagonal
  col.regions = cols,
  main = "Corrgram of Airlines Industry Analysis"
)
# Two-sample t-test comparing the mean economy price with the mean premium
# price. With the default arguments t.test() runs the Welch (unequal
# variance) version, as the recorded output below confirms.
# NOTE(review): both prices are columns of the same rows, so a paired test
# (paired = TRUE) may be the intended comparison -- confirm before relying on
# this result.
t.test(airlines.df$PriceEconomy,airlines.df$PricePremium)
##
## Welch Two Sample t-test
##
## data: airlines.df$PriceEconomy and airlines.df$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -667.0831 -369.2793
## sample estimates:
## mean of x mean of y
## 1327.076 1845.258
# Correlation test between economy and premium prices. With the default
# method cor.test() computes Pearson's product-moment correlation, as the
# recorded output below confirms.
cor.test(airlines.df$PriceEconomy,airlines.df$PricePremium)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PriceEconomy and airlines.df$PricePremium
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8826622 0.9172579
## sample estimates:
## cor
## 0.9013887
# Linear regression of premium price on airline, travel month, flight
# duration, seat differences, share of premium seats and relative price
rmodel <- lm(
  formula = PricePremium ~ Airline + TravelMonth + FlightDuration +
    PitchDifference + WidthDifference + PercentPremiumSeats +
    PriceRelative,
  data = airlines.df
)
summary(rmodel)
##
## Call:
## lm(formula = PricePremium ~ Airline + TravelMonth + FlightDuration +
## PitchDifference + WidthDifference + PercentPremiumSeats +
## PriceRelative, data = airlines.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2216.1 -408.6 102.1 392.2 4277.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 624.427 534.907 1.167 0.2437
## AirlineBritish -972.777 218.739 -4.447 1.10e-05 ***
## AirlineDelta -1133.805 265.671 -4.268 2.42e-05 ***
## AirlineJet -2497.060 241.586 -10.336 < 2e-16 ***
## AirlineSingapore -2077.175 166.680 -12.462 < 2e-16 ***
## AirlineVirgin -1026.230 210.448 -4.876 1.51e-06 ***
## TravelMonthJul 78.482 111.257 0.705 0.4809
## TravelMonthOct -39.008 94.984 -0.411 0.6815
## TravelMonthSep -4.181 94.494 -0.044 0.9647
## FlightDuration 187.775 12.780 14.693 < 2e-16 ***
## PitchDifference 25.189 116.907 0.215 0.8295
## WidthDifference 259.734 157.784 1.646 0.1004
## PercentPremiumSeats 15.601 9.352 1.668 0.0960 .
## PriceRelative 234.797 100.289 2.341 0.0197 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 755.4 on 444 degrees of freedom
## Multiple R-squared: 0.6658, Adjusted R-squared: 0.6561
## F-statistic: 68.05 on 13 and 444 DF, p-value: < 2.2e-16
# From the above analysis we can say that the model is a good fit overall, as its F-test p-value (< 2.2e-16) is less than 0.05.
# Print the fitted coefficients of the regression model
coef(rmodel)
## (Intercept) AirlineBritish AirlineDelta
## 624.426971 -972.777431 -1133.805429
## AirlineJet AirlineSingapore AirlineVirgin
## -2497.059506 -2077.174783 -1026.229834
## TravelMonthJul TravelMonthOct TravelMonthSep
## 78.481517 -39.007936 -4.181169
## FlightDuration PitchDifference WidthDifference
## 187.774965 25.188610 259.734439
## PercentPremiumSeats PriceRelative
## 15.600923 234.797335