Task 1d:

# Load the six-airline fare data set from the working directory and attach
# psych, whose describe() is used for the summary below.
airlines.df <- read.csv("SixAirlinesDataV2.csv")
library(psych)
# The spreadsheet-style viewer is only useful with someone at the console,
# so skip it when the script is run in batch mode or knitted.
if (interactive()) {
  View(airlines.df)
}

Task 1d: Summary

# Per-column descriptive statistics (n, mean, sd, median, ...) from psych.
psych::describe(airlines.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
# Bar charts of row counts for the categorical columns, one chart per page.

# Rows per airline.
par(mfrow = c(1, 1))
barplot(
  table(airlines.df[["Airline"]]),
  col = c("blue", "green", "red", "yellow", "pink", "black"),
  main = "Airline split"
)

# Rows per aircraft manufacturer.
par(mfrow = c(1, 1))
barplot(
  table(airlines.df[["Aircraft"]]),
  col = c("blue", "turquoise"),
  main = "Aircraft manufacturer split"
)

# Rows per travel month.
par(mfrow = c(1, 1))
barplot(
  table(airlines.df[["TravelMonth"]]),
  col = c("purple", "pink", "purple", "pink"),
  main = "Travel month analysis"
)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Economy class: number of seats against ticket price (car::scatterplot).
par(mfrow = c(1, 1))
with(
  airlines.df,
  scatterplot(
    SeatsEconomy, PriceEconomy,
    main = "Number of seats in economy class with price of economy class",
    xlab = "Number of seats in economy class",
    ylab = "Price of economy class"
  )
)

library(lattice)
# Premium class: number of seats against ticket price.
with(
  airlines.df,
  scatterplot(
    SeatsPremium, PricePremium,
    main = "Number of seats in premium class with price of premium class",
    xlab = "Number of seats in premium class",
    ylab = "Price of premium class"
  )
)

# Economy fares split by the IsInternational flag: histogram, then boxplot.
#
# par(mfrow) only lays out base-graphics plots. The lattice histogram always
# takes a whole page, so a 2x1 layout here merely squeezed the base boxplot
# into the top half of its own page. Draw one plot per page instead.
par(mfrow = c(1, 1))
# Explicit print() so the lattice plot is also drawn when the script is
# source()d rather than run at top level.
print(
  histogram(
    ~ PriceEconomy | IsInternational,
    data = airlines.df,
    col = "orange",
    main = "Price of economy class tickets in international and domestic flights"
  )
)

boxplot(
  PriceEconomy ~ IsInternational,
  data = airlines.df,
  ylab = "Price Economy",
  xlab = "Destination",
  col = c("pink", "grey"),
  main = "Price of economy class tickets in international and domestic flights"
)

# Premium economy fares split by the IsInternational flag: histogram, then
# boxplot.
#
# par(mfrow) only lays out base-graphics plots. The lattice histogram always
# takes a whole page, so a 2x1 layout here merely squeezed the base boxplot
# into the top half of its own page. Draw one plot per page instead.
par(mfrow = c(1, 1))
# Explicit print() so the lattice plot is also drawn when the script is
# source()d rather than run at top level.
print(
  histogram(
    ~ PricePremium | IsInternational,
    data = airlines.df,
    col = "maroon",
    main = "Price of premium economy class tickets in international and domestic flights"
  )
)

boxplot(
  PricePremium ~ IsInternational,
  data = airlines.df,
  ylab = "Price PremiumEconomy",
  xlab = "Destination",
  col = c("green2", "yellow2"),
  main = "Price of premium economy class tickets in international and domestic flights"
)

# Ticket price (x axis) against flight duration (y axis) for each class.
par(mfrow = c(2, 1))
with(
  airlines.df,
  scatterplot(
    PricePremium, FlightDuration,
    main = "premium class pricing with flight duration",
    xlab = "Price of premium class",
    ylab = "Duration of flight"
  )
)

with(
  airlines.df,
  scatterplot(
    PriceEconomy, FlightDuration,
    main = "economy class pricing with flight duration",
    xlab = "Price of economy class",
    ylab = "Duration of flight"
  )
)

# Pitch difference by the IsInternational flag (base boxplot), then per
# airline grouped by the same flag (lattice barchart).
#
# par(mfrow) only lays out base-graphics plots. The lattice barchart always
# takes a whole page, so a 1x2 layout here merely squeezed the boxplot into
# the left half of its own page. Draw one plot per page instead.
par(mfrow = c(1, 1))
boxplot(
  PitchDifference ~ IsInternational,
  data = airlines.df,
  main = "Pitch difference with terminal",
  xlab = "Destination",
  ylab = "pitch difference"
)
# Explicit print() so the lattice plot is also drawn when the script is
# source()d rather than run at top level.
print(
  barchart(
    PitchDifference ~ Airline,
    data = airlines.df,
    groups = IsInternational,
    auto.key = TRUE,
    par.settings = simpleTheme(col = c("darkblue", "turquoise")),
    main = "Pitch difference with terminal in each airlines",
    xlab = "airlines"
  )
)

# Width difference by the IsInternational flag (base boxplot), then per
# airline grouped by the same flag (lattice barchart).
#
# par(mfrow) only lays out base-graphics plots. The lattice barchart always
# takes a whole page, so a 1x2 layout here merely squeezed the boxplot into
# the left half of its own page. Draw one plot per page instead.
par(mfrow = c(1, 1))
boxplot(
  WidthDifference ~ IsInternational,
  data = airlines.df,
  main = "Width difference with terminal",
  xlab = "Destination",
  # The y axis shows WidthDifference; the label previously said
  # "pitch difference" (copy-paste from the pitch plot).
  ylab = "width difference"
)
# Explicit print() so the lattice plot is also drawn when the script is
# source()d rather than run at top level.
print(
  barchart(
    WidthDifference ~ Airline,
    data = airlines.df,
    groups = IsInternational,
    auto.key = TRUE,
    par.settings = simpleTheme(col = c("orange", "yellow")),
    main = "Width difference with terminal in each airlines",
    xlab = "airlines"
  )
)

T-Test: Null hypothesis: there is no difference between the mean price of an economy class ticket and the mean price of a premium economy class ticket.

# Two-sample t-test (equal variances assumed, unpaired) comparing the mean
# economy fare with the mean premium economy fare.
# NOTE(review): both prices are taken from the same rows, so a paired test
# may be the more appropriate design - confirm before relying on this result.
t.test(
  airlines.df$PriceEconomy,
  airlines.df$PricePremium,
  var.equal = TRUE,
  paired = FALSE
)
## 
##  Two Sample t-test
## 
## data:  airlines.df$PriceEconomy and airlines.df$PricePremium
## t = -6.8304, df = 914, p-value = 1.544e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0699 -369.2926
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

Comments: The null hypothesis is rejected because of the small p-value (p = 1.544e-11, < 0.05).

Pearson’s Correlation Test

  1. Between the price difference and the width difference
# Pearson correlation between the premium-minus-economy price gap and the
# seat width difference.
cor.test(
  (airlines.df$PricePremium - airlines.df$PriceEconomy),
  airlines.df$WidthDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  (airlines.df$PricePremium - airlines.df$PriceEconomy) and airlines.df$WidthDifference
## t = 2.5291, df = 456, p-value = 0.01177
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02627012 0.20700978
## sample estimates:
##       cor 
## 0.1176138

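Comments: p-value = 0.01177 (< 0.05), so the correlation between the price difference and the width difference is statistically significant, although weak (r ≈ 0.12).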

  2. Between the price difference and the pitch difference
# Pearson correlation between the premium-minus-economy price gap and the
# seat pitch difference.
cor.test(
  (airlines.df$PricePremium - airlines.df$PriceEconomy),
  airlines.df$PitchDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  (airlines.df$PricePremium - airlines.df$PriceEconomy) and airlines.df$PitchDifference
## t = 2.7688, df = 456, p-value = 0.005855
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03739893 0.21764764
## sample estimates:
##       cor 
## 0.1285851

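Comments: p-value = 0.005855 (< 0.05), so the correlation between the price difference and the pitch difference is statistically significant, although weak (r ≈ 0.13).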

  3. Between the price difference and the flight duration
# Pearson correlation between the premium-minus-economy price gap and the
# flight duration.
cor.test(
  (airlines.df$PricePremium - airlines.df$PriceEconomy),
  airlines.df$FlightDuration
)
## 
##  Pearson's product-moment correlation
## 
## data:  (airlines.df$PricePremium - airlines.df$PriceEconomy) and airlines.df$FlightDuration
## t = 11.435, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3976578 0.5403379
## sample estimates:
##       cor 
## 0.4720837

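Comments: p-value < 2.2e-16 (< 0.05), so the correlation between the price difference and the flight duration is statistically significant and moderate in strength (r ≈ 0.47).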

Regression Analysis

# Linear regression of the premium-minus-economy price gap on pitch
# difference, width difference and flight duration, followed by the usual
# coefficient / fit summary.
fit <- lm(
  (airlines.df$PricePremium - airlines.df$PriceEconomy) ~
    airlines.df$PitchDifference +
    airlines.df$WidthDifference +
    airlines.df$FlightDuration
)
summary(fit)
## 
## Call:
## lm(formula = (airlines.df$PricePremium - airlines.df$PriceEconomy) ~ 
##     airlines.df$PitchDifference + airlines.df$WidthDifference + 
##         airlines.df$FlightDuration)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -859.4 -324.7  -62.7  150.1 3331.5 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -286.933    117.833  -2.435   0.0153 *  
## airlines.df$PitchDifference   10.387     20.779   0.500   0.6174    
## airlines.df$WidthDifference   74.641     30.977   2.410   0.0164 *  
## airlines.df$FlightDuration    80.992      6.754  11.992   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.1 on 454 degrees of freedom
## Multiple R-squared:  0.2538, Adjusted R-squared:  0.2489 
## F-statistic: 51.48 on 3 and 454 DF,  p-value: < 2.2e-16
# Estimated regression coefficients of the model fitted above.
coef(fit)
##                 (Intercept) airlines.df$PitchDifference 
##                  -286.93258                    10.38682 
## airlines.df$WidthDifference  airlines.df$FlightDuration 
##                    74.64098                    80.99227

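Comments: The overall model is statistically significant (F = 51.48, p-value < 2.2e-16), but it explains only about 25% of the variance in the price difference (R-squared = 0.2538), so the fit is modest. FlightDuration (p < 2e-16) and WidthDifference (p = 0.0164) are significant at the 5% level, so the null hypothesis of a zero coefficient is rejected for these two variables; PitchDifference (p = 0.6174) is not significant, so its null hypothesis cannot be rejected.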

Corrgram

# Corrgram of the data set on a single page: shaded cells below the diagonal,
# pie glyphs above it, variable names on the diagonal. order = TRUE asks
# corrgram to reorder the variables.
library(corrgram)
par(mfrow = c(1, 1))
corrgram(
  airlines.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Airlines dataset"
)

Comments: 1. The pricing of tickets is strongly correlated with the flight duration. 2. The price of premium economy tickets is strongly correlated with the pitch of the seats. 3. The price of premium economy tickets is strongly correlated with the width of the seats. 4. The price of premium economy tickets is strongly correlated with the number of seats. 5. The same is true of economy seats.

EXECUTIVE SUMMARY: 1. The variation in prices was greater for international flights. 2. Pitch and width differences were greater on international flights. 3. Prices of premium economy seats increase with increasing width, pitch and flight duration. 4. It is obvious that premium economy class airline tickets are more expensive than economy class airline tickets; the contributing factors could be width difference, pitch difference and flight duration.