# Read data into R
# The working directory change and attach() are kept because the statements
# further down refer to the columns by bare name (e.g. FlightDuration).
setwd("~/R")
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors, matching the str() output recorded
# below; since R 4.0 read.csv() would otherwise return them as character.
airlines.df <- read.csv("SixAirlines.csv", stringsAsFactors = TRUE)
attach(airlines.df)
# Data summary
# Load psych for describe() and print its per-column summary of the data
# frame. The pasted output below lists n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis and se for each of the 18 columns; the rows
# marked with * are the columns that str() reports as factors.
library(psych)
describe(airlines.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
# Data types
# Print the structure of the data frame: one line per column with its type and
# the first few values (458 observations of 18 variables in the output below).
str(airlines.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...
# Visualizing single-variable distributions.
#
# For every numeric column draw a boxplot and a histogram side by side.
# A histogram is used instead of barplot(): barplot() on a raw numeric vector
# draws one bar per row of the data, which shows the row order rather than the
# distribution of the variable.
# local() keeps the helper variables out of the global environment.
local({
  # Column name -> human readable label used in the plot titles.
  plot_labels <- c(
    FlightDuration      = "Flight Duration",
    SeatsEconomy        = "No. of Economy Seats",
    SeatsPremium        = "No. of Premium Economy Seats",
    PitchEconomy        = "Economy Pitch",
    PitchPremium        = "Premium Economy Pitch",
    WidthEconomy        = "Economy Width",
    WidthPremium        = "Premium Width",
    PriceEconomy        = "Economy Seat Price",
    PricePremium        = "Premium Seat Price",
    PriceRelative       = "Relative Price",
    SeatsTotal          = "Total Seats",
    PitchDifference     = "Pitch Difference",
    WidthDifference     = "Width Difference",
    PercentPremiumSeats = "Premium Seat Percentage"
  )

  # Two panels per page: boxplot on the left, histogram on the right.
  par(mfrow = c(1, 2))
  for (column in names(plot_labels)) {
    label <- plot_labels[[column]]
    # Index the data frame directly so the plots do not depend on attach().
    column_values <- airlines.df[[column]]
    boxplot(column_values, main = paste(label, "Boxplot"))
    hist(column_values, main = paste(label, "Histogram"), xlab = label)
  }
  # Back to a single panel for the plots that follow.
  par(mfrow = c(1, 1))
})
# Scatter plots to analyse dependency between PriceRelative and WidthDifference, PitchDifference, FlightDuration, No. of Premium Seats, Airline.
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Relative price against each candidate driver, one plot per predictor.
# data = airlines.df makes the formulas resolve against the data frame itself
# instead of relying on attach().
scatterplot(PriceRelative ~ WidthDifference, data = airlines.df,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Scatter plot of Relative Price vs Width Difference",
            xlab = "Width Difference", ylab = "Relative Price")

scatterplot(PriceRelative ~ PitchDifference, data = airlines.df,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Scatter plot of Relative Price vs Pitch Difference",
            xlab = "Pitch Difference", ylab = "Relative Price")

scatterplot(PriceRelative ~ FlightDuration, data = airlines.df,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Scatter plot of Relative Price vs Flight Duration",
            xlab = "Flight Duration", ylab = "Relative Price")

scatterplot(PriceRelative ~ SeatsPremium, data = airlines.df,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Scatter plot of Relative Price vs No. of Premium Seats",
            xlab = "Premium Seats", ylab = "Relative Price")

# Airline is a factor, so the title and x label name the airline rather than
# repeating the Premium Seats labels of the previous plot.
# NOTE(review): the ids printed after this call look like labelled outliers,
# which suggests car draws per-airline boxplots here - confirm.
scatterplot(PriceRelative ~ Airline, data = airlines.df,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Relative Price by Airline",
            xlab = "Airline", ylab = "Relative Price")

##  [1] "406" "407" "212" "408" "213" "426" "427" "214" "409" "339" "367"
## [12] "368" "369" "110" "111" "240" "241" "260" "271" "272" "185" "186"
## [23] "187" "188" "189" "190"
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of the whole data frame: shaded cells in the lower panel, pie
# glyphs in the upper panel and text labels drawn by panel.txt.
# order=TRUE lets corrgram reorder the variables instead of keeping the
# column order of the data frame.
corrgram(airlines.df, order=TRUE, lower.panel=panel.shade,
         upper.panel=panel.pie, text.panel=panel.txt,
         main="Corrgram of airline dataset")

* From the corrgram, the relevant conclusions are: 1. The prices of the tickets are highly correlated with the Pitch and Width of the seats, with Pitch showing this correlation in both the Premium and Economy classes. 2. The pricing of tickets is highly correlated with the flight duration.

# Variance-Covariance Matrices.
# Compute the variance-covariance matrix of the numeric columns of the data.
# The earlier var(1:18, 1:18) / cov(1:18, 1:18) calls only returned the
# variance of the integers 1..18 (28.5) and never touched the data.
# var() on a data frame returns the same matrix as cov(), so one call suffices.
numeric_columns <- vapply(airlines.df, is.numeric, logical(1))
cov(airlines.df[, numeric_columns, drop = FALSE])

Running a t-test,

# Every row of airlines.df carries both the Economy and the Premium Economy
# price, so the two samples are paired rather than independent. A paired test
# compares the per-row price difference; var.equal does not apply to it.
t.test(airlines.df$PriceEconomy, airlines.df$PricePremium, paired = TRUE)
## (Re-run to regenerate the output; the "Two Sample t-test" output recorded
## here previously came from the unpaired, equal-variance call.)

The t-test yields a low p-value (< 0.01), which indicates a significant difference between the Economy and Premium Economy prices.

# Linear model of the relative price on the pitch difference, width
# difference, flight duration and percentage of premium seats. The variables
# are resolved by bare name, i.e. through the attach() at the top of the file.
m<-lm(PriceRelative~PitchDifference+WidthDifference+FlightDuration+PercentPremiumSeats)
# Print coefficients, residual summary, R-squared and the overall F-test.
summary(m)
## 
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference + 
##     FlightDuration + PercentPremiumSeats)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79439 -0.29424 -0.03427  0.16197  1.13688 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.179033   0.101492  -1.764  0.07840 .  
## PitchDifference      0.059311   0.015921   3.725  0.00022 ***
## WidthDifference      0.118140   0.024555   4.811 2.05e-06 ***
## FlightDuration       0.021707   0.005085   4.269 2.39e-05 ***
## PercentPremiumSeats -0.005999   0.003898  -1.539  0.12454    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.381 on 453 degrees of freedom
## Multiple R-squared:  0.2913, Adjusted R-squared:  0.285 
## F-statistic: 46.54 on 4 and 453 DF,  p-value: < 2.2e-16

Inference: