# Read the data

# Load the six-airlines fare data set from the current working directory.
# stringsAsFactors = TRUE keeps text columns as factors on every R version
# (the default changed to FALSE in R 4.0.0), matching the str() output below.
mydata <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(mydata)
}

# Summarize the data

# attach() places the columns of mydata on the search path, so later code can
# refer to them by bare name (the t.test() call further down relies on this).
attach(mydata)
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# psych::describe() prints one row of summary statistics per column of mydata;
# the recorded output follows.
describe(mydata)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23

# Data Types

# Compact listing of each column's type and first few values.
str(object = mydata)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Understand the mean, median and standard deviation of flight duration

# Summary statistics (mean, sd, median, ...) for flight duration.
library(psych)
describe(mydata[["FlightDuration"]])
##    vars   n mean   sd median trimmed  mad  min   max range  skew kurtosis
## X1    1 458 7.58 3.54   7.79    7.57 4.81 1.25 14.66 13.41 -0.07    -1.12
##      se
## X1 0.17

Understand the mean, median and standard deviation of seats economy

# Summary statistics for the number of economy seats.
describe(mydata[["SeatsEconomy"]])
##    vars   n   mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 458 202.31 76.37    185  194.64 85.99  78 389   311 0.72    -0.36
##      se
## X1 3.57

Understand the mean, median and standard deviation of seats premium

# Summary statistics for the number of premium seats.
describe(mydata[["SeatsPremium"]])
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 458 33.65 13.26     36   33.35 11.86   8  66    58 0.23    -0.46
##      se
## X1 0.62

Understand the mean, median and standard deviation of Price relative

# Summary statistics for the relative price column.
describe(mydata[["PriceRelative"]])
##    vars   n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 458 0.49 0.45   0.36    0.42 0.41 0.02 1.89  1.87 1.17     0.72
##      se
## X1 0.02

Understand the mean, median and standard deviation of Price economy

# Summary statistics for the economy ticket price.
describe(mydata[["PriceEconomy"]])
##    vars   n    mean     sd median trimmed     mad min  max range skew
## X1    1 458 1327.08 988.27   1242  1244.4 1159.39  65 3593  3528 0.51
##    kurtosis    se
## X1    -0.88 46.18

Understand the mean, median and standard deviation of Price Premium

# Summary statistics for the premium ticket price.
describe(mydata[["PricePremium"]])
##    vars   n    mean      sd median trimmed     mad min  max range skew
## X1    1 458 1845.26 1288.14   1737 1799.05 1845.84  86 7414  7328  0.5
##    kurtosis    se
## X1     0.43 60.19

Understand the mean, median and standard deviation of Pitch difference

# Summary statistics for the pitch difference column.
describe(mydata[["PitchDifference"]])
##    vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 458 6.69 1.76      7    6.76   0   2  10     8 -0.54     1.78 0.08

Understand the mean, median and standard deviation of Width Difference

# Summary statistics for the width difference column.
describe(mydata[["WidthDifference"]])
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 458 1.63 1.19      1    1.53   0   0   4     4 0.84    -0.53 0.06

Understand the mean, median and standard deviation of Percent premium seats

# Summary statistics for the percentage of premium seats.
describe(mydata[["PercentPremiumSeats"]])
##    vars   n  mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 458 14.65 4.84  13.21   14.31 2.68 4.71 24.69 19.98 0.71     0.28
##      se
## X1 0.23

Visualizing The Variables

Bar Plot For Flight Duration

# Count how often each distinct flight-duration value occurs and draw the
# counts as bars.
counts <- table(mydata[["FlightDuration"]])
barplot(
  height = counts,
  xlab = "FlightDuration",
  main = "Flight Duration"
)

Bar Plot For Economy seats

# Count how often each distinct economy-seat count occurs and draw the
# counts as bars.
counts <- table(mydata[["SeatsEconomy"]])
barplot(
  height = counts,
  xlab = "SeatsEconomy",
  main = "Economy Seats"
)

Bar Plot For Premium Seats

# Count how often each distinct premium-seat count occurs and draw the
# counts as bars.
counts <- table(mydata[["SeatsPremium"]])
barplot(
  height = counts,
  xlab = "SeatsPremium",
  main = "PremiumSeats"
)

Bar Plot For Percentage of premium seats

# Count how often each distinct premium-seat percentage occurs and draw the
# counts as bars.
counts <- table(mydata[["PercentPremiumSeats"]])
barplot(
  height = counts,
  xlab = "PercentPremiumSeats",
  main = "PercentPremiumSeats"
)

Bar Plot For Price Relative

# Count how often each distinct relative-price value occurs and draw the
# counts as bars.
counts <- table(mydata[["PriceRelative"]])
barplot(
  height = counts,
  xlab = "PriceRelative",
  main = "PriceRelative"
)

Bar Plot for width difference

# Count how often each distinct width-difference value occurs and draw the
# counts as bars.
counts <- table(mydata[["WidthDifference"]])
barplot(
  height = counts,
  xlab = "WidthDifference",
  main = "WidthDifference"
)

Bar Plot For Pitch Difference

# Count how often each distinct pitch-difference value occurs and draw the
# counts as bars. The title now names the plotted variable; it previously
# read "WidthDifference", copied from the preceding plot.
counts <- table(mydata$PitchDifference)
barplot(counts,
  main = "PitchDifference",
  xlab = "PitchDifference"
)

SCATTERPLOT VISUALIZATION

Scatter Plot of Price Economy Vs Price Relative

library(car)
## Warning: package 'car' was built under R version 3.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
 # car::scatterplot of economy price (y) against relative price (x),
 # drawn with solid points (pch = 19).
 scatterplot(
   PriceEconomy ~ PriceRelative,
   data = mydata,
   main = "Scatterplot of PriceEconomy vs. PriceRelative",
   xlab = "PriceRelative",
   ylab = "PriceEconomy",
   pch = 19,
   spread = FALSE,
   smoother.args = list(lty = 2)
 )

Scatter Plot of Price Economy Vs Width Difference

# car::scatterplot of economy price (y) against width difference (x),
# drawn with solid points (pch = 19).
scatterplot(
  PriceEconomy ~ WidthDifference,
  data = mydata,
  main = "Scatterplot of PriceEconomy vs. WidthDifference",
  xlab = "WidthDifference",
  ylab = "PriceEconomy",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Scatter Plot of Price Economy Vs Pitch Difference

# car::scatterplot of economy price (y) against pitch difference (x),
# drawn with solid points (pch = 19).
scatterplot(
  PriceEconomy ~ PitchDifference,
  data = mydata,
  main = "Scatterplot of PriceEconomy vs. pitchDifference",
  xlab = "PitchDifference",
  ylab = "PriceEconomy",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Scatter Plot Of Price Premium Vs Price Relative

# car::scatterplot of premium price (y) against relative price (x),
# drawn with solid points (pch = 19).
scatterplot(
  PricePremium ~ PriceRelative,
  data = mydata,
  main = "Scatterplot of PricePremium vs. PriceRelative",
  xlab = "PriceRelative",
  ylab = "PricePremium",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Scatter Plot of Price Premium Vs Width Difference

# car::scatterplot of premium price (y) against width difference (x),
# drawn with solid points (pch = 19).
scatterplot(
  PricePremium ~ WidthDifference,
  data = mydata,
  main = "Scatterplot of PricePremium vs. WidthDifference",
  xlab = "WidthDifference",
  ylab = "PricePremium",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Scatter Plot of Price Premium Vs Pitch Difference

# car::scatterplot of premium price (y) against pitch difference (x),
# drawn with solid points (pch = 19).
scatterplot(
  PricePremium ~ PitchDifference,
  data = mydata,
  main = "Scatterplot of PricePremium vs. PitchDifference",
  xlab = "PitchDifference",
  ylab = "PricePremium",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Scatter plot of price premium Vs flight duration

# car::scatterplot of premium price (y) against flight duration (x),
# drawn with solid points (pch = 19).
scatterplot(
  PricePremium ~ FlightDuration,
  data = mydata,
  main = "Scatterplot of PricePremium vs. Flight Duration",
  xlab = "FlightDuration",
  ylab = "PricePremium",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Scatter plot of price economy vs flight duration

# car::scatterplot of economy price (y) against flight duration (x),
# drawn with solid points (pch = 19).
scatterplot(
  PriceEconomy ~ FlightDuration,
  data = mydata,
  main = "Scatterplot of PriceEconomy vs. Flight Duration",
  xlab = "FlightDuration",
  ylab = "PriceEconomy",
  pch = 19,
  spread = FALSE,
  smoother.args = list(lty = 2)
)

Measuring Association

Correlation between Price Economy and Price Relative

# Correlation (cor() default method) of economy price with relative price.
with(mydata, cor(PriceEconomy, PriceRelative))
## [1] -0.2885671

Correlation Between Price Premium and Price Relative

# Correlation (cor() default method) of premium price with relative price.
with(mydata, cor(PricePremium, PriceRelative))
## [1] 0.03184654

Correlation Between Price Economy & Pitch Difference

# Correlation (cor() default method) of economy price with pitch difference.
with(mydata, cor(PriceEconomy, PitchDifference))
## [1] -0.09952511

Correlation between price premium & Pitch Difference

# Correlation (cor() default method) of premium price with pitch difference.
with(mydata, cor(PricePremium, PitchDifference))
## [1] -0.01806629

Correlation between price economy & width difference

# Correlation (cor() default method) of economy price with width difference.
with(mydata, cor(PriceEconomy, WidthDifference))
## [1] -0.08449975

Correlation between price premium & width difference

# Correlation (cor() default method) of premium price with width difference.
with(mydata, cor(PricePremium, WidthDifference))
## [1] -0.01151218

Correlation between price economy and flight duration

# Correlation (cor() default method) of economy price with flight duration.
with(mydata, cor(PriceEconomy, FlightDuration))
## [1] 0.5666404

Correlation between price premium and flight duration

# Correlation (cor() default method) of premium price with flight duration.
with(mydata, cor(PricePremium, FlightDuration))
## [1] 0.6487398

# Correlation Matrix

# Cross-correlation table: one row per explanatory column in x, one column
# per price column in y.
x <- mydata[c(
  "PriceRelative", "WidthDifference", "PitchDifference",
  "FlightDuration", "PercentPremiumSeats"
)]
y <- mydata[c("PriceEconomy", "PricePremium")]
cor(x = x, y = y)
##                     PriceEconomy PricePremium
## PriceRelative        -0.28856711   0.03184654
## WidthDifference      -0.08449975  -0.01151218
## PitchDifference      -0.09952511  -0.01806629
## FlightDuration        0.56664039   0.64873981
## PercentPremiumSeats   0.06532232   0.11639097

# Corrgram Representation

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
## Warning: package 'corrgram' was built under R version 3.4.3
# Corrgram of mydata: shaded cells below the diagonal, pie glyphs above it,
# and the variable names on the diagonal.
corrgram(
  mydata,
  order = TRUE,
  main = "Corrgram of mydata intercorrelations",
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)

PitchPremium, WidthPremium, PitchDifference and WidthDifference have a strong positive association with one another. There is a strong positive association between PriceEconomy and PricePremium. There is a strong positive association between FlightDuration and PricePremium, and between FlightDuration and PriceEconomy. There is a negative relationship between SeatsTotal and PriceEconomy, and between SeatsTotal and PricePremium.

# Variance-Covariance Matrix

# Cross-covariance table: one row per explanatory column in x, one column
# per price column in y.
x <- mydata[c(
  "PriceRelative", "WidthDifference", "PitchDifference",
  "FlightDuration", "PercentPremiumSeats"
)]
y <- mydata[c("PriceEconomy", "PricePremium")]
cov(x = x, y = y)
##                     PriceEconomy PricePremium
## PriceRelative         -128.49992     18.48429
## WidthDifference        -99.31545    -17.63614
## PitchDifference       -173.27806    -40.99816
## FlightDuration        1983.54017   2959.97830
## PercentPremiumSeats    312.61077    726.01582

Hypothesis Testing

Is the difference between the mean prices of Economy and Premium class significant?

# Welch two-sample t-test comparing mean economy and premium prices.
# with() resolves the column names inside mydata explicitly, so the call no
# longer depends on an earlier attach(mydata); the printed "data:" line is
# unchanged.
# NOTE(review): both prices come from the same row (same flight), so a paired
# test (paired = TRUE) may be the more appropriate design -- confirm.
with(mydata, t.test(PriceEconomy, PricePremium))
## 
##  Welch Two Sample t-test
## 
## data:  PriceEconomy and PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0831 -369.2793
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

The difference between the mean prices of Economy and Premium class is statistically significant (p-value = 1.605e-11).

Linear Regression Model

Is there a relationship between PriceEconomy and PriceRelative, FlightDuration, PitchDifference and WidthDifference?

Is there a relationship between PricePremium and PriceRelative, FlightDuration, PitchDifference and WidthDifference?

PriceEconomy = beta_0 + beta_1 * PriceRelative + beta_2 * FlightDuration + beta_3 * WidthDifference + beta_4 * PitchDifference

PricePremium = beta_0 + beta_1 * PriceRelative + beta_2 * FlightDuration + beta_3 * WidthDifference + beta_4 * PitchDifference

For PriceEconomy

Null hypothesis: beta_1 = beta_2 = beta_3 = beta_4 = 0, i.e. none of the independent variables affects PriceEconomy. Alternative hypothesis: at least one of beta_1, beta_2, beta_3, beta_4 ≠ 0, i.e. at least one of the independent variables affects PriceEconomy.

# Ordinary least squares fit of the economy price on the four predictors,
# followed by the coefficient table and overall F-test.
m1 <- lm(
  formula = PriceEconomy ~ PriceRelative + FlightDuration +
    PitchDifference + WidthDifference,
  data = mydata
)
summary(m1)
## 
## Call:
## lm(formula = PriceEconomy ~ PriceRelative + FlightDuration + 
##     PitchDifference + WidthDifference, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1578.18  -457.08    -2.81   550.14  1714.01 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       298.44     166.78   1.789   0.0742 .  
## PriceRelative   -1029.69      87.31 -11.793  < 2e-16 ***
## FlightDuration    182.07       9.66  18.849  < 2e-16 ***
## PitchDifference   -30.80      29.54  -1.043   0.2977    
## WidthDifference   218.31      44.91   4.861 1.61e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 709.9 on 453 degrees of freedom
## Multiple R-squared:  0.4885, Adjusted R-squared:  0.484 
## F-statistic: 108.2 on 4 and 453 DF,  p-value: < 2.2e-16

We reject the null hypothesis, as the p-value of the F-statistic is very low (< 2.2e-16). The coefficient estimates establish that PriceRelative, FlightDuration and WidthDifference affect the price of an economy ticket, while PitchDifference has a statistically insignificant influence on the price of economy tickets (p = 0.2977).

Beta Coefficients

# Estimated coefficients of the economy-price model, via the standard accessor.
coef(m1)
##     (Intercept)   PriceRelative  FlightDuration PitchDifference 
##        298.4373      -1029.6893        182.0740        -30.7959 
## WidthDifference 
##        218.3078

Confidence Interval

# 95% confidence intervals for the economy-price model coefficients
# (0.95 is confint()'s default level, spelled out here).
confint(m1, level = 0.95)
##                       2.5 %     97.5 %
## (Intercept)       -29.33082  626.20546
## PriceRelative   -1201.28243 -858.09613
## FlightDuration    163.09080  201.05722
## PitchDifference   -88.84314   27.25133
## WidthDifference   130.04826  306.56739

# Visualizing the beta coefficients

library(coefplot)
## Warning: package 'coefplot' was built under R version 3.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.3
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Coefficient plot for the economy-price model, restricted to the four
# predictors named below.
coefplot(
  m1,
  predictors = c(
    "PriceRelative", "FlightDuration",
    "PitchDifference", "WidthDifference"
  )
)
## Warning: Ignoring unknown aesthetics: xmin, xmax

We can infer that only PitchDifference is statistically insignificant, as its confidence interval includes zero, while the others (WidthDifference, PriceRelative and FlightDuration) are statistically significant in influencing the price of an economy ticket.

For Premium Tickets

Null hypothesis: beta_1 = beta_2 = beta_3 = beta_4 = 0, i.e. none of the independent variables affects PricePremium. Alternative hypothesis: at least one of beta_1, beta_2, beta_3, beta_4 ≠ 0, i.e. at least one of the independent variables affects PricePremium.

# Ordinary least squares fit of the premium price on the four predictors,
# followed by the coefficient table and overall F-test.
m2 <- lm(
  formula = PricePremium ~ PriceRelative + FlightDuration +
    PitchDifference + WidthDifference,
  data = mydata
)
summary(m2)
## 
## Call:
## lm(formula = PricePremium ~ PriceRelative + FlightDuration + 
##     PitchDifference + WidthDifference, data = mydata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2197.0  -603.2   -31.7   794.7  4423.3 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       201.89     228.12   0.885  0.37661    
## PriceRelative    -282.79     119.42  -2.368  0.01831 *  
## FlightDuration    246.94      13.21  18.690  < 2e-16 ***
## PitchDifference   -61.31      40.40  -1.517  0.12984    
## WidthDifference   195.88      61.43   3.189  0.00153 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 971 on 453 degrees of freedom
## Multiple R-squared:  0.4368, Adjusted R-squared:  0.4318 
## F-statistic: 87.83 on 4 and 453 DF,  p-value: < 2.2e-16

We reject the null hypothesis, as the p-value of the F-statistic is very low (< 2.2e-16). The coefficient estimates establish that PriceRelative, FlightDuration and WidthDifference affect the price of a premium ticket, while PitchDifference has a statistically insignificant influence on the price of premium tickets (p = 0.1298).

Beta Coefficients

# Estimated coefficients of the premium-price model, via the standard accessor.
coef(m2)
##     (Intercept)   PriceRelative  FlightDuration PitchDifference 
##       201.89111      -282.78902       246.93562       -61.30538 
## WidthDifference 
##       195.87525

Confidence Interval

# 95% confidence intervals for the premium-price model coefficients
# (0.95 is confint()'s default level, spelled out here).
confint(m2, level = 0.95)
##                      2.5 %    97.5 %
## (Intercept)     -246.41009 650.19231
## PriceRelative   -517.48363 -48.09442
## FlightDuration   220.97155 272.89969
## PitchDifference -140.69884  18.08808
## WidthDifference   75.15922 316.59128

# Visualizing the beta coefficients

# Coefficient plot for the premium-price model, restricted to the four
# predictors named below.
coefplot(
  m2,
  predictors = c(
    "PriceRelative", "FlightDuration",
    "PitchDifference", "WidthDifference"
  )
)
## Warning: Ignoring unknown aesthetics: xmin, xmax

We can infer that only PitchDifference is statistically insignificant, as its confidence interval includes zero, while the others (WidthDifference, PriceRelative and FlightDuration) are statistically significant in influencing the price of a premium ticket.

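Finally, we can conclude that, according to this analysis, the factors that significantly explain the price of economy tickets and the price of premium tickets are FlightDuration, PriceRelative and WidthDifference, while PitchDifference is not statistically significant for either class.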