Reading database

setwd("C:/Users/Jaya/Downloads")
airlines.df <- read.csv(paste("SixAirlinesDataV2.csv", sep=""))

Viewing Database

library(psych)
View(airlines.df)

column Summary

summary(airlines.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Structure of data set

str(airlines.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Descirbing dataset

library(psych)
describe(airlines.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23

Comparing the effects of variables on relative pricing.

=> Airline

library(lattice)
boxplot(airlines.df$PriceRelative ~airlines.df$Airline , 
       xlab = "airline")

correlation between Airline and Relative Price.

=> Aircraft

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
scatterplot(airlines.df$PriceRelative~airlines.df$Aircraft,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.Aircraft ",
            xlab="Aircraft",
            ylab="Price Relative")

##  [1] "212" "308" "156" "157" "158" "159" "379" "380" "381" "382"

So we can say that the model of aircraft And relative price have a weak correlation.

=> Flight Duration

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$FlightDuration,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.Flight Duration ",
            xlab="Flight Duration",
            ylab="Price Relative")

correlation between Relative Price and FlightDuration

=> Travel Month

 library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$TravelMonth,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.PitchPremium ",
            xlab="Month of Travel",
            ylab="Price Relative")

##  [1] "157" "160" "379" "440" "159" "162" "381" "158" "161" "380" "406"

Month of travel does not have any correlation with Relative Price.

=> IsInternational

library(lattice)
boxplot(airlines.df$PriceRelative ~airlines.df$IsInternational , 
       ylab="Relative Price")

correlation between IsInternational and Relative Price.

=> Pitch

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$PitchEconomy,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.PitchEconomy ",
            xlab="PitchEconomy",
            ylab="Price Relative")

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$PitchPremium,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.PitchPremium ",
            xlab="PitchPremium",
            ylab="Price Relative")

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$PitchDifference,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.PitchDifference ",
            xlab="PitchDifference",
            ylab="Price Relative")

Therefore Pitch Economy,Pitch Premium and pitch Diff have a correlation with Relative price.

=> Width

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$WidthEconomy,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.WidthEconomy ",
            xlab="WidthEconomy",
            ylab="Price Relative")

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$WidthPremium,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.WidthPremium ",
            xlab="WidthPremium",
            ylab="Price Relative")

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$WidthDifference,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.WidthDifference ",
            xlab="WidthDifference",
            ylab="Price Relative")

Therefore WidthEconomy does not have a statistically significant correlation Whereas,WidthPremium and WidthDifference has a statistically significant correlation.

=> Number of Seats

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$SeatsEconomy,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.SeatsEconomy ",
            xlab="SeatsEconomy",
            ylab="Price Relative")

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$SeatsPremium,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.SeatsPremium ",
            xlab="SeatsPremium",
            ylab="Price Relative")

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$SeatsTotal,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Price Relative vs.SeatsTotal ",
            xlab="SeatsTotal",
            ylab="Price Relative")

Therefore Seats_Economy and Seats Total are not the factors but Seats_Premium has a negative correlation with Price_Relative

=> Percent of Premium seats

library(car)
scatterplot(airlines.df$PriceRelative~airlines.df$PercentPremiumSeats,spread=FALSE, smoother.args=list(lty=2), pch=19,
            main="Scatterplot of Relative price vs Percent of Premium seats ",
            xlab="Percent of Premium seats",
            ylab="Relative price")

negative correlation with Relative price.

So from the above data we conclude that, we get the factors which affect the Relative Pricing. Airline, Flightduration , IsInternational, Pitch Economy, Pitch Premium, Pitch Difference, Width Premium , Width Difference, Seats Premium

Scatterplot matrix of Variables

library(car)
scatterplotMatrix(airlines.df[,c("PriceRelative","Airline","FlightDuration","IsInternational","PitchEconomy","PitchPremium","PitchDifference","WidthPremium","WidthDifference","SeatsPremium")], 
                  spread=FALSE, smoother.args=list(lty=2),
                  main="Scatter Plot Matrix")
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

Corrgram matrix of variables

library(corrgram)
corrgram(airlines.df, order=FALSE, lower.panel=panel.shade,
         upper.panel=panel.pie, text.panel=panel.txt,
         main="Corrgram of Variables")

Positively Correlated factors:-

PriceRelative, Airline, Flight duration, IsInternational, PitchPremium, Pitch Difference, WidthPremium, Width Difference

Negatively Correlatted factors:-

PriceRelative, PitchEconomy, WidthEconomy

Hypothesis test

Null Hypothesis-“The relative price does not depends on any of the following variables- -Airline -Flight duration -IsInternational -PitchEconomy -PitchPremium -Pitch Difference -WidthEconomy -WidthPremium -Width Difference”

T-Test to check correlation between PriceRelative and IsInternational

t.test(airlines.df$PriceRelative ~ airlines.df$IsInternational )
## 
##  Welch Two Sample t-test
## 
## data:  airlines.df$PriceRelative by airlines.df$IsInternational
## t = -19.451, df = 446.12, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4855215 -0.3964139
## sample estimates:
##      mean in group Domestic mean in group International 
##                   0.0847500                   0.5257177

p-value=2.2e-16(<0.05).

Since the p value is less that 0.05 We reject the null hypothesis and conclude that There is a strong correlation between Relative price and IsInternational.

Regression model

Converting the variable Airline and IsInternational into a factor variable

str(airlines.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Running Regression model and displaying Beta Coefficients

model <- lm(PriceRelative ~Airline
+FlightDuration
+IsInternational
+PitchEconomy
+PitchPremium
+WidthEconomy
+WidthPremium
 ,  data=airlines.df)
summary(model)
## 
## Call:
## lm(formula = PriceRelative ~ Airline + FlightDuration + IsInternational + 
##     PitchEconomy + PitchPremium + WidthEconomy + WidthPremium, 
##     data = airlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8159 -0.1933 -0.0549  0.1009  1.4689 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -0.904967   4.976180  -0.182 0.855775    
## AirlineBritish                0.250299   0.159931   1.565 0.118283    
## AirlineDelta                  0.061917   0.354717   0.175 0.861510    
## AirlineJet                    0.534838   0.140825   3.798 0.000166 ***
## AirlineSingapore              0.322987   0.204182   1.582 0.114390    
## AirlineVirgin                 0.417100   0.326186   1.279 0.201661    
## FlightDuration                0.036153   0.006173   5.856  9.2e-09 ***
## IsInternationalInternational -0.406100   0.399055  -1.018 0.309394    
## PitchEconomy                 -0.055148   0.133621  -0.413 0.680012    
## PitchPremium                  0.085155   0.151506   0.562 0.574361    
## WidthEconomy                 -0.073197   0.078098  -0.937 0.349140    
## WidthPremium                  0.052925   0.222769   0.238 0.812319    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.36 on 446 degrees of freedom
## Multiple R-squared:  0.3772, Adjusted R-squared:  0.3618 
## F-statistic: 24.55 on 11 and 446 DF,  p-value: < 2.2e-16
model$coefficients
##                  (Intercept)               AirlineBritish 
##                  -0.90496697                   0.25029934 
##                 AirlineDelta                   AirlineJet 
##                   0.06191693                   0.53483769 
##             AirlineSingapore                AirlineVirgin 
##                   0.32298704                   0.41709999 
##               FlightDuration IsInternationalInternational 
##                   0.03615307                  -0.40609991 
##                 PitchEconomy                 PitchPremium 
##                  -0.05514750                   0.08515457 
##                 WidthEconomy                 WidthPremium 
##                  -0.07319684                   0.05292480

Comparision of Model with Dataset provided

predictedProfit = data.frame(fitted(model)) 
actualProfit = data.frame(airlines.df$PriceRelative)
Comparison = cbind(actualProfit, predictedProfit)
View(Comparison)
some(Comparison)
##     airlines.df.PriceRelative fitted.model.
## 41                       0.24     0.2920278
## 125                      0.17     0.2197217
## 139                      0.19     0.2406904
## 172                      0.84     0.8025652
## 241                      1.13     0.5299150
## 300                      0.07     0.1102492
## 342                      0.08     0.1924596
## 362                      0.03     0.3461101
## 372                      0.14     0.6354819
## 420                      0.61     0.6085275

So we can say that the Model predicts the Relative price values near to dataset values.

Summary

Airline Company : Lowest relative price difference in Airlines are AirFrance and Delta , Highest being Jet Airlines.

Type of Flight(International/Domestic) : t.Domestic Flights have low Difference in prices of tickets than International flights.

Increase in Flight duration, Pitch and width Results in Increasing Relative Price.