MINI-PROJECT ON AIRLINE TICKET PRICING ANALYSIS UNDER THE GUIDANCE OF PROF. SAMEER MATHUR (PH.D.), IIM LUCKNOW.

     ----------------Premium Economy Vs Economy Ticket Pricing by Airlines--------------
   

Mini project On Airline Ticket pricing

# Read the airline pricing data set into a data frame.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0.0, where the default
# became FALSE; the per-level counts in the summary() output below need
# factor columns.
sixairlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# attach() puts the columns on the search path so they can be referenced by
# bare name (e.g. PriceEconomy) without the sixairlines.df$ prefix.
attach(sixairlines.df)
library(psych)
summary(sixairlines.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Using the describe() command to display descriptive statistics for every column.

# Per-column descriptive statistics from the psych package.
psych::describe(sixairlines.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
                          --------Calculating the mean & medians-----------
# Arithmetic mean of each numeric column, one call per column so that every
# result is printed on its own.
mean(sixairlines.df[["FlightDuration"]])
## [1] 7.577838
mean(sixairlines.df[["SeatsEconomy"]])
## [1] 202.3122
mean(sixairlines.df[["SeatsPremium"]])
## [1] 33.64847
mean(sixairlines.df[["PitchEconomy"]])
## [1] 31.21834
mean(sixairlines.df[["WidthEconomy"]])
## [1] 17.83843
mean(sixairlines.df[["WidthPremium"]])
## [1] 19.47162
mean(sixairlines.df[["PriceEconomy"]])
## [1] 1327.076
mean(sixairlines.df[["PricePremium"]])
## [1] 1845.258
mean(sixairlines.df[["PriceRelative"]])
## [1] 0.4872052
mean(sixairlines.df[["SeatsTotal"]])
## [1] 235.9607
mean(sixairlines.df[["PitchDifference"]])
## [1] 6.687773
mean(sixairlines.df[["WidthDifference"]])
## [1] 1.633188
            --------------------- medians for the following analysis -----------------------
# Median of each numeric column, one call per column so that every result is
# printed on its own.
median(sixairlines.df[["FlightDuration"]])
## [1] 7.79
median(sixairlines.df[["SeatsEconomy"]])
## [1] 185
median(sixairlines.df[["SeatsPremium"]])
## [1] 36
median(sixairlines.df[["PitchEconomy"]])
## [1] 31
median(sixairlines.df[["WidthEconomy"]])
## [1] 18
median(sixairlines.df[["WidthPremium"]])
## [1] 19
median(sixairlines.df[["PriceEconomy"]])
## [1] 1242
median(sixairlines.df[["PricePremium"]])
## [1] 1737
median(sixairlines.df[["PriceRelative"]])
## [1] 0.365
median(sixairlines.df[["SeatsTotal"]])
## [1] 227
median(sixairlines.df[["PitchDifference"]])
## [1] 7
median(sixairlines.df[["WidthDifference"]])
## [1] 1

Using plots as a more effective way to analyse the data.

## Price analysis for premium as well as economy.
# Scatter plot of the two fare columns. The columns are taken from the data
# frame through data = so the call does not depend on attach()ed state.
plot(~ PriceEconomy + PricePremium, data = sixairlines.df,
     main = "Premium Economy Price vs Economy Price")
# Reference line with intercept 0 and slope 1 (equal fares).
abline(0, 1)

Using a histogram to examine the pitch difference and the percentage of the total for each value.

## Histogram for PitchDifference
library(lattice)
# Lattice histogram of the PitchDifference column.
histogram(
  ~ PitchDifference,
  data = sixairlines.df,
  main = "Pitch Difference Analysis",
  xlab = "Difference in Pitch",
  ylab = "Total_percentage",
  col = "yellow"
)

Using a histogram to examine the seat-width difference and the percentage of the total for each value.

## Histogram for WidthDifference
library(lattice)
# Lattice histogram of the WidthDifference column.
histogram(
  ~ WidthDifference,
  data = sixairlines.df,
  main = "Distribution of Difference in Seat Width",
  xlab = "Difference in Seat Width",
  ylab = "Total_percentage",
  col = "orange"
)

## Analysing pitch difference: frequency of each distinct value
pitchDifference <- table(sixairlines.df[["PitchDifference"]])
print(pitchDifference)
## 
##   2   3   6   7  10 
##  24  16 121 243  54
     - - - - - - - - - - - Scatterplots for the various classes - - - - - -  - - - -  - - -  
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Price against flight duration for each fare class. data = supplies the
# columns, so no attach() call is needed here.
scatterplot(PriceEconomy ~ FlightDuration, data = sixairlines.df,
            main = "Scatterplot for PriceEconomy versus FlightDuration",
            xlab = "Flight_Duration", ylab = "Price_Economy")

scatterplot(PricePremium ~ FlightDuration, data = sixairlines.df,
            main = "Scatterplot for Price Premium versus FlightDuration",
            xlab = "Flight_Duration", ylab = "Price_Premium", col = "red")

library(car)
# Pairwise scatterplots of seat counts and prices.
scatterplotMatrix(
  formula = ~ SeatsEconomy + SeatsPremium + PriceEconomy + PricePremium,
  cex = 0.8,
  data = sixairlines.df
)

# Pairwise scatterplots of relative price and the seat-comfort differences.
scatterplotMatrix(
  formula = ~ PriceRelative + PitchDifference + WidthDifference +
    PercentPremiumSeats,
  cex = 0.8,
  data = sixairlines.df
)

# Number of rows per airline, drawn as a bar chart.
counts <- table(sixairlines.df[["Airline"]])
barplot(
  counts,
  main = "Distribution of airlines",
  xlab = "Airline",
  col = "brown"
)

## Visualizing the data using a boxplot
# Flight duration per airline. The data set has six airlines, so six colours
# are supplied; with only five, the sixth box would reuse the first colour.
boxplot(FlightDuration ~ Airline, data = sixairlines.df,
        xlab = "Airlines", ylab = "FlightDuration",
        col = c("skyblue", "purple", "orange", "green", "yellow", "red"),
        main = "Flight Duration for Different Airlines")

   - - - - - - - Boxplot to analyse various categories - - - - - - - - - - 
# Horizontal boxplot of the pitch difference.
boxplot(
  sixairlines.df[["PitchDifference"]],
  main = "Pitch Difference",
  horizontal = TRUE,
  col = "blue"
)

# Horizontal boxplot of the seat-width difference.
boxplot(
  sixairlines.df[["WidthDifference"]],
  main = "Width Difference",
  horizontal = TRUE,
  col = "blue"
)

# Economy price per airline, drawn horizontally. Six colours are supplied
# for the six airlines so that no box reuses another airline's colour, and
# the columns are named through data = rather than sixairlines.df$ terms.
boxplot(PriceEconomy ~ Airline, data = sixairlines.df,
        col = c("skyblue", "purple", "orange", "green", "yellow", "red"),
        main = "Airlines versus Price_Economy",
        horizontal = TRUE)

# Relative price per airline, drawn horizontally.
# - The grouping variable is Airline, so the title and axis say "Airline".
# - With horizontal = TRUE the values run along the x axis, so xlab carries
#   the "Relative Price" label and ylab the group label.
# - boxplot() with a formula works whether Airline is a factor or a
#   character column.
# - Six colours are supplied for the six airlines.
boxplot(PriceRelative ~ Airline, data = sixairlines.df,
        main = "Airline vs Relative Price",
        xlab = "Relative Price", ylab = "Airline",
        col = c("grey", "pink", "red", "white", "black", "blue"),
        horizontal = TRUE)

# Horizontal boxplot of the premium-economy price.
boxplot(
  sixairlines.df[["PricePremium"]],
  xlab = "PricePremium ",
  main = "PricePremium distribution boxplot",
  col = "green",
  horizontal = TRUE
)

--------- Using corrgram ---------

library(corrgram)
# Correlogram of the whole data frame: shaded cells in the lower panel, pie
# glyphs in the upper panel. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
corrgram(sixairlines.df,
         order = TRUE,
         text.panel = panel.txt,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         main = "Corrgram of all variables")

    ------testing correlation between priceRelative & pitchDifference-------------
# Correlation test between relative price and pitch difference.
cor.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$PitchDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  sixairlines.df$PriceRelative and sixairlines.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3940262 0.5372817
## sample estimates:
##       cor 
## 0.4687302
       -------Because the p-value is less than 0.05, we can straightforwardly reject the null hypothesis of no correlation between PriceRelative and PitchDifference-----
    
    -------Testing correlation between PriceRelative & WidthDifference------------
# Correlation test between relative price and seat-width difference.
cor.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$WidthDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  sixairlines.df$PriceRelative and sixairlines.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4125388 0.5528218
## sample estimates:
##       cor 
## 0.4858024
     -------Because the p-value is less than 0.05, we can straightforwardly reject the null hypothesis of no correlation between PriceRelative and WidthDifference-----
    
                    ------------T-Test analysis-----------------------
    
# Two-sample t-test on the means of PriceRelative and PitchDifference.
# NOTE(review): these columns measure different quantities (a price ratio
# and a pitch difference) - confirm that comparing their means is the
# intended analysis.
t.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$PitchDifference
)
## 
##  Welch Two Sample t-test
## 
## data:  sixairlines.df$PriceRelative and sixairlines.df$PitchDifference
## t = -72.974, df = 516.54, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.367495 -6.033640
## sample estimates:
## mean of x mean of y 
## 0.4872052 6.6877729
# Two-sample t-test on the means of PriceRelative and PercentPremiumSeats.
# NOTE(review): these columns measure different quantities (a price ratio
# and a seat percentage) - confirm that comparing their means is the
# intended analysis.
t.test(
  x = sixairlines.df$PriceRelative,
  y = sixairlines.df$PercentPremiumSeats
)
## 
##  Welch Two Sample t-test
## 
## data:  sixairlines.df$PriceRelative and sixairlines.df$PercentPremiumSeats
## t = -62.302, df = 464.91, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -14.60477 -13.71164
## sample estimates:
##  mean of x  mean of y 
##  0.4872052 14.6454148
                     ------Linear model analysis/regression model-------------
# Linear regression of relative price on the pitch difference, the width
# difference and the share of premium seats. Despite its name, mini.df holds
# the fitted lm object, not a data frame.
mini.df <- lm(
  formula = PriceRelative ~ PitchDifference + WidthDifference +
    PercentPremiumSeats,
  data = sixairlines.df
)
summary(mini.df)
## 
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference + 
##     PercentPremiumSeats, data = sixairlines.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.88643 -0.29471 -0.05005  0.19013  1.17157 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.031508   0.097220  -0.324    0.746    
## PitchDifference      0.064596   0.016171   3.994 7.56e-05 ***
## WidthDifference      0.104782   0.024813   4.223 2.92e-05 ***
## PercentPremiumSeats -0.005764   0.003971  -1.451    0.147    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3882 on 454 degrees of freedom
## Multiple R-squared:  0.2627, Adjusted R-squared:  0.2579 
## F-statistic: 53.93 on 3 and 454 DF,  p-value: < 2.2e-16
# Linear regression of relative price on travel month. The columns are named
# in the formula and supplied through data =, matching the model above, so
# the coefficient labels are not prefixed with the data frame name.
fit <- lm(PriceRelative ~ TravelMonth, data = sixairlines.df)
summary(fit)
## 
## Call:
## lm(formula = PriceRelative ~ TravelMonth, data = sixairlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4908 -0.3779 -0.1179  0.2523  1.4321 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.47661    0.04005  11.899   <2e-16 ***
## TravelMonthJul  0.02205    0.06574   0.335    0.737    
## TravelMonthOct  0.04417    0.05665   0.780    0.436    
## TravelMonthSep -0.01871    0.05643  -0.332    0.740    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4514 on 454 degrees of freedom
## Multiple R-squared:  0.002997,   Adjusted R-squared:  -0.003591 
## F-statistic: 0.4549 on 3 and 454 DF,  p-value: 0.714
                                    -------Insight/conclusion-----------
                            
                            

Therefore, we can reject the null hypotheses, because the p-values we obtained are less than our significance level of 0.05. We can also state that prices were considerably higher when the travel month as well as the class changes, as premium economy class has extensive facilities and can still provide many other services for its passengers. This is the main reason many people choose it, for their comfort. Still, almost 83% was noticed by conducting the above tests.