# Load the six-airlines fare data set and print a per-column summary.
#
# The input file is addressed by its full path (built with file.path())
# instead of switching the session's working directory with setwd(), so
# running this script does not alter global state for code that runs later.
data_dir <- "C:/Users/Prabha Shankar/Desktop/Winter Internship/R file"

# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on every R version. Since R 4.0.0
# read.csv() returns character columns by default, which would replace the
# per-level counts in the recorded summary with "Length/Class/Mode" entries.
var1.df <- read.csv(
  file.path(data_dir, "SixAirlinesDataV2.csv"),
  stringsAsFactors = TRUE
)
summary(var1.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
library(psych)
## Warning: package 'psych' was built under R version 3.3.3

FlightDuration description

# Descriptive statistics (psych::describe) for the FlightDuration column.
describe(var1.df[["FlightDuration"]])
##    vars   n mean   sd median trimmed  mad  min   max range  skew kurtosis
## X1    1 458 7.58 3.54   7.79    7.57 4.81 1.25 14.66 13.41 -0.07    -1.12
##      se
## X1 0.17

TravelMonth description

# Number of flights recorded for each travel month.
table(var1.df[["TravelMonth"]])
## 
## Aug Jul Oct Sep 
## 127  75 127 129
# Bar chart of the flight count in each travel month.
barplot(
  table(var1.df[["TravelMonth"]]),
  col = "grey",
  xlab = "Month",
  ylab = "No. of Flights"
)

# Horizontal box plot of the flight durations.
boxplot(
  var1.df[["FlightDuration"]],
  xlab = "Duration(hrs)",
  horizontal = TRUE
)

IsInternational description

# Number of domestic versus international flights.
table(var1.df[["IsInternational"]])
## 
##      Domestic International 
##            40           418
# Bar chart of the domestic / international flight counts.
barplot(
  table(var1.df[["IsInternational"]]),
  col = "grey",
  ylab = "No. of Flights"
)

SeatsEconomy description

# Descriptive statistics for the number of economy seats.
describe(var1.df[["SeatsEconomy"]])
##    vars   n   mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 458 202.31 76.37    185  194.64 85.99  78 389   311 0.72    -0.36
##      se
## X1 3.57

SeatsPremium description

# Descriptive statistics for the number of premium-economy seats.
describe(var1.df[["SeatsPremium"]])
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 458 33.65 13.26     36   33.35 11.86   8  66    58 0.23    -0.46
##      se
## X1 0.62
# Horizontal box plot of the premium-economy seat counts.
boxplot(
  var1.df[["SeatsPremium"]],
  xlab = "No. of Seats",
  horizontal = TRUE
)

# Horizontal box plot of the economy seat counts.
boxplot(
  var1.df[["SeatsEconomy"]],
  xlab = "No. of Seats",
  horizontal = TRUE
)

PitchEconomy description

# Descriptive statistics for the economy seat pitch.
describe(var1.df[["PitchEconomy"]])
##    vars   n  mean   sd median trimmed mad min max range  skew kurtosis
## X1    1 458 31.22 0.66     31   31.26   0  30  33     3 -0.03    -0.35
##      se
## X1 0.03
# Bar chart of how many flights offer each economy seat pitch.
barplot(
  table(var1.df[["PitchEconomy"]]),
  col = "grey",
  xlab = "Pitch(Inches)",
  ylab = "No. of Flights"
)

PitchPremium description

# Descriptive statistics for the premium-economy seat pitch.
describe(var1.df[["PitchPremium"]])
##    vars   n  mean   sd median trimmed mad min max range  skew kurtosis
## X1    1 458 37.91 1.31     38   38.05   0  34  40     6 -1.51     3.52
##      se
## X1 0.06
# Bar chart of how many flights offer each premium-economy seat pitch.
barplot(
  table(var1.df[["PitchPremium"]]),
  col = "grey",
  xlab = "Pitch(Inches)",
  ylab = "No. of Flights"
)

WidthEconomy description

# Descriptive statistics for the economy seat width.
describe(var1.df[["WidthEconomy"]])
##    vars   n  mean   sd median trimmed mad min max range  skew kurtosis
## X1    1 458 17.84 0.56     18   17.81   0  17  19     2 -0.04    -0.08
##      se
## X1 0.03
# Bar chart of how many flights offer each economy seat width.
barplot(
  table(var1.df[["WidthEconomy"]]),
  col = "grey",
  xlab = "Width(Inches)",
  ylab = "No. of Flights"
)

WidthPremium description

# Descriptive statistics for the premium-economy seat width.
describe(var1.df[["WidthPremium"]])
##    vars   n  mean  sd median trimmed mad min max range  skew kurtosis   se
## X1    1 458 19.47 1.1     19   19.53   0  17  21     4 -0.08    -0.31 0.05
# Bar chart of how many flights offer each premium-economy seat width.
barplot(
  table(var1.df[["WidthPremium"]]),
  col = "grey",
  xlab = "Width(Inches)",
  ylab = "No. of Flights"
)

PriceEconomy description

# Descriptive statistics for the economy ticket price.
describe(var1.df[["PriceEconomy"]])
##    vars   n    mean     sd median trimmed     mad min  max range skew
## X1    1 458 1327.08 988.27   1242  1244.4 1159.39  65 3593  3528 0.51
##    kurtosis    se
## X1    -0.88 46.18
# Horizontal box plot of the economy ticket prices.
boxplot(
  var1.df[["PriceEconomy"]],
  xlab = "Ticket Price(USD)",
  horizontal = TRUE
)

PricePremium description

# Descriptive statistics for the premium-economy ticket price.
describe(var1.df[["PricePremium"]])
##    vars   n    mean      sd median trimmed     mad min  max range skew
## X1    1 458 1845.26 1288.14   1737 1799.05 1845.84  86 7414  7328  0.5
##    kurtosis    se
## X1     0.43 60.19
# Horizontal box plot of the premium-economy ticket prices.
boxplot(
  var1.df[["PricePremium"]],
  xlab = "Ticket Price(USD)",
  horizontal = TRUE
)

PriceRelative description

# Descriptive statistics for the relative price column.
describe(var1.df[["PriceRelative"]])
##    vars   n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 458 0.49 0.45   0.36    0.42 0.41 0.02 1.89  1.87 1.17     0.72
##      se
## X1 0.02
# Horizontal box plot of the relative price column. The axis label is a
# two-line string literal (it begins with a line break) and is kept exactly
# as written.
boxplot(
  var1.df[["PriceRelative"]],
  horizontal = TRUE,
  xlab = "
(PricePremium - PriceEconomy) / PriceEconomy")

SeatsTotal description

# Descriptive statistics for the total seat count.
describe(var1.df[["SeatsTotal"]])
##    vars   n   mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 458 235.96 85.29    227  228.73 90.44  98 441   343  0.7    -0.53
##      se
## X1 3.99
# Horizontal box plot of the total seat counts.
boxplot(
  var1.df[["SeatsTotal"]],
  xlab = "No. of Seats",
  horizontal = TRUE
)

PercentPremiumSeats description

# Descriptive statistics for the percentage of premium-economy seats.
describe(var1.df[["PercentPremiumSeats"]])
##    vars   n  mean   sd median trimmed  mad  min   max range skew kurtosis
## X1    1 458 14.65 4.84  13.21   14.31 2.68 4.71 24.69 19.98 0.71     0.28
##      se
## X1 0.23
# Horizontal box plot of the premium-economy seat percentage.
boxplot(
  var1.df[["PercentPremiumSeats"]],
  xlab = "Percentage of Premium Seats in Aircraft",
  horizontal = TRUE
)

PitchDifference description

# Descriptive statistics for the pitch difference column.
describe(var1.df[["PitchDifference"]])
##    vars   n mean   sd median trimmed mad min max range  skew kurtosis   se
## X1    1 458 6.69 1.76      7    6.76   0   2  10     8 -0.54     1.78 0.08
# Bar chart of how many flights have each pitch difference.
barplot(
  table(var1.df[["PitchDifference"]]),
  col = "grey",
  xlab = "PitchDifference(Inches)",
  ylab = "No. of Flights"
)

WidthDifference description

# Descriptive statistics for the width difference column.
describe(var1.df[["WidthDifference"]])
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 458 1.63 1.19      1    1.53   0   0   4     4 0.84    -0.53 0.06
# Bar chart of how many flights have each width difference.
barplot(
  table(var1.df[["WidthDifference"]]),
  col = "grey",
  xlab = "WidthDifference(Inches)",
  ylab = "No. of Flights"
)

Corrgram

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.3.3
# Correlogram of the data frame with the variables kept in their original
# order (order = FALSE). panel.shade fills the lower triangle, panel.pie the
# upper triangle, and panel.txt is used as the text panel.
corrgram(
  var1.df,
  main = "Corrgram of analyze relations between variable of dataframe",
  order = FALSE,
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

Pearson's Correlation Test

A. Test of the correlation between the price difference (PricePremium - PriceEconomy) and PitchDifference.

# Pearson correlation test between the premium-minus-economy price gap and
# the pitch difference. The operand expressions are kept as written because
# cor.test() echoes them in the "data:" line of its report.
cor.test(
  x = (var1.df$PricePremium - var1.df$PriceEconomy),
  y = var1.df$PitchDifference,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  (var1.df$PricePremium - var1.df$PriceEconomy) and var1.df$PitchDifference
## t = 2.7688, df = 456, p-value = 0.005855
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03739893 0.21764764
## sample estimates:
##       cor 
## 0.1285851
library(car)
## Warning: package 'car' was built under R version 3.3.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plot (car package) of the price gap against the pitch difference.
# The operand expressions are left untouched; scatterplot() presumably
# derives its default axis labels from them.
scatterplot(
  x = (var1.df$PricePremium - var1.df$PriceEconomy),
  y = var1.df$PitchDifference
)

B. Test of the correlation between the price difference (PricePremium - PriceEconomy) and WidthDifference.

# Pearson correlation test between the premium-minus-economy price gap and
# the width difference. The operand expressions are kept as written because
# cor.test() echoes them in the "data:" line of its report.
cor.test(
  x = (var1.df$PricePremium - var1.df$PriceEconomy),
  y = var1.df$WidthDifference,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  (var1.df$PricePremium - var1.df$PriceEconomy) and var1.df$WidthDifference
## t = 2.5291, df = 456, p-value = 0.01177
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02627012 0.20700978
## sample estimates:
##       cor 
## 0.1176138
library(car)
# Scatter plot (car package) of the price gap against the width difference.
# The operand expressions are left untouched; scatterplot() presumably
# derives its default axis labels from them.
scatterplot(
  x = (var1.df$PricePremium - var1.df$PriceEconomy),
  y = var1.df$WidthDifference
)

C. Test of the correlation between the price difference (PricePremium - PriceEconomy) and FlightDuration.

# Pearson correlation test between the premium-minus-economy price gap and
# the flight duration. The operand expressions are kept as written because
# cor.test() echoes them in the "data:" line of its report.
cor.test(
  x = (var1.df$PricePremium - var1.df$PriceEconomy),
  y = var1.df$FlightDuration,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  (var1.df$PricePremium - var1.df$PriceEconomy) and var1.df$FlightDuration
## t = 11.435, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3976578 0.5403379
## sample estimates:
##       cor 
## 0.4720837
library(car)
# Scatter plot (car package) of the price gap against the flight duration.
# The operand expressions are left untouched; scatterplot() presumably
# derives its default axis labels from them.
scatterplot(
  x = (var1.df$PricePremium - var1.df$PriceEconomy),
  y = var1.df$FlightDuration
)

The above correlation tests suggest that the difference in price between the two ticket classes depends most strongly on flight duration (p-value < 2.2e-16), and also, more weakly, on the pitch difference and the width difference (both p-values < 0.05).

T-Test

Null hypothesis: there is no difference between the mean price of an economy-class ticket and the mean price of a premium-economy-class ticket.

# Two-sample t-test of the mean economy price against the mean
# premium-economy price, run unpaired with pooled (equal) variances.
# NOTE(review): both prices appear to come from the same row of the data
# frame, so a paired test may be the better fit -- confirm before relying on
# this result.
t.test(
  x = var1.df$PriceEconomy,
  y = var1.df$PricePremium,
  paired = FALSE,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  var1.df$PriceEconomy and var1.df$PricePremium
## t = -6.8304, df = 914, p-value = 1.544e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0699 -369.2926
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

The null hypothesis is rejected because the t-test gives a very low p-value (1.544e-11): there is a statistically significant difference between the mean prices of economy-class and premium-economy-class tickets.

Regression Analysis

# Model formula: the premium-minus-economy price gap explained by the pitch
# difference, the width difference and the flight duration. The terms refer
# to var1.df columns directly, so the formula can be evaluated without a
# separate data argument.
var2 <-
  (var1.df$PricePremium - var1.df$PriceEconomy) ~
    var1.df$PitchDifference +
    var1.df$WidthDifference +
    var1.df$FlightDuration

# Fit the linear model for that formula and print its full report.
var3 <- lm(formula = var2)
summary(var3)
## 
## Call:
## lm(formula = var2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -859.4 -324.7  -62.7  150.1 3331.5 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -286.933    117.833  -2.435   0.0153 *  
## var1.df$PitchDifference   10.387     20.779   0.500   0.6174    
## var1.df$WidthDifference   74.641     30.977   2.410   0.0164 *  
## var1.df$FlightDuration    80.992      6.754  11.992   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.1 on 454 degrees of freedom
## Multiple R-squared:  0.2538, Adjusted R-squared:  0.2489 
## F-statistic: 51.48 on 3 and 454 DF,  p-value: < 2.2e-16

Observation

A. Beta coefficients of the model.

# Estimated coefficients stored on the fitted model object.
var3[["coefficients"]]
##             (Intercept) var1.df$PitchDifference var1.df$WidthDifference 
##              -286.93258                10.38682                74.64098 
##  var1.df$FlightDuration 
##                80.99227

B. Confidence intervals on the beta coefficients.

# 95% confidence intervals for the model coefficients.
confint(var3, level = 0.95)
##                              2.5 %    97.5 %
## (Intercept)             -518.49881 -55.36635
## var1.df$PitchDifference  -30.44766  51.22130
## var1.df$WidthDifference   13.76513 135.51683
## var1.df$FlightDuration    67.72008  94.26446

C. Plot of the model.

library(car)
# Diagnostic plots of the fitted regression model `var3`.
#
# The fitted lm object is plotted, not the formula `var2`: plotting a formula
# only draws scatter plots of the raw data, and abline() needs a fitted model
# (or an intercept and slope) rather than a formula, so neither call showed
# anything about the fit itself.
old_par <- par(mfrow = c(2, 2))  # lay the diagnostic panels out on one page
plot(var3)
par(old_par)                     # restore the previous graphics layout

Summary

1. The data set is normally distributed; therefore, we can readily perform the regression analysis. 2. As we can see from the regression analysis, the difference in price between an economy ticket and a premium-economy ticket (PricePremium - PriceEconomy) depends significantly on FlightDuration and WidthDifference, whereas the effect of PitchDifference is not statistically significant in this model (p = 0.62).