Reading the data

# Load the six-airlines fare data set from the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0, where the default
# became FALSE; the summary() and str() output recorded in this report
# shows those columns as factors.
airline.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a spreadsheet-style data viewer, so it is only useful (and
# only safe to call) in an interactive session.
if (interactive()) {
  View(airline.df)
}

# Per-column overview: level counts for factors, five-number summary and
# mean for numeric columns.
summary(airline.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Summarizing the dataset

# Compact structural overview of the data frame: its dimensions, then one
# line per column with the column type and its first few values.
utils::str(airline.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...

Calculating the means of the numeric variables

# Mean of every numeric column of airline.df, returned as one named vector.
# Columns are selected by type instead of being listed by hand, so none can
# be skipped (the hand-written list left out PitchPremium).
vapply(Filter(is.numeric, airline.df), mean, numeric(1))
## Values recorded from the earlier per-column mean() calls
## (PitchPremium was not part of that run):
##   FlightDuration         7.577838
##   SeatsEconomy         202.3122
##   SeatsPremium          33.64847
##   PitchEconomy          31.21834
##   WidthEconomy          17.83843
##   WidthPremium          19.47162
##   PriceEconomy        1327.076
##   PricePremium        1845.258
##   PriceRelative          0.4872052
##   SeatsTotal           235.9607
##   PitchDifference        6.687773
##   WidthDifference        1.633188
##   PercentPremiumSeats   14.64541

Calculating the medians of the numeric variables

# Median of every numeric column of airline.df, returned as one named vector.
# Columns are selected by type instead of being listed by hand, so none can
# be skipped (the hand-written list left out PitchPremium).
# numeric(1) also accepts integer results; vapply promotes them to double.
vapply(Filter(is.numeric, airline.df), median, numeric(1))
## Values recorded from the earlier per-column median() calls
## (PitchPremium was not part of that run):
##   FlightDuration         7.79
##   SeatsEconomy         185
##   SeatsPremium          36
##   PitchEconomy          31
##   WidthEconomy          18
##   WidthPremium          19
##   PriceEconomy        1242
##   PricePremium        1737
##   PriceRelative          0.365
##   SeatsTotal           227
##   PitchDifference        7
##   WidthDifference        1
##   PercentPremiumSeats   13.21

Calculating the standard deviations of the numeric variables

# Sample standard deviation of every numeric column of airline.df, returned
# as one named vector. Columns are selected by type instead of being listed
# by hand, so none can be skipped (the hand-written list left out
# PitchPremium).
vapply(Filter(is.numeric, airline.df), sd, numeric(1))
## Values recorded from the earlier per-column sd() calls
## (PitchPremium was not part of that run):
##   FlightDuration         3.542064
##   SeatsEconomy          76.37353
##   SeatsPremium          13.26142
##   PitchEconomy           0.6551695
##   WidthEconomy           0.5575102
##   WidthPremium           1.097173
##   PriceEconomy         988.2733
##   PricePremium        1288.136
##   PriceRelative          0.4505873
##   SeatsTotal            85.29315
##   PitchDifference        1.761708
##   WidthDifference        1.189281
##   PercentPremiumSeats    4.842451

Visualising the dataset

# Thin wrapper around boxplot() so that each plot below only states what
# varies: the formula (response ~ grouping column) and the three labels.
# The data source is always airline.df.
draw_box <- function(fml, x_label, y_label, title) {
  invisible(
    boxplot(fml, data = airline.df,
            xlab = x_label, ylab = y_label, main = title)
  )
}

# Flight duration, grouped by airline and by aircraft.
draw_box(FlightDuration ~ Airline, "Airlines", "FlightDuration",
         "FlightDuration for Different Airlines")

draw_box(FlightDuration ~ Aircraft, "Aircraft", "FlightDuration",
         "FlightDuration for Different Aircrafts")

# Number of economy seats, grouped by airline and by aircraft.
draw_box(SeatsEconomy ~ Airline, "Airlines", "Economy Seats",
         "Economy seats for different airlines")

draw_box(SeatsEconomy ~ Aircraft, "Aircraft", "Economy Seats",
         "Economy seats for different aircraft")

# Number of premium economy seats, grouped by airline and by aircraft.
draw_box(SeatsPremium ~ Airline, "Airline", "Premium Economy Seats",
         "Premium Economy seats for different airlines")

draw_box(SeatsPremium ~ Aircraft, "Aircraft", "Premium Economy Seats",
         "Premium Economy seats for different aircrafts")

# Economy fare, grouped by airline and by aircraft.
draw_box(PriceEconomy ~ Airline, "Airlines", "Price for Economy Seats",
         "Price for Economy seats in different airlines")

draw_box(PriceEconomy ~ Aircraft, "Aircraft", "Price for Economy Seats",
         "Price for Economy seats for different aircraft")

# Premium economy fare, grouped by airline and by aircraft.
draw_box(PricePremium ~ Airline, "Airline",
         "Price for Premium Economy Seats",
         "Price for Premium Economy seats in different airlines")

draw_box(PricePremium ~ Aircraft, "Aircraft",
         "Price for Premium Economy Seats",
         "Price for Premium Economy seats in different aircrafts")

Visualising through Scatterplots

# car provides scatterplotMatrix(); attaching it once covers both plots.
library(car)

# Pairwise scatterplots of SeatsEconomy, SeatsPremium, PriceEconomy and
# PricePremium.
scatterplotMatrix(
  ~ SeatsEconomy + SeatsPremium + PriceEconomy + PricePremium,
  data = airline.df,
  cex = 0.6
)

# Pairwise scatterplots of PriceRelative, PitchDifference, WidthDifference
# and PercentPremiumSeats.
scatterplotMatrix(
  ~ PriceRelative + PitchDifference + WidthDifference + PercentPremiumSeats,
  data = airline.df,
  cex = 0.6
)

Using Corrgram

library(corrgram)

# Correlogram of airline.df: shaded cells in the lower triangle, pie glyphs
# in the upper triangle, text labels drawn by panel.txt.
# order = TRUE is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas TRUE is a reserved word.
corrgram(airline.df,
         order = TRUE,
         text.panel = panel.txt,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         main = "Corrgram of all variables")

Testing correlation between PriceRelative and PitchDifference

Null hypothesis: there is no correlation between the relative price of premium-economy versus economy seats (PriceRelative) and the difference in seat pitch between the two classes (PitchDifference).

# Correlation test between PriceRelative and PitchDifference, using
# cor.test()'s default settings.
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$PitchDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceRelative and airline.df$PitchDifference
## t = 11.331, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3940262 0.5372817
## sample estimates:
##       cor 
## 0.4687302

Since p < 0.05, we reject the null hypothesis: PriceRelative and PitchDifference are positively correlated (r ≈ 0.47).

Testing correlation between PriceRelative and WidthDifference

Null hypothesis: there is no correlation between the relative price of premium-economy versus economy seats (PriceRelative) and the difference in seat width between the two classes (WidthDifference).

# Correlation test between PriceRelative and WidthDifference, using
# cor.test()'s default settings.
cor.test(
  x = airline.df$PriceRelative,
  y = airline.df$WidthDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  airline.df$PriceRelative and airline.df$WidthDifference
## t = 11.869, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4125388 0.5528218
## sample estimates:
##       cor 
## 0.4858024

Since p < 0.05, we reject the null hypothesis: PriceRelative and WidthDifference are positively correlated (r ≈ 0.49).

Regression Model

# Linear model of PriceRelative on PitchDifference and WidthDifference.
# The formula names the columns and the data frame is passed via `data =`
# rather than embedding `airline.df$...` vectors in the formula: the fit is
# numerically identical, but the coefficients get plain column names and
# predict(regmodel, newdata = ...) can match new data by column name.
regmodel <- lm(PriceRelative ~ PitchDifference + WidthDifference,
               data = airline.df)

# Coefficient table, residual summary and overall fit statistics.
summary(regmodel)
## 
## Call:
## lm(formula = PriceRelative ~ PitchDifference + WidthDifference, 
##     data = airline.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84163 -0.28484 -0.07241  0.17698  1.18778 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.10514    0.08304  -1.266 0.206077    
## PitchDifference  0.06019    0.01590   3.785 0.000174 ***
## WidthDifference  0.11621    0.02356   4.933 1.14e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3886 on 455 degrees of freedom
## Multiple R-squared:  0.2593, Adjusted R-squared:  0.2561 
## F-statistic: 79.65 on 2 and 455 DF,  p-value: < 2.2e-16