R Markdown

This is an R Markdown document which contains the analysis of the Six Airlines Dataset.

Reading the Six Airlines dataset into R

# Load the Six Airlines dataset.
# The data directory defaults to the original location but can be overridden
# with the SIX_AIRLINES_DATA_DIR environment variable, so the document does
# not have to change the session's working directory with setwd().
data_dir <- Sys.getenv("SIX_AIRLINES_DATA_DIR", unset = "D:/R Internship")
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0
# (where the default is FALSE), which the per-level counts printed by
# summary() depend on.
air_data <- read.csv(file.path(data_dir, "SixAirlinesDataV2.csv"),
                     stringsAsFactors = TRUE)
# Only open the data viewer in an interactive session, not while knitting.
if (interactive()) {
  View(air_data)
}

Summarize the data to understand the distribution (minimum, quartiles, median, mean, maximum) of each variable

# Number of rows and columns in the dataset.
print(dim(air_data))
## [1] 458  18
# Per-column summary: level counts for factors, quartiles and mean for numerics.
print(summary(air_data))
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Adding an extra column called PriceDifference, which stores the price difference between Premium and Economy tickets (Premium minus Economy)

# Premium price minus Economy price for every row, stored as a new column.
air_data[["PriceDifference"]] <- air_data[["PricePremium"]] -
  air_data[["PriceEconomy"]]

Box Plot between Airline and Price Difference

# Distribution of the price difference (Premium minus Economy) per airline.
# With the formula PriceDifference ~ Airline the groups (airlines) run along
# the x-axis and the values along the y-axis, so the axis labels follow that.
boxplot(PriceDifference ~ Airline, data = air_data,
        main = "Boxplot of Price Difference between Economy and Premium tickets by Airline",
        xlab = "Airline", ylab = "Price Difference")

BarPlot between Airline and Price Difference

library(lattice)
## Warning: package 'lattice' was built under R version 3.3.3
# Mean price difference for each airline.
price_airline <- aggregate(PriceDifference ~ Airline, data = air_data,
                           FUN = mean)
print(price_airline)
##     Airline PriceDifference
## 1 AirFrance        295.4324
## 2   British        643.5486
## 3     Delta        123.7391
## 4       Jet        207.1967
## 5 Singapore        379.6750
## 6    Virgin       1118.1613
# Mean price difference per airline as a bar chart.
# origin = 0 anchors the bars at zero; lattice's default (origin = NULL)
# starts them at the edge of the panel, so bar lengths would not be
# proportional to the means being compared.
barchart(PriceDifference ~ Airline, data = price_airline, col = "gray",
         origin = 0,
         main = "Bargraph of Price Difference Vs Airlines",
         xlab = "Airlines", ylab = "Price Difference")

Box Plot between Travel Month and Price Difference

# Distribution of the price difference (Premium minus Economy) per travel month.
# With the formula PriceDifference ~ TravelMonth the groups (months) run along
# the x-axis and the values along the y-axis, so the axis labels follow that.
boxplot(PriceDifference ~ TravelMonth, data = air_data,
        main = "Boxplot of Price Difference between Economy and Premium tickets by Travel Month",
        xlab = "Travel Month", ylab = "Price Difference")

BarPlot between Travel Month and Price Difference

# Mean price difference for each travel month.
price_month <- aggregate(PriceDifference ~ TravelMonth, data = air_data,
                         FUN = mean)
print(price_month)
##   TravelMonth PriceDifference
## 1         Aug        526.4646
## 2         Jul        462.9333
## 3         Oct        540.6850
## 4         Sep        519.9922
# Mean price difference per travel month as a bar chart.
# origin = 0 anchors the bars at zero; lattice's default (origin = NULL)
# starts them at the edge of the panel, so bar lengths would not be
# proportional to the means being compared.
barchart(PriceDifference ~ TravelMonth, data = price_month, col = "gray",
         origin = 0,
         main = "Bargraph of Price Difference Vs Travel Month",
         xlab = "Travel Month", ylab = "Price Difference")

Draw scatter plots to understand how the variables are correlated pairwise

library(car)
# Pairwise scatter plots of flight duration, pitch difference, width
# difference and price difference, drawn with cex = 0.6.
scatterplotMatrix(
  ~ FlightDuration + PitchDifference + WidthDifference + PriceDifference,
  data = air_data,
  cex = 0.6
)

Draw a Corrgram

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.3.3
# Correlogram of air_data, using panel.shade for the lower panel, panel.pie
# for the upper panel and panel.txt for the text panel.
corrgram(
  air_data,
  main = "Corrgram of Airlines data",
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

T-Test between Economy and Premium price

# Compare the mean Economy and Premium prices with a two-sided Welch
# two-sample t-test (the defaults, spelled out explicitly).
# NOTE(review): both price vectors come from the same rows of air_data, so a
# paired test (paired = TRUE) may be the more appropriate design — confirm.
t.test(x = air_data$PriceEconomy, y = air_data$PricePremium,
       alternative = "two.sided", paired = FALSE, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  air_data$PriceEconomy and air_data$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0831 -369.2793
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

Linear regression model of Price Difference on Flight Duration, Pitch Difference and Width Difference

# Regress the price difference on flight duration and the pitch and width
# differences, then print the coefficient table and fit statistics.
fit <- lm(
  PriceDifference ~ FlightDuration + PitchDifference + WidthDifference,
  data = air_data
)
print(summary(fit))
## 
## Call:
## lm(formula = PriceDifference ~ FlightDuration + PitchDifference + 
##     WidthDifference, data = air_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -859.4 -324.7  -62.7  150.1 3331.5 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -286.933    117.833  -2.435   0.0153 *  
## FlightDuration    80.992      6.754  11.992   <2e-16 ***
## PitchDifference   10.387     20.779   0.500   0.6174    
## WidthDifference   74.641     30.977   2.410   0.0164 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.1 on 454 degrees of freedom
## Multiple R-squared:  0.2538, Adjusted R-squared:  0.2489 
## F-statistic: 51.48 on 3 and 454 DF,  p-value: < 2.2e-16

Correlation tests

# Pearson correlation test between price difference and flight duration.
cor.test(x = air_data$PriceDifference, y = air_data$FlightDuration,
         method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  air_data$PriceDifference and air_data$FlightDuration
## t = 11.435, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3976578 0.5403379
## sample estimates:
##       cor 
## 0.4720837
# Pearson correlation test between price difference and seat-width difference.
cor.test(x = air_data$PriceDifference, y = air_data$WidthDifference,
         method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  air_data$PriceDifference and air_data$WidthDifference
## t = 2.5291, df = 456, p-value = 0.01177
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02627012 0.20700978
## sample estimates:
##       cor 
## 0.1176138