# Load the six-airline fare data set into a data frame.
# stringsAsFactors = TRUE is set explicitly: it was the read.csv() default
# before R 4.0.0 but not afterwards. Without it, text columns such as Airline
# and TravelMonth load as character on current R, summary() no longer prints
# per-level counts, and plot() on such a column fails.
air.df <- read.csv(
  "C:/Users/here_is_sachin/Downloads/R udemy/SixAirlinesDataV2.csv",
  stringsAsFactors = TRUE
)
# Per-column overview: level counts for factors, five-number summary plus
# mean for numeric columns.
summary(air.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Different Airlines

# Tabulate how many records each airline contributes, then echo the counts.
Dif <- table(air.df[["Airline"]])
Dif
## 
## AirFrance   British     Delta       Jet Singapore    Virgin 
##        74       175        46        61        40        62
# Draw the per-airline counts as a bar chart.
barplot(height = Dif)

# Bar chart of the number of records per travel month.
# Tabulating first and calling barplot() draws the same chart plot() gives for
# a factor, and also works when TravelMonth was read as character (the
# read.csv() default since R 4.0.0), where plot() would fail.
# The title typo ("Frequecy") is corrected as well.
barplot(table(air.df$TravelMonth), main = "Frequency of flights in months")

Histogram

# Put the two price histograms side by side (one row, two columns).
par(mfrow = c(1, 2))
# Economy fares; the y-axis is capped at 150.
hist(
  air.df$PriceEconomy,
  main = "Economy Price",
  xlab = "Price Economy",
  ylim = c(0, 150),
  breaks = 6
)
# Premium-economy fares with the default y-axis range.
hist(
  air.df$PricePremium,
  main = "Premium Price",
  xlab = "Price Premium",
  breaks = 6
)

# Put the two seat-count boxplots side by side (one row, two columns).
par(mfrow = c(1, 2))
# `breaks` is an argument of hist(), not of boxplot(); it had no effect on
# these plots and has been dropped.
boxplot(air.df$SeatsEconomy, main = "Economy Seats")
boxplot(air.df$SeatsPremium, main = "Premium Seats")

Airline-Wise Economy Price Distribution

# Horizontal boxplots of the economy fare, one box per airline.
# The default y-axis is suppressed (yaxt = "n") so that short airline labels
# can be drawn by hand below.
boxplot(
  PriceEconomy ~ Airline,
  data = air.df,
  main = "Airline-wise Economy class-price distribution",
  xlab = "Price in USD",
  yaxt = "n",
  horizontal = TRUE
)
# Abbreviated airline labels at box positions 1..6 on the left-hand axis.
axis(
  side = 2,
  at = 1:6,
  labels = c("Aif", "Bts", "Delt", "Jet", "Sigp", "Vg")
)

Price-difference Vs Flight Duration

# scatterplot() comes from the car package.
library(car)
# Relative price against flight duration, drawn with solid points (pch = 19)
# at slightly reduced size.
scatterplot(
  PriceRelative ~ FlightDuration,
  data = air.df,
  main = " Relative price difference vs Flight Duration",
  pch = 19,
  cex = 0.9
)

Correlation

# Pairwise correlations between all numeric columns, rounded to two decimals.
round(cor(air.df[vapply(air.df, is.numeric, logical(1))]), digits = 2)
##                     FlightDuration SeatsEconomy SeatsPremium PitchEconomy
## FlightDuration                1.00         0.20         0.16         0.29
## SeatsEconomy                  0.20         1.00         0.63         0.14
## SeatsPremium                  0.16         0.63         1.00        -0.03
## PitchEconomy                  0.29         0.14        -0.03         1.00
## PitchPremium                  0.10         0.12         0.00        -0.55
## WidthEconomy                  0.46         0.37         0.46         0.29
## WidthPremium                  0.10         0.10         0.00        -0.54
## PriceEconomy                  0.57         0.13         0.11         0.37
## PricePremium                  0.65         0.18         0.22         0.23
## PriceRelative                 0.12         0.00        -0.10        -0.42
## SeatsTotal                    0.20         0.99         0.72         0.12
## PitchDifference              -0.04         0.04         0.02        -0.78
## WidthDifference              -0.12        -0.08        -0.22        -0.64
## PercentPremiumSeats           0.06        -0.33         0.49        -0.10
##                     PitchPremium WidthEconomy WidthPremium PriceEconomy
## FlightDuration              0.10         0.46         0.10         0.57
## SeatsEconomy                0.12         0.37         0.10         0.13
## SeatsPremium                0.00         0.46         0.00         0.11
## PitchEconomy               -0.55         0.29        -0.54         0.37
## PitchPremium                1.00        -0.02         0.75         0.05
## WidthEconomy               -0.02         1.00         0.08         0.07
## WidthPremium                0.75         0.08         1.00        -0.06
## PriceEconomy                0.05         0.07        -0.06         1.00
## PricePremium                0.09         0.15         0.06         0.90
## PriceRelative               0.42        -0.04         0.50        -0.29
## SeatsTotal                  0.11         0.41         0.09         0.13
## PitchDifference             0.95        -0.13         0.76        -0.10
## WidthDifference             0.70        -0.39         0.88        -0.08
## PercentPremiumSeats        -0.18         0.23        -0.18         0.07
##                     PricePremium PriceRelative SeatsTotal PitchDifference
## FlightDuration              0.65          0.12       0.20           -0.04
## SeatsEconomy                0.18          0.00       0.99            0.04
## SeatsPremium                0.22         -0.10       0.72            0.02
## PitchEconomy                0.23         -0.42       0.12           -0.78
## PitchPremium                0.09          0.42       0.11            0.95
## WidthEconomy                0.15         -0.04       0.41           -0.13
## WidthPremium                0.06          0.50       0.09            0.76
## PriceEconomy                0.90         -0.29       0.13           -0.10
## PricePremium                1.00          0.03       0.19           -0.02
## PriceRelative               0.03          1.00      -0.01            0.47
## SeatsTotal                  0.19         -0.01       1.00            0.03
## PitchDifference            -0.02          0.47       0.03            1.00
## WidthDifference            -0.01          0.49      -0.11            0.76
## PercentPremiumSeats         0.12         -0.16      -0.22           -0.09
##                     WidthDifference PercentPremiumSeats
## FlightDuration                -0.12                0.06
## SeatsEconomy                  -0.08               -0.33
## SeatsPremium                  -0.22                0.49
## PitchEconomy                  -0.64               -0.10
## PitchPremium                   0.70               -0.18
## WidthEconomy                  -0.39                0.23
## WidthPremium                   0.88               -0.18
## PriceEconomy                  -0.08                0.07
## PricePremium                  -0.01                0.12
## PriceRelative                  0.49               -0.16
## SeatsTotal                    -0.11               -0.22
## PitchDifference                0.76               -0.09
## WidthDifference                1.00               -0.28
## PercentPremiumSeats           -0.28                1.00

Regression Analysis

# Linear model of the absolute price gap (premium minus economy) on the pitch
# difference, the width difference and the flight duration.
fit <- lm(
  formula = (air.df$PricePremium - air.df$PriceEconomy) ~
    air.df$PitchDifference + air.df$WidthDifference + air.df$FlightDuration
)
# Coefficient table, residual summary and goodness-of-fit statistics.
summary(fit)
## 
## Call:
## lm(formula = (air.df$PricePremium - air.df$PriceEconomy) ~ air.df$PitchDifference + 
##     air.df$WidthDifference + air.df$FlightDuration)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -859.4 -324.7  -62.7  150.1 3331.5 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -286.933    117.833  -2.435   0.0153 *  
## air.df$PitchDifference   10.387     20.779   0.500   0.6174    
## air.df$WidthDifference   74.641     30.977   2.410   0.0164 *  
## air.df$FlightDuration    80.992      6.754  11.992   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 506.1 on 454 degrees of freedom
## Multiple R-squared:  0.2538, Adjusted R-squared:  0.2489 
## F-statistic: 51.48 on 3 and 454 DF,  p-value: < 2.2e-16

T-test Hypothesis: there is no difference between the mean price of an economy class ticket and the mean price of a premium economy class ticket.

# Two-sample t-test comparing mean economy and premium-economy fares,
# treating the samples as independent with a pooled (equal) variance.
# NOTE(review): both prices appear to come from the same rows of air.df, so a
# paired test may be more appropriate - confirm before relying on this result.
t.test(
  x = air.df$PriceEconomy,
  y = air.df$PricePremium,
  paired = FALSE,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  air.df$PriceEconomy and air.df$PricePremium
## t = -6.8304, df = 914, p-value = 1.544e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0699 -369.2926
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258

The null hypothesis is rejected because the t-test gives a very low p-value (p < 0.05).

Pearson’s Correlation Tests: between the price difference and the width difference

# Pearson correlation test between the absolute price gap
# (premium minus economy) and the seat-width difference.
cor.test(
  x = (air.df$PricePremium - air.df$PriceEconomy),
  y = air.df$WidthDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  (air.df$PricePremium - air.df$PriceEconomy) and air.df$WidthDifference
## t = 2.5291, df = 456, p-value = 0.01177
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02627012 0.20700978
## sample estimates:
##       cor 
## 0.1176138

Between the price difference and the pitch difference

# Pearson correlation test between the absolute price gap
# (premium minus economy) and the seat-pitch difference.
cor.test(
  x = (air.df$PricePremium - air.df$PriceEconomy),
  y = air.df$PitchDifference
)
## 
##  Pearson's product-moment correlation
## 
## data:  (air.df$PricePremium - air.df$PriceEconomy) and air.df$PitchDifference
## t = 2.7688, df = 456, p-value = 0.005855
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03739893 0.21764764
## sample estimates:
##       cor 
## 0.1285851

Regression Model

# Regress the relative price on every other column of air.df
# (the `.` on the right-hand side expands to all remaining variables).
model <- lm(formula = PriceRelative ~ ., data = air.df)
# Coefficient table and fit statistics for the full model.
summary(model)
## 
## Call:
## lm(formula = PriceRelative ~ ., data = air.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76373 -0.08269  0.00438  0.08002  0.84672 
## 
## Coefficients: (3 not defined because of singularities)
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -3.993e-01  2.948e+00  -0.135 0.892302    
## AirlineBritish               -3.971e-01  1.107e-01  -3.586 0.000373 ***
## AirlineDelta                 -3.865e-01  2.203e-01  -1.755 0.080020 .  
## AirlineJet                   -2.584e-01  9.594e-02  -2.693 0.007354 ** 
## AirlineSingapore             -3.535e-01  1.297e-01  -2.725 0.006685 ** 
## AirlineVirgin                -3.575e-01  2.031e-01  -1.761 0.078997 .  
## AircraftBoeing                4.003e-02  2.968e-02   1.349 0.178089    
## FlightDuration                2.613e-02  4.727e-03   5.526 5.63e-08 ***
## TravelMonthJul                2.111e-02  3.145e-02   0.671 0.502475    
## TravelMonthOct                2.778e-02  2.670e-02   1.041 0.298619    
## TravelMonthSep               -6.617e-03  2.664e-02  -0.248 0.803924    
## IsInternationalInternational  2.785e-02  2.502e-01   0.111 0.911400    
## SeatsEconomy                  8.090e-04  5.462e-04   1.481 0.139313    
## SeatsPremium                 -7.374e-03  3.615e-03  -2.040 0.041967 *  
## PitchEconomy                 -1.756e-02  7.994e-02  -0.220 0.826207    
## PitchPremium                  5.960e-02  9.165e-02   0.650 0.515823    
## WidthEconomy                 -9.207e-02  5.266e-02  -1.748 0.081085 .  
## WidthPremium                  4.904e-02  1.365e-01   0.359 0.719527    
## PriceEconomy                 -9.325e-04  3.318e-05 -28.105  < 2e-16 ***
## PricePremium                  5.781e-04  2.294e-05  25.197  < 2e-16 ***
## SeatsTotal                           NA         NA      NA       NA    
## PitchDifference                      NA         NA      NA       NA    
## WidthDifference                      NA         NA      NA       NA    
## PercentPremiumSeats           1.114e-02  7.653e-03   1.456 0.146197    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2123 on 437 degrees of freedom
## Multiple R-squared:  0.7878, Adjusted R-squared:  0.7781 
## F-statistic: 81.12 on 20 and 437 DF,  p-value: < 2.2e-16

Factors that explain the difference in price

1. Prices vary more on international flights. 2. Pitch and width differences are larger on international flights. 3. Relative pricing and the number of premium class seats are higher on international flights. 4. The intercept b0 of the equation y = b0 + b1x1 + b2x2 + ... is -0.3993 (its p-value is 0.892302, so it is not significantly different from zero), where y is the relative premium price and the x's are all other variables.

The multiple R-squared value is 0.7878, so the model accounts for 78.78% of the variance in relative premium price. The adjusted R-squared value (0.7781) is only slightly lower than the multiple R-squared value, which indicates that the explanatory power of the model is not merely an artifact of the number of predictors it contains.

Since the p-value of the overall F-test is less than 0.05 (p < 2.2e-16), the model as a whole is statistically significant.