# Load the six-airlines fare data set and print a per-column summary.
#
# The data directory can be overridden through the AIRLINES_DATA_DIR
# environment variable; it falls back to the original author's folder. The
# file is read through an explicit path instead of setwd(), so the session's
# working directory is left untouched.
data_dir <- Sys.getenv(
  "AIRLINES_DATA_DIR",
  unset = "C:/Users/Dell/Downloads/Sameer Mathur"
)

# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 as
# well; the per-level counts in the summary printed below rely on that.
airlines.df <- read.csv(
  file.path(data_dir, "SixAirlinesDataV2.csv"),
  stringsAsFactors = TRUE
)

# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(airlines.df)
}
summary(airlines.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Descriptive statistics for every column of the data set; only the first
# five statistics (vars, n, mean, sd, median) are displayed.
library(psych)
airline_stats <- describe(airlines.df)
airline_stats[, seq_len(5)]
##                     vars   n    mean      sd  median
## Airline*               1 458    3.01    1.65    2.00
## Aircraft*              2 458    1.67    0.47    2.00
## FlightDuration         3 458    7.58    3.54    7.79
## TravelMonth*           4 458    2.56    1.17    3.00
## IsInternational*       5 458    1.91    0.28    2.00
## SeatsEconomy           6 458  202.31   76.37  185.00
## SeatsPremium           7 458   33.65   13.26   36.00
## PitchEconomy           8 458   31.22    0.66   31.00
## PitchPremium           9 458   37.91    1.31   38.00
## WidthEconomy          10 458   17.84    0.56   18.00
## WidthPremium          11 458   19.47    1.10   19.00
## PriceEconomy          12 458 1327.08  988.27 1242.00
## PricePremium          13 458 1845.26 1288.14 1737.00
## PriceRelative         14 458    0.49    0.45    0.36
## SeatsTotal            15 458  235.96   85.29  227.00
## PitchDifference       16 458    6.69    1.76    7.00
## WidthDifference       17 458    1.63    1.19    1.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21
# Horizontal box plots of the seat counts, the fares and the economy/premium
# differences, one variable per panel on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))

# Column name -> x-axis label, in drawing order.
panel_labels <- c(
  SeatsEconomy = "Seats in Economy",
  SeatsPremium = "Seats in Premium",
  PriceEconomy = "Price of Economy",
  PricePremium = "Price of Premium",
  PriceRelative = "Relative Price difference",
  PitchDifference = "Difference in Pitch",
  WidthDifference = "Difference in Width"
)

for (column in names(panel_labels)) {
  boxplot(
    airlines.df[[column]],
    xlab = panel_labels[[column]],
    horizontal = TRUE
  )
}

# Fares and relative price broken down by airline.
#
# Vertical boxes are boxplot()'s default (horizontal = FALSE). The former
# `verticalal = TRUE` / `vertical = TRUE` arguments are not boxplot()
# parameters; they were only forwarded as unknown graphical parameters, so
# they are dropped. Passing `data =` keeps the formula terms (and any axis
# label derived from them) free of the `airlines.df$` prefix.
boxplot(PriceEconomy ~ Airline, data = airlines.df,
        ylab = "Price in economy", las = 1)

boxplot(PricePremium ~ Airline, data = airlines.df,
        ylab = "Price in premium", las = 1)

boxplot(PriceRelative ~ Airline, data = airlines.df,
        ylab = "Relative price", las = 1)

# Fares against seat width, and relative price against the width / pitch
# differences, two panels per page.
#
# `vertical = TRUE` is not a boxplot() argument (vertical boxes are the
# default, horizontal = FALSE); it was only forwarded as an unknown graphical
# parameter, so it is dropped from the first two calls.
par(mfrow = c(1, 2))
boxplot(PriceEconomy ~ WidthEconomy, data = airlines.df,
        xlab = "Width in economy", ylab = "Price in economy", las = 1)
boxplot(PricePremium ~ WidthPremium, data = airlines.df,
        xlab = "Width in premium", ylab = "Price in premium", las = 1)

# Horizontal boxes: the response (relative price) runs along the x-axis and
# the grouping variable along the y-axis, hence the swapped axis labels.
boxplot(PriceRelative ~ WidthDifference, data = airlines.df,
        xlab = "Relative price", ylab = "Difference in width",
        horizontal = TRUE, las = 1)
boxplot(PriceRelative ~ PitchDifference, data = airlines.df,
        xlab = "Relative price", ylab = "Difference in pitch",
        horizontal = TRUE, las = 1)

# Numeric columns that enter the covariance matrix.
numeric_columns <- c(
  "FlightDuration", "SeatsEconomy", "SeatsPremium",
  "PitchEconomy", "PitchPremium", "WidthEconomy", "WidthPremium",
  "PriceEconomy", "PricePremium", "PriceRelative",
  "SeatsTotal", "PitchDifference", "WidthDifference",
  "PercentPremiumSeats"
)

# Plain `[` subsetting instead of subset() with an empty positional argument;
# ?subset recommends the standard subsetting functions outside interactive use.
airline <- airlines.df[, numeric_columns]

# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(airline)
}

# Pairwise covariances of the numeric columns.
cov(airline)
##                     FlightDuration  SeatsEconomy  SeatsPremium
## FlightDuration          12.5462183    52.9194291    7.57372426
## SeatsEconomy            52.9194291  5832.9154300  633.07060954
## SeatsPremium             7.5737243   633.0706095  175.86521648
## PitchEconomy             0.6817421     7.2117665   -0.29725856
## PitchPremium             0.4477835    11.9637325    0.08508595
## WidthEconomy             0.9014224    15.9105138    3.36977440
## WidthPremium             0.4019845     8.5832800   -0.03954019
## PriceEconomy          1983.5401655  9673.7944684 1489.38359627
## PricePremium          2959.9783043 17413.2541733 3717.36428960
## PriceRelative            0.1932368     0.1361699   -0.58078765
## SeatsTotal              60.4931534  6465.9860396  808.93582602
## PitchDifference         -0.2339587     4.7519660    0.38234451
## WidthDifference         -0.4994380    -7.3272338   -3.40931459
## PercentPremiumSeats      1.0379912  -122.3914537   31.14753127
##                     PitchEconomy PitchPremium WidthEconomy WidthPremium
## FlightDuration         0.6817421   0.44778348   0.90142242   0.40198446
## SeatsEconomy           7.2117665  11.96373253  15.91051379   8.58327998
## SeatsPremium          -0.2972586   0.08508595   3.36977440  -0.03954019
## PitchEconomy           0.4292471  -0.47398546   0.10756500  -0.38766208
## PitchPremium          -0.4739855   1.72639580  -0.01739081   1.08157435
## WidthEconomy           0.1075650  -0.01739081   0.31081765   0.05010845
## WidthPremium          -0.3876621   1.08157435   0.05010845   1.20378776
## PriceEconomy         238.7031905  65.42513354  37.46095191 -61.85450011
## PricePremium         190.8517195 149.85356368 108.11611707  90.47997668
## PriceRelative         -0.1248808   0.24719874  -0.01104335   0.24928593
## SeatsTotal             6.9145079  12.04881848  19.28028819   8.54373979
## PitchDifference       -0.9032326   2.20038126  -0.12495581   1.46923643
## WidthDifference       -0.4952271   1.09896515  -0.26070920   1.15367930
## PercentPremiumSeats   -0.3261739  -1.11655834   0.61321816  -0.97393787
##                      PriceEconomy  PricePremium PriceRelative
## FlightDuration         1983.54017    2959.97830    0.19323683
## SeatsEconomy           9673.79447   17413.25417    0.13616991
## SeatsPremium           1489.38360    3717.36429   -0.58078765
## PitchEconomy            238.70319     190.85172   -0.12488080
## PitchPremium             65.42513     149.85356    0.24719874
## WidthEconomy             37.46095     108.11612   -0.01104335
## WidthPremium            -61.85450      90.47998    0.24928593
## PriceEconomy         976684.06198 1147494.76801 -128.49991725
## PricePremium        1147494.76801 1659293.11947   18.48428836
## PriceRelative          -128.49992      18.48429    0.20302893
## SeatsTotal            11163.17806   21130.61846   -0.44461774
## PitchDifference        -173.27806     -40.99816    0.37207954
## WidthDifference         -99.31545     -17.63614    0.26032928
## PercentPremiumSeats     312.61077     726.01582   -0.35252750
##                        SeatsTotal PitchDifference WidthDifference
## FlightDuration         60.4931534      -0.2339587      -0.4994380
## SeatsEconomy         6465.9860396       4.7519660      -7.3272338
## SeatsPremium          808.9358260       0.3823445      -3.4093146
## PitchEconomy            6.9145079      -0.9032326      -0.4952271
## PitchPremium           12.0488185       2.2003813       1.0989652
## WidthEconomy           19.2802882      -0.1249558      -0.2607092
## WidthPremium            8.5437398       1.4692364       1.1536793
## PriceEconomy        11163.1780647    -173.2780570     -99.3154520
## PricePremium        21130.6184629     -40.9981558     -17.6361404
## PriceRelative          -0.4446177       0.3720795       0.2603293
## SeatsTotal           7274.9218656       5.1343105     -10.7365484
## PitchDifference         5.1343105       3.1036138       1.5941922
## WidthDifference       -10.7365484       1.5941922       1.4143885
## PercentPremiumSeats   -91.2439224      -0.7903844      -1.5871560
##                     PercentPremiumSeats
## FlightDuration                1.0379912
## SeatsEconomy               -122.3914537
## SeatsPremium                 31.1475313
## PitchEconomy                 -0.3261739
## PitchPremium                 -1.1165583
## WidthEconomy                  0.6132182
## WidthPremium                 -0.9739379
## PriceEconomy                312.6107669
## PricePremium                726.0158229
## PriceRelative                -0.3525275
## SeatsTotal                  -91.2439224
## PitchDifference              -0.7903844
## WidthDifference              -1.5871560
## PercentPremiumSeats          23.4493343
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, and text labels drawn by panel.txt. order = TRUE asks corrgram to
# reorder the variables rather than keep the column order.
library(corrgram)
corrgram(
  airlines.df,
  main = "Corrgram of airlines intercorrelations",
  order = TRUE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt
)

Hypothesis 1: The price of premium seats is greatly affected by the duration of the flight, the total number of seats, the number of premium seats, the difference in width between premium and economy seats, and the difference in pitch between premium and economy seats.

library(psych)
# Paired t-test of PricePremium against FlightDuration.
# NOTE(review): this tests whether the mean of the row-wise differences
# (price minus duration) is zero; the two columns are in different units, so
# it does not show that duration affects price. Consider cor.test() or the
# regression fitted further down instead.
t.test(airlines.df$PricePremium,airlines.df$FlightDuration,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PricePremium and airlines.df$FlightDuration
## t = 30.585, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1719.606 1955.754
## sample estimates:
## mean of the differences 
##                 1837.68
# Paired t-test of PricePremium against SeatsPremium.
# NOTE(review): this tests whether the mean of the row-wise differences
# (price minus seat count) is zero; it does not measure association between
# the two columns. Consider cor.test() or a regression instead.
t.test(airlines.df$PricePremium,airlines.df$SeatsPremium,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PricePremium and airlines.df$SeatsPremium
## t = 30.164, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1693.583 1929.635
## sample estimates:
## mean of the differences 
##                1811.609
# Paired t-test of PricePremium against SeatsTotal.
# NOTE(review): this tests whether the mean of the row-wise differences
# (price minus seat count) is zero; it does not measure association between
# the two columns. Consider cor.test() or a regression instead.
t.test(airlines.df$PricePremium,airlines.df$SeatsTotal,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PricePremium and airlines.df$SeatsTotal
## t = 27.023, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1492.266 1726.328
## sample estimates:
## mean of the differences 
##                1609.297
# Paired t-test of PricePremium against WidthDifference.
# NOTE(review): this tests whether the mean of the row-wise differences
# (price minus width difference) is zero; it does not measure association
# between the two columns. Consider cor.test() or a regression instead.
t.test(airlines.df$PricePremium,airlines.df$WidthDifference,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PricePremium and airlines.df$WidthDifference
## t = 30.629, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1725.338 1961.910
## sample estimates:
## mean of the differences 
##                1843.624
# Paired t-test of PricePremium against PitchDifference.
# NOTE(review): this tests whether the mean of the row-wise differences
# (price minus pitch difference) is zero; it does not measure association
# between the two columns. Consider cor.test() or a regression instead.
t.test(airlines.df$PricePremium,airlines.df$PitchDifference,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PricePremium and airlines.df$PitchDifference
## t = 30.545, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1720.282 1956.858
## sample estimates:
## mean of the differences 
##                 1838.57

Hypothesis 2: The relative price difference is greatly affected by the duration of the flight, the total number of seats, the difference in pitch between economy and premium seats, and the difference in width between economy and premium seats.

# Paired t-test of PriceRelative against FlightDuration.
# NOTE(review): this tests whether the mean of the row-wise differences
# (relative price minus duration) is zero; the two columns are in different
# units, so it does not show that duration affects the relative price.
# Consider cor.test() or the regression fitted further down instead.
t.test(airlines.df$PriceRelative,airlines.df$FlightDuration,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PriceRelative and airlines.df$FlightDuration
## t = -43.158, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.413501 -6.767765
## sample estimates:
## mean of the differences 
##               -7.090633
# Paired t-test of PriceRelative against SeatsTotal.
# NOTE(review): this tests whether the mean of the row-wise differences
# (relative price minus seat count) is zero; it does not measure association
# between the two columns. Consider cor.test() or a regression instead.
t.test(airlines.df$PriceRelative,airlines.df$SeatsTotal,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PriceRelative and airlines.df$SeatsTotal
## t = -59.078, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -243.3062 -227.6408
## sample estimates:
## mean of the differences 
##               -235.4735
# Paired t-test of PriceRelative against PitchDifference.
# NOTE(review): this tests whether the mean of the row-wise differences
# (relative price minus pitch difference) is zero; it does not measure
# association between the two columns. Consider cor.test() or a regression
# instead.
t.test(airlines.df$PriceRelative,airlines.df$PitchDifference,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PriceRelative and airlines.df$PitchDifference
## t = -82.896, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.347561 -6.053574
## sample estimates:
## mean of the differences 
##               -6.200568
# Paired t-test of PriceRelative against WidthDifference.
# NOTE(review): this tests whether the mean of the row-wise differences
# (relative price minus width difference) is zero; it does not measure
# association between the two columns. Consider cor.test() or a regression
# instead.
t.test(airlines.df$PriceRelative,airlines.df$WidthDifference,paired = TRUE)
## 
##  Paired t-test
## 
## data:  airlines.df$PriceRelative and airlines.df$WidthDifference
## t = -23.418, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.242149 -1.049816
## sample estimates:
## mean of the differences 
##               -1.145983

From the tests of Hypothesis 1, we reject the null hypothesis and conclude that all the factors affect the price of premium seats.

# Linear model for Hypothesis 1: premium fare regressed on flight duration,
# total seats, premium seats, and the width and pitch differences.
final <- lm(
  PricePremium ~ FlightDuration + SeatsTotal + SeatsPremium +
    WidthDifference + PitchDifference,
  data = airlines.df
)
summary(final)
## 
## Call:
## lm(formula = PricePremium ~ FlightDuration + SeatsTotal + SeatsPremium + 
##     WidthDifference + PitchDifference, data = airlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2273.0  -605.0   -19.5   701.9  4485.5 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       20.3415   238.4197   0.085  0.93205    
## FlightDuration   235.6225    12.9664  18.172  < 2e-16 ***
## SeatsTotal        -0.8036     0.7592  -1.058  0.29041    
## SeatsPremium      19.9846     5.0617   3.948 9.13e-05 ***
## WidthDifference  258.7604    62.3947   4.147 4.02e-05 ***
## PitchDifference -129.4945    40.8960  -3.166  0.00165 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 955.9 on 452 degrees of freedom
## Multiple R-squared:  0.4553, Adjusted R-squared:  0.4493 
## F-statistic: 75.57 on 5 and 452 DF,  p-value: < 2.2e-16

We can therefore write an approximate formula for the price of premium seats, leaving out SeatsTotal because its coefficient is not statistically significant (p = 0.29): PricePremium = 20.3415 + 235.6225 * FlightDuration + 19.9846 * SeatsPremium + 258.7604 * WidthDifference - 129.4945 * PitchDifference.

From the tests of Hypothesis 2, we reject the null hypothesis and conclude that all the factors affect the relative pricing of the seats.

# Linear model for Hypothesis 2: relative price regressed on flight duration,
# total seats, and the pitch and width differences.
final1 <- lm(
  PriceRelative ~ FlightDuration + SeatsTotal +
    PitchDifference + WidthDifference,
  data = airlines.df
)
summary(final1)
## 
## Call:
## lm(formula = PriceRelative ~ FlightDuration + SeatsTotal + PitchDifference + 
##     WidthDifference, data = airlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7518 -0.2876 -0.0572  0.1593  1.1593 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -2.403e-01  9.510e-02  -2.527 0.011854 *  
## FlightDuration   2.200e-02  5.180e-03   4.247 2.63e-05 ***
## SeatsTotal      -9.444e-05  2.176e-04  -0.434 0.664447    
## PitchDifference  5.590e-02  1.590e-02   3.515 0.000483 ***
## WidthDifference  1.281e-01  2.377e-02   5.390 1.14e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3819 on 453 degrees of freedom
## Multiple R-squared:  0.2878, Adjusted R-squared:  0.2816 
## F-statistic: 45.77 on 4 and 453 DF,  p-value: < 2.2e-16

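We can therefore write an approximate formula for the relative price in the same way: PriceRelative = -0.2403 + 0.0220 * FlightDuration + 0.0559 * PitchDifference + 0.1281 * WidthDifference (SeatsTotal is again left out because its coefficient is not statistically significant).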